Merge branch 'master' of github.com:official-stockfish/Stockfish into nnue-player-merge-2020-08-28

# Conflicts:
#	README.md
#	src/Makefile
#	src/search.cpp
#	src/types.h
#	src/uci.cpp
#	src/ucioption.cpp
This commit is contained in:
nodchip
2020-08-28 11:26:11 +09:00
35 changed files with 954 additions and 768 deletions
+29 -8
View File
@@ -43,26 +43,47 @@ before_script:
- cd src - cd src
script: script:
# Download net
- make net
# Obtain bench reference from git log # Obtain bench reference from git log
- git log HEAD | grep "\b[Bb]ench[ :]\+[0-9]\{7\}" | head -n 1 | sed "s/[^0-9]*\([0-9]*\).*/\1/g" > git_sig - git log HEAD | grep "\b[Bb]ench[ :]\+[0-9]\{7\}" | head -n 1 | sed "s/[^0-9]*\([0-9]*\).*/\1/g" > git_sig
- export benchref=$(cat git_sig) - export benchref=$(cat git_sig)
- echo "Reference bench:" $benchref - echo "Reference bench:" $benchref
#
# Compiler version string # Compiler version string
- $COMPILER -v - $COMPILER -v
# # test help target
- make help
# Verify bench number against various builds # Verify bench number against various builds
- export CXXFLAGS="-Werror -D_GLIBCXX_DEBUG" - export CXXFLAGS="-Werror -D_GLIBCXX_DEBUG"
- make clean && make -j2 ARCH=x86-64 optimize=no debug=yes build && ../tests/signature.sh $benchref - make clean && make -j2 ARCH=x86-64-modern optimize=no debug=yes build && ../tests/signature.sh $benchref
- export CXXFLAGS="-Werror"
- make clean && make -j2 ARCH=x86-64-modern build && ../tests/signature.sh $benchref
- make clean && make -j2 ARCH=x86-64-ssse3 build && ../tests/signature.sh $benchref
- make clean && make -j2 ARCH=x86-64-sse3-popcnt build && ../tests/signature.sh $benchref
- make clean && make -j2 ARCH=x86-64 build && ../tests/signature.sh $benchref
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=general-64 build && ../tests/signature.sh $benchref; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 optimize=no debug=yes build && ../tests/signature.sh $benchref; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 optimize=no debug=yes build && ../tests/signature.sh $benchref; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32-sse41-popcnt build && ../tests/signature.sh $benchref; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32-sse2 build && ../tests/signature.sh $benchref; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 build && ../tests/signature.sh $benchref; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 build && ../tests/signature.sh $benchref; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=general-32 build && ../tests/signature.sh $benchref; fi
# workaround: exclude a custom version of llvm+clang, which doesn't find llvm-profdata on ubuntu
- if [[ "$TRAVIS_OS_NAME" != "linux" || "$COMP" == "gcc" ]]; then make clean && make -j2 ARCH=x86-64-modern profile-build && ../tests/signature.sh $benchref; fi
# compile only for some more advanced architectures (might not run in travis)
- make clean && make -j2 ARCH=x86-64-avx2 build
- make clean && make -j2 ARCH=x86-64-bmi2 build
- make clean && make -j2 ARCH=x86-64-avx512 build
- make clean && make -j2 ARCH=x86-64-vnni512 build
- make clean && make -j2 ARCH=x86-64-vnni256 build
# #
# Check perft and reproducible search # Check perft and reproducible search
- export CXXFLAGS="-Werror" - make clean && make -j2 ARCH=x86-64-modern build
- make clean && make -j2 ARCH=x86-64 build
- ../tests/perft.sh - ../tests/perft.sh
- ../tests/reprosearch.sh - ../tests/reprosearch.sh
@@ -70,11 +91,11 @@ script:
# Valgrind # Valgrind
# #
- export CXXFLAGS="-O1 -fno-inline" - export CXXFLAGS="-O1 -fno-inline"
- if [ -x "$(command -v valgrind )" ]; then make clean && make -j2 ARCH=x86-64 debug=yes optimize=no build > /dev/null && ../tests/instrumented.sh --valgrind; fi - if [ -x "$(command -v valgrind )" ]; then make clean && make -j2 ARCH=x86-64-modern debug=yes optimize=no build > /dev/null && ../tests/instrumented.sh --valgrind; fi
- if [ -x "$(command -v valgrind )" ]; then ../tests/instrumented.sh --valgrind-thread; fi - if [ -x "$(command -v valgrind )" ]; then ../tests/instrumented.sh --valgrind-thread; fi
# #
# Sanitizer # Sanitizer
# #
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64 sanitize=undefined optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-undefined; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64-modern sanitize=undefined optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-undefined; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64 sanitize=thread optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-thread; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64-modern sanitize=thread optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-thread; fi
+3
View File
@@ -53,11 +53,13 @@ Ernesto Gatti
Linmiao Xu (linrock) Linmiao Xu (linrock)
Fabian Beuke (madnight) Fabian Beuke (madnight)
Fabian Fichter (ianfab) Fabian Fichter (ianfab)
Fanael Linithien (Fanael)
fanon fanon
Fauzi Akram Dabat (FauziAkram) Fauzi Akram Dabat (FauziAkram)
Felix Wittmann Felix Wittmann
gamander gamander
Gary Heckman (gheckman) Gary Heckman (gheckman)
George Sobala (gsobala)
gguliash gguliash
Gian-Carlo Pascutto (gcp) Gian-Carlo Pascutto (gcp)
Gontran Lemaire (gonlem) Gontran Lemaire (gonlem)
@@ -126,6 +128,7 @@ Niklas Fiekas (niklasf)
Nikolay Kostov (NikolayIT) Nikolay Kostov (NikolayIT)
Nguyen Pham (nguyenpham) Nguyen Pham (nguyenpham)
Norman Schmidt (FireFather) Norman Schmidt (FireFather)
notruck
Ondrej Mosnáček (WOnder93) Ondrej Mosnáček (WOnder93)
Oskar Werkelin Ahlin Oskar Werkelin Ahlin
Pablo Vazquez Pablo Vazquez
+14
View File
@@ -61,6 +61,20 @@ before_build:
build_script: build_script:
- cmake --build . --config %CONFIGURATION% -- /verbosity:minimal - cmake --build . --config %CONFIGURATION% -- /verbosity:minimal
- ps: |
# Download default NNUE net from fishtest
$nnuenet = Get-Content -Path src\ucioption.cpp | Select-String -CaseSensitive -Pattern "Option" | Select-String -CaseSensitive -Pattern "nn-[a-z0-9]{12}.nnue"
$dummy = $nnuenet -match "(?<nnuenet>nn-[a-z0-9]{12}.nnue)"
$nnuenet = $Matches.nnuenet
Write-Host "Default net:" $nnuenet
$nnuedownloadurl = "https://tests.stockfishchess.org/api/nn/$nnuenet"
$nnuefilepath = "src\${env:CONFIGURATION}\$nnuenet"
if (Test-Path -Path $nnuefilepath) {
Write-Host "Already available."
} else {
Write-Host "Downloading $nnuedownloadurl to $nnuefilepath"
Invoke-WebRequest -Uri $nnuedownloadurl -OutFile $nnuefilepath
}
before_test: before_test:
- cd src/%CONFIGURATION% - cd src/%CONFIGURATION%
+297 -169
View File
@@ -82,14 +82,16 @@ endif
# bits = 64/32 --- -DIS_64BIT --- 64-/32-bit operating system # bits = 64/32 --- -DIS_64BIT --- 64-/32-bit operating system
# prefetch = yes/no --- -DUSE_PREFETCH --- Use prefetch asm-instruction # prefetch = yes/no --- -DUSE_PREFETCH --- Use prefetch asm-instruction
# popcnt = yes/no --- -DUSE_POPCNT --- Use popcnt asm-instruction # popcnt = yes/no --- -DUSE_POPCNT --- Use popcnt asm-instruction
# pext = yes/no --- -DUSE_PEXT --- Use pext x86_64 asm-instruction
# sse = yes/no --- -msse --- Use Intel Streaming SIMD Extensions # sse = yes/no --- -msse --- Use Intel Streaming SIMD Extensions
# sse3 = yes/no --- -msse3 --- Use Intel Streaming SIMD Extensions 3 # mmx = yes/no --- -mmmx --- Use Intel MMX instructions
# sse2 = yes/no --- -msse2 --- Use Intel Streaming SIMD Extensions 2
# ssse3 = yes/no --- -mssse3 --- Use Intel Supplemental Streaming SIMD Extensions 3 # ssse3 = yes/no --- -mssse3 --- Use Intel Supplemental Streaming SIMD Extensions 3
# sse41 = yes/no --- -msse4.1 --- Use Intel Streaming SIMD Extensions 4.1 # sse41 = yes/no --- -msse4.1 --- Use Intel Streaming SIMD Extensions 4.1
# sse42 = yes/no --- -msse4.2 --- Use Intel Streaming SIMD Extensions 4.2
# avx2 = yes/no --- -mavx2 --- Use Intel Advanced Vector Extensions 2 # avx2 = yes/no --- -mavx2 --- Use Intel Advanced Vector Extensions 2
# pext = yes/no --- -DUSE_PEXT --- Use pext x86_64 asm-instruction
# avx512 = yes/no --- -mavx512bw --- Use Intel Advanced Vector Extensions 512 # avx512 = yes/no --- -mavx512bw --- Use Intel Advanced Vector Extensions 512
# vnni256 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 256
# vnni512 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 512
# neon = yes/no --- -DUSE_NEON --- Use ARM SIMD architecture # neon = yes/no --- -DUSE_NEON --- Use ARM SIMD architecture
# #
# Note that Makefile is space sensitive, so when adding new architectures # Note that Makefile is space sensitive, so when adding new architectures
@@ -97,152 +99,184 @@ endif
# at the end of the line for flag values. # at the end of the line for flag values.
### 2.1. General and architecture defaults ### 2.1. General and architecture defaults
# explicitly check for the list of supported architectures (as listed with make help),
# the user can override with `make ARCH=x86-32-vnni256 SUPPORTED_ARCH=true`
ifeq ($(ARCH),$(filter $(ARCH),x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-bmi2 x86-64-avx2 \
x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \
x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 \
armv7 armv7-neon armv8 apple-silicon general-64 general-32))
SUPPORTED_ARCH=true
else
SUPPORTED_ARCH=false
endif
optimize = yes optimize = yes
debug = no debug = no
sanitize = no sanitize = no
bits = 64 bits = 64
prefetch = no prefetch = no
popcnt = no popcnt = no
pext = no
sse = no sse = no
sse3 = no mmx = no
sse2 = no
ssse3 = no ssse3 = no
sse41 = no sse41 = no
sse42 = no
avx2 = no avx2 = no
pext = no
avx512 = no avx512 = no
vnni256 = no
vnni512 = no
neon = no neon = no
ARCH = x86-64-modern ARCH = x86-64-modern
STRIP = strip
### 2.2 Architecture specific ### 2.2 Architecture specific
ifeq ($(findstring x86,$(ARCH)),x86)
# x86-32/64
ifeq ($(findstring x86-32,$(ARCH)),x86-32)
arch = i386
bits = 32
sse = yes
mmx = yes
else
arch = x86_64
sse = yes
sse2 = yes
endif
ifeq ($(findstring -sse,$(ARCH)),-sse)
sse = yes
endif
ifeq ($(findstring -popcnt,$(ARCH)),-popcnt)
popcnt = yes
endif
ifeq ($(findstring -mmx,$(ARCH)),-mmx)
mmx = yes
endif
ifeq ($(findstring -sse2,$(ARCH)),-sse2)
sse = yes
sse2 = yes
endif
ifeq ($(findstring -ssse3,$(ARCH)),-ssse3)
sse = yes
sse2 = yes
ssse3 = yes
endif
ifeq ($(findstring -sse41,$(ARCH)),-sse41)
sse = yes
sse2 = yes
ssse3 = yes
sse41 = yes
endif
ifeq ($(findstring -modern,$(ARCH)),-modern)
popcnt = yes
sse = yes
sse2 = yes
ssse3 = yes
sse41 = yes
endif
ifeq ($(findstring -avx2,$(ARCH)),-avx2)
popcnt = yes
sse = yes
sse2 = yes
ssse3 = yes
sse41 = yes
avx2 = yes
endif
ifeq ($(findstring -bmi2,$(ARCH)),-bmi2)
popcnt = yes
sse = yes
sse2 = yes
ssse3 = yes
sse41 = yes
avx2 = yes
pext = yes
endif
ifeq ($(findstring -avx512,$(ARCH)),-avx512)
popcnt = yes
sse = yes
sse2 = yes
ssse3 = yes
sse41 = yes
avx2 = yes
pext = yes
avx512 = yes
endif
ifeq ($(findstring -vnni256,$(ARCH)),-vnni256)
popcnt = yes
sse = yes
sse2 = yes
ssse3 = yes
sse41 = yes
avx2 = yes
pext = yes
vnni256 = yes
endif
ifeq ($(findstring -vnni512,$(ARCH)),-vnni512)
popcnt = yes
sse = yes
sse2 = yes
ssse3 = yes
sse41 = yes
avx2 = yes
pext = yes
avx512 = yes
vnni512 = yes
endif
ifeq ($(sse),yes)
prefetch = yes
endif
# 64-bit pext is not available on x86-32
ifeq ($(bits),32)
pext = no
endif
else
# all other architectures
ifeq ($(ARCH),general-32) ifeq ($(ARCH),general-32)
arch = any arch = any
bits = 32 bits = 32
endif endif
ifeq ($(ARCH),x86-32-old)
arch = i386
bits = 32
endif
ifeq ($(ARCH),x86-32)
arch = i386
bits = 32
prefetch = yes
sse = yes
endif
ifeq ($(ARCH),general-64) ifeq ($(ARCH),general-64)
arch = any arch = any
endif endif
ifeq ($(ARCH),x86-64)
arch = x86_64
prefetch = yes
sse = yes
endif
ifeq ($(ARCH),x86-64-sse3)
arch = x86_64
prefetch = yes
sse = yes
sse3 = yes
endif
ifeq ($(ARCH),x86-64-sse3-popcnt)
arch = x86_64
prefetch = yes
sse = yes
sse3 = yes
popcnt = yes
endif
ifeq ($(ARCH),x86-64-ssse3)
arch = x86_64
prefetch = yes
sse = yes
sse3 = yes
ssse3 = yes
endif
ifeq ($(ARCH),x86-64-sse41)
arch = x86_64
prefetch = yes
popcnt = yes
sse = yes
sse3 = yes
ssse3 = yes
sse41 = yes
endif
ifeq ($(ARCH),x86-64-modern)
arch = x86_64
prefetch = yes
popcnt = yes
sse = yes
sse3 = yes
ssse3 = yes
sse41 = yes
endif
ifeq ($(ARCH),x86-64-sse42)
arch = x86_64
prefetch = yes
popcnt = yes
sse = yes
sse3 = yes
ssse3 = yes
sse41 = yes
sse42 = yes
endif
ifeq ($(ARCH),x86-64-avx2)
arch = x86_64
prefetch = yes
popcnt = yes
sse = yes
sse3 = yes
ssse3 = yes
sse41 = yes
sse42 = yes
avx2 = yes
endif
ifeq ($(ARCH),x86-64-bmi2)
arch = x86_64
prefetch = yes
popcnt = yes
sse = yes
sse3 = yes
ssse3 = yes
sse41 = yes
sse42 = yes
avx2 = yes
pext = yes
endif
ifeq ($(ARCH),x86-64-avx512)
arch = x86_64
prefetch = yes
popcnt = yes
sse = yes
sse3 = yes
ssse3 = yes
sse41 = yes
sse42 = yes
avx2 = yes
pext = yes
avx512 = yes
endif
ifeq ($(ARCH),armv7) ifeq ($(ARCH),armv7)
arch = armv7 arch = armv7
prefetch = yes prefetch = yes
bits = 32 bits = 32
endif endif
ifeq ($(ARCH),armv7-neon)
arch = armv7
prefetch = yes
popcnt = yes
neon = yes
bits = 32
endif
ifeq ($(ARCH),armv8) ifeq ($(ARCH),armv8)
arch = armv8-a arch = armv8
prefetch = yes prefetch = yes
popcnt = yes popcnt = yes
neon = yes neon = yes
@@ -266,6 +300,8 @@ ifeq ($(ARCH),ppc-64)
prefetch = yes prefetch = yes
endif endif
endif
### ========================================================================== ### ==========================================================================
### Section 3. Low-level Configuration ### Section 3. Low-level Configuration
### ========================================================================== ### ==========================================================================
@@ -284,7 +320,7 @@ ifeq ($(COMP),gcc)
CXX=g++ CXX=g++
CXXFLAGS += -pedantic -Wextra -Wshadow CXXFLAGS += -pedantic -Wextra -Wshadow
ifeq ($(ARCH),$(filter $(ARCH),armv7 armv8)) ifeq ($(arch),$(filter $(arch),armv7 armv8))
ifeq ($(OS),Android) ifeq ($(OS),Android)
CXXFLAGS += -m$(bits) CXXFLAGS += -m$(bits)
LDFLAGS += -m$(bits) LDFLAGS += -m$(bits)
@@ -294,12 +330,13 @@ ifeq ($(COMP),gcc)
LDFLAGS += -m$(bits) LDFLAGS += -m$(bits)
endif endif
ifeq ($(arch),$(filter $(arch),armv7))
LDFLAGS += -latomic
endif
ifneq ($(KERNEL),Darwin) ifneq ($(KERNEL),Darwin)
LDFLAGS += -Wl,--no-as-needed LDFLAGS += -Wl,--no-as-needed
endif endif
gccversion = $(shell $(CXX) --version)
gccisclang = $(findstring clang,$(gccversion))
endif endif
ifeq ($(COMP),mingw) ifeq ($(COMP),mingw)
@@ -344,7 +381,7 @@ ifeq ($(COMP),clang)
endif endif
endif endif
ifeq ($(ARCH),$(filter $(ARCH),armv7 armv8)) ifeq ($(arch),$(filter $(arch),armv7 armv8))
ifeq ($(OS),Android) ifeq ($(OS),Android)
CXXFLAGS += -m$(bits) CXXFLAGS += -m$(bits)
LDFLAGS += -m$(bits) LDFLAGS += -m$(bits)
@@ -371,6 +408,26 @@ endif
ifeq ($(KERNEL),Darwin) ifeq ($(KERNEL),Darwin)
CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.14 CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.14
LDFLAGS += -arch $(arch) -mmacosx-version-min=10.14 LDFLAGS += -arch $(arch) -mmacosx-version-min=10.14
XCRUN = xcrun
endif
# To cross-compile for Android, NDK version r21 or later is recommended.
# In earlier NDK versions, you'll need to pass -fno-addrsig if using GNU binutils.
# Currently we don't know how to make PGO builds with the NDK yet.
ifeq ($(COMP),ndk)
CXXFLAGS += -stdlib=libc++ -fPIE
ifeq ($(arch),armv7)
comp=armv7a-linux-androideabi16-clang
CXX=armv7a-linux-androideabi16-clang++
CXXFLAGS += -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=neon
STRIP=arm-linux-androideabi-strip
endif
ifeq ($(arch),armv8)
comp=aarch64-linux-android21-clang
CXX=aarch64-linux-android21-clang++
STRIP=aarch64-linux-android-strip
endif
LDFLAGS += -static-libstdc++ -pie -lm -latomic
endif endif
### Travis CI script uses COMPILER to overwrite CXX ### Travis CI script uses COMPILER to overwrite CXX
@@ -383,16 +440,29 @@ ifdef COMPCXX
CXX=$(COMPCXX) CXX=$(COMPCXX)
endif endif
### Sometimes gcc is really clang
ifeq ($(COMP),gcc)
gccversion = $(shell $(CXX) --version)
gccisclang = $(findstring clang,$(gccversion))
ifneq ($(gccisclang),)
profile_make = clang-profile-make
profile_use = clang-profile-use
endif
endif
### On mingw use Windows threads, otherwise POSIX ### On mingw use Windows threads, otherwise POSIX
ifneq ($(comp),mingw) ifneq ($(comp),mingw)
CXXFLAGS += -DUSE_PTHREADS
# On Android Bionic's C library comes with its own pthread implementation bundled in # On Android Bionic's C library comes with its own pthread implementation bundled in
ifneq ($(OS),Android) ifneq ($(OS),Android)
# Haiku has pthreads in its libroot, so only link it in on other platforms # Haiku has pthreads in its libroot, so only link it in on other platforms
ifneq ($(KERNEL),Haiku) ifneq ($(KERNEL),Haiku)
ifneq ($(COMP),ndk)
LDFLAGS += -lpthread LDFLAGS += -lpthread
endif endif
endif endif
endif endif
endif
### 3.2.1 Debugging ### 3.2.1 Debugging
ifeq ($(debug),no) ifeq ($(debug),no)
@@ -434,7 +504,6 @@ endif
ifeq ($(prefetch),yes) ifeq ($(prefetch),yes)
ifeq ($(sse),yes) ifeq ($(sse),yes)
CXXFLAGS += -msse CXXFLAGS += -msse
DEPENDFLAGS += -msse
endif endif
else else
CXXFLAGS += -DNO_PREFETCH CXXFLAGS += -DNO_PREFETCH
@@ -442,7 +511,7 @@ endif
### 3.6 popcnt ### 3.6 popcnt
ifeq ($(popcnt),yes) ifeq ($(popcnt),yes)
ifeq ($(arch),$(filter $(arch),ppc64 armv8-a arm64)) ifeq ($(arch),$(filter $(arch),ppc64 armv7 armv8 arm64))
CXXFLAGS += -DUSE_POPCNT CXXFLAGS += -DUSE_POPCNT
else ifeq ($(comp),icc) else ifeq ($(comp),icc)
CXXFLAGS += -msse3 -DUSE_POPCNT CXXFLAGS += -msse3 -DUSE_POPCNT
@@ -451,6 +520,7 @@ ifeq ($(popcnt),yes)
endif endif
endif endif
ifeq ($(avx2),yes) ifeq ($(avx2),yes)
CXXFLAGS += -DUSE_AVX2 CXXFLAGS += -DUSE_AVX2
ifeq ($(comp),$(filter $(comp),gcc clang mingw)) ifeq ($(comp),$(filter $(comp),gcc clang mingw))
@@ -461,14 +531,21 @@ endif
ifeq ($(avx512),yes) ifeq ($(avx512),yes)
CXXFLAGS += -DUSE_AVX512 CXXFLAGS += -DUSE_AVX512
ifeq ($(comp),$(filter $(comp),gcc clang mingw)) ifeq ($(comp),$(filter $(comp),gcc clang mingw))
CXXFLAGS += -mavx512bw CXXFLAGS += -mavx512f -mavx512bw
endif endif
endif endif
ifeq ($(sse42),yes) ifeq ($(vnni256),yes)
CXXFLAGS += -DUSE_SSE42 CXXFLAGS += -DUSE_VNNI
ifeq ($(comp),$(filter $(comp),gcc clang mingw)) ifeq ($(comp),$(filter $(comp),gcc clang mingw))
CXXFLAGS += -msse4.2 CXXFLAGS += -mavx512f -mavx512bw -mavx512vnni -mavx512dq -mavx512vl -mprefer-vector-width=256
endif
endif
ifeq ($(vnni512),yes)
CXXFLAGS += -DUSE_VNNI
ifeq ($(comp),$(filter $(comp),gcc clang mingw))
CXXFLAGS += -mavx512vnni -mavx512dq -mavx512vl
endif endif
endif endif
@@ -486,19 +563,29 @@ ifeq ($(ssse3),yes)
endif endif
endif endif
ifeq ($(sse3),yes) ifeq ($(sse2),yes)
CXXFLAGS += -DUSE_SSE3 CXXFLAGS += -DUSE_SSE2
ifeq ($(comp),$(filter $(comp),gcc clang mingw)) ifeq ($(comp),$(filter $(comp),gcc clang mingw))
CXXFLAGS += -msse3 CXXFLAGS += -msse2
endif
endif
ifeq ($(mmx),yes)
CXXFLAGS += -DUSE_MMX
ifeq ($(comp),$(filter $(comp),gcc clang mingw))
CXXFLAGS += -mmmx
endif endif
endif endif
ifeq ($(neon),yes) ifeq ($(neon),yes)
CXXFLAGS += -DUSE_NEON CXXFLAGS += -DUSE_NEON
ifeq ($(KERNEL),Linux)
ifneq ($(COMP),ndk)
ifneq ($(arch),armv8)
CXXFLAGS += -mfpu=neon
endif
endif
endif endif
ifeq ($(arch),x86_64)
CXXFLAGS += -DUSE_SSE2
endif endif
### 3.7 pext ### 3.7 pext
@@ -514,7 +601,10 @@ endif
### needs access to the optimization flags. ### needs access to the optimization flags.
ifeq ($(optimize),yes) ifeq ($(optimize),yes)
ifeq ($(debug), no) ifeq ($(debug), no)
ifeq ($(comp),clang) ifeq ($(COMP),ndk)
CXXFLAGS += -flto=thin
LDFLAGS += $(CXXFLAGS)
else ifeq ($(comp),clang)
CXXFLAGS += -flto=thin CXXFLAGS += -flto=thin
LDFLAGS += $(CXXFLAGS) LDFLAGS += $(CXXFLAGS)
@@ -524,13 +614,18 @@ ifeq ($(debug), no)
ifeq ($(gccisclang),) ifeq ($(gccisclang),)
CXXFLAGS += -flto CXXFLAGS += -flto
LDFLAGS += $(CXXFLAGS) -flto=jobserver LDFLAGS += $(CXXFLAGS) -flto=jobserver
ifneq ($(findstring MINGW,$(KERNEL)),)
LDFLAGS += -save-temps
else ifneq ($(findstring MSYS,$(KERNEL)),)
LDFLAGS += -save-temps
endif
else else
CXXFLAGS += -flto=thin CXXFLAGS += -flto=thin
LDFLAGS += $(CXXFLAGS) LDFLAGS += $(CXXFLAGS)
endif endif
# To use LTO and static linking on windows, the tool chain requires a recent gcc: # To use LTO and static linking on windows, the tool chain requires a recent gcc:
# gcc version 10.1 in msys2 or TDM-GCC version 9.2 are know to work, older might not. # gcc version 10.1 in msys2 or TDM-GCC version 9.2 are known to work, older might not.
# So, only enable it for a cross from Linux by default. # So, only enable it for a cross from Linux by default.
else ifeq ($(comp),mingw) else ifeq ($(comp),mingw)
ifeq ($(KERNEL),Linux) ifeq ($(KERNEL),Linux)
@@ -552,6 +647,7 @@ endif
### Section 4. Public Targets ### Section 4. Public Targets
### ========================================================================== ### ==========================================================================
help: help:
@echo "" @echo ""
@echo "To compile stockfish, type: " @echo "To compile stockfish, type: "
@@ -560,31 +656,34 @@ help:
@echo "" @echo ""
@echo "Supported targets:" @echo "Supported targets:"
@echo "" @echo ""
@echo "help > Display architecture details"
@echo "build > Standard build" @echo "build > Standard build"
@echo "profile-build > Standard build with PGO" @echo "net > Download the default nnue net"
@echo "profile-build > Faster build (with profile-guided optimization)"
@echo "strip > Strip executable" @echo "strip > Strip executable"
@echo "install > Install executable" @echo "install > Install executable"
@echo "clean > Clean up" @echo "clean > Clean up"
@echo "net > Download the default nnue net"
@echo "" @echo ""
@echo "Supported archs:" @echo "Supported archs:"
@echo "" @echo ""
@echo "x86-64-vnni512 > x86 64-bit with vnni support 512bit wide"
@echo "x86-64-vnni256 > x86 64-bit with vnni support 256bit wide"
@echo "x86-64-avx512 > x86 64-bit with avx512 support" @echo "x86-64-avx512 > x86 64-bit with avx512 support"
@echo "x86-64-bmi2 > x86 64-bit with bmi2 support" @echo "x86-64-bmi2 > x86 64-bit with bmi2 support"
@echo "x86-64-avx2 > x86 64-bit with avx2 support" @echo "x86-64-avx2 > x86 64-bit with avx2 support"
@echo "x86-64-sse42 > x86 64-bit with sse42 support" @echo "x86-64-sse41-popcnt > x86 64-bit with sse41 and popcnt support"
@echo "x86-64-modern > x86 64-bit with sse41 support (x86-64-sse41)" @echo "x86-64-modern > common modern CPU, currently x86-64-sse41-popcnt"
@echo "x86-64-sse41 > x86 64-bit with sse41 support"
@echo "x86-64-ssse3 > x86 64-bit with ssse3 support" @echo "x86-64-ssse3 > x86 64-bit with ssse3 support"
@echo "x86-64-sse3-popcnt > x86 64-bit with sse3 and popcnt support" @echo "x86-64-sse3-popcnt > x86 64-bit with sse3 and popcnt support"
@echo "x86-64-sse3 > x86 64-bit with sse3 support" @echo "x86-64 > x86 64-bit generic (with sse2 support)"
@echo "x86-64 > x86 64-bit generic" @echo "x86-32-sse41-popcnt > x86 32-bit with sse41 and popcnt support"
@echo "x86-32 > x86 32-bit (also enables SSE)" @echo "x86-32-sse2 > x86 32-bit with sse2 support"
@echo "x86-32-old > x86 32-bit fall back for old hardware" @echo "x86-32 > x86 32-bit generic (with mmx and sse support)"
@echo "ppc-64 > PPC 64-bit" @echo "ppc-64 > PPC 64-bit"
@echo "ppc-32 > PPC 32-bit" @echo "ppc-32 > PPC 32-bit"
@echo "armv7 > ARMv7 32-bit" @echo "armv7 > ARMv7 32-bit"
@echo "armv8 > ARMv8 64-bit" @echo "armv7-neon > ARMv7 32-bit with popcnt and neon"
@echo "armv8 > ARMv8 64-bit with popcnt and neon"
@echo "apple-silicon > Apple silicon ARM64" @echo "apple-silicon > Apple silicon ARM64"
@echo "general-64 > unspecified 64-bit" @echo "general-64 > unspecified 64-bit"
@echo "general-32 > unspecified 32-bit" @echo "general-32 > unspecified 32-bit"
@@ -595,20 +694,26 @@ help:
@echo "mingw > Gnu compiler with MinGW under Windows" @echo "mingw > Gnu compiler with MinGW under Windows"
@echo "clang > LLVM Clang compiler" @echo "clang > LLVM Clang compiler"
@echo "icc > Intel compiler" @echo "icc > Intel compiler"
@echo "ndk > Google NDK to cross-compile for Android"
@echo "" @echo ""
@echo "Simple examples. If you don't know what to do, you likely want to run: " @echo "Simple examples. If you don't know what to do, you likely want to run: "
@echo "" @echo ""
@echo "make -j build ARCH=x86-64 (This is for 64-bit systems)" @echo "make -j build ARCH=x86-64 (A portable, slow compile for 64-bit systems)"
@echo "make -j build ARCH=x86-32 (This is for 32-bit systems)" @echo "make -j build ARCH=x86-32 (A portable, slow compile for 32-bit systems)"
@echo "" @echo ""
@echo "Advanced examples, for experienced users: " @echo "Advanced examples, for experienced users looking for performance: "
@echo "" @echo ""
@echo "make -j build ARCH=x86-64-modern COMP=clang" @echo "make help ARCH=x86-64-bmi2"
@echo "make -j profile-build ARCH=x86-64-bmi2 COMP=gcc COMPCXX=g++-4.8" @echo "make -j profile-build ARCH=x86-64-bmi2 COMP=gcc COMPCXX=g++-9.0"
@echo "" @echo "make -j build ARCH=x86-64-ssse3 COMP=clang"
@echo "The selected architecture $(ARCH) enables the following configuration: "
@echo "" @echo ""
@echo "-------------------------------"
ifeq ($(SUPPORTED_ARCH), true)
@echo "The selected architecture $(ARCH) will enable the following configuration: "
@$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity @$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity
else
@echo "Specify a supported architecture with the ARCH option for more details"
endif
.PHONY: help build profile-build strip install clean net objclean profileclean \ .PHONY: help build profile-build strip install clean net objclean profileclean \
@@ -618,7 +723,7 @@ help:
build: config-sanity build: config-sanity
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) all $(MAKE) ARCH=$(ARCH) COMP=$(COMP) all
profile-build: config-sanity objclean profileclean profile-build: net config-sanity objclean profileclean
@echo "" @echo ""
@echo "Step 1/4. Building instrumented executable ..." @echo "Step 1/4. Building instrumented executable ..."
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make) $(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make)
@@ -634,7 +739,7 @@ profile-build: config-sanity objclean profileclean
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) profileclean $(MAKE) ARCH=$(ARCH) COMP=$(COMP) profileclean
strip: strip:
strip $(EXE) $(STRIP) $(EXE)
install: install:
-mkdir -p -m 755 $(BINDIR) -mkdir -p -m 755 $(BINDIR)
@@ -649,17 +754,34 @@ net:
$(eval nnuenet := $(shell grep EvalFile ucioption.cpp | grep Option | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/')) $(eval nnuenet := $(shell grep EvalFile ucioption.cpp | grep Option | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/'))
@echo "Default net: $(nnuenet)" @echo "Default net: $(nnuenet)"
$(eval nnuedownloadurl := https://tests.stockfishchess.org/api/nn/$(nnuenet)) $(eval nnuedownloadurl := https://tests.stockfishchess.org/api/nn/$(nnuenet))
$(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -sL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi)) $(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -skL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi))
@if test -f "$(nnuenet)"; then echo "Already available."; else echo "Downloading $(nnuedownloadurl)"; $(curl_or_wget) $(nnuedownloadurl) > $(nnuenet); fi @if test -f "$(nnuenet)"; then \
echo "Already available."; \
else \
if [ "x$(curl_or_wget)" = "x" ]; then \
echo "Automatic download failed: neither curl nor wget is installed. Install one of these tools or download the net manually"; exit 1; \
else \
echo "Downloading $(nnuedownloadurl)"; $(curl_or_wget) $(nnuedownloadurl) > $(nnuenet);\
fi; \
fi;
$(eval shasum_command := $(shell if hash shasum 2>/dev/null; then echo "shasum -a 256 "; elif hash sha256sum 2>/dev/null; then echo "sha256sum "; fi))
@if [ "x$(shasum_command)" != "x" ]; then \
if [ "$(nnuenet)" != "nn-"`$(shasum_command) $(nnuenet) | cut -c1-12`".nnue" ]; then \
echo "Failed download or $(nnuenet) corrupted, please delete!"; exit 1; \
fi \
else \
echo "shasum / sha256sum not found, skipping net validation"; \
fi
# clean binaries and objects # clean binaries and objects
objclean: objclean:
@rm -f $(EXE) *.o ./syzygy/*.o ./learn/*.o ./extra/*.o ./eval/*.o ./nnue/*.o ./nnue/features/*.o @rm -f $(EXE) *.o ./syzygy/*.o ./nnue/*.o ./nnue/features/*.o ./learn/*.o ./extra/*.o ./eval/*.o
# clean auxiliary profiling files # clean auxiliary profiling files
profileclean: profileclean:
@rm -rf profdir @rm -rf profdir
@rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./learn/*.gcda ./extra/*.gcda ./eval/*.gcda ./nnue/*.gcda ./nnue/features/*.gcda @rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./nnue/*.gcda ./nnue/features/*.gcda *.s ./learn/*.gcda ./extra/*.gcda ./eval/*.gcda
@rm -f stockfish.profdata *.profraw @rm -f stockfish.profdata *.profraw
default: default:
@@ -683,14 +805,16 @@ config-sanity:
@echo "os: '$(OS)'" @echo "os: '$(OS)'"
@echo "prefetch: '$(prefetch)'" @echo "prefetch: '$(prefetch)'"
@echo "popcnt: '$(popcnt)'" @echo "popcnt: '$(popcnt)'"
@echo "pext: '$(pext)'"
@echo "sse: '$(sse)'" @echo "sse: '$(sse)'"
@echo "sse3: '$(sse3)'" @echo "mmx: '$(mmx)'"
@echo "sse2: '$(sse2)'"
@echo "ssse3: '$(ssse3)'" @echo "ssse3: '$(ssse3)'"
@echo "sse41: '$(sse41)'" @echo "sse41: '$(sse41)'"
@echo "sse42: '$(sse42)'"
@echo "avx2: '$(avx2)'" @echo "avx2: '$(avx2)'"
@echo "pext: '$(pext)'"
@echo "avx512: '$(avx512)'" @echo "avx512: '$(avx512)'"
@echo "vnni256: '$(vnni256)'"
@echo "vnni512: '$(vnni512)'"
@echo "neon: '$(neon)'" @echo "neon: '$(neon)'"
@echo "" @echo ""
@echo "Flags:" @echo "Flags:"
@@ -703,22 +827,26 @@ config-sanity:
@test "$(debug)" = "yes" || test "$(debug)" = "no" @test "$(debug)" = "yes" || test "$(debug)" = "no"
@test "$(sanitize)" = "undefined" || test "$(sanitize)" = "thread" || test "$(sanitize)" = "address" || test "$(sanitize)" = "no" @test "$(sanitize)" = "undefined" || test "$(sanitize)" = "thread" || test "$(sanitize)" = "address" || test "$(sanitize)" = "no"
@test "$(optimize)" = "yes" || test "$(optimize)" = "no" @test "$(optimize)" = "yes" || test "$(optimize)" = "no"
@test "$(SUPPORTED_ARCH)" = "true"
@test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \ @test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \
test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || \ test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || \
test "$(arch)" = "armv7" || test "$(arch)" = "armv8-a" || test "$(arch)" = "arm64" test "$(arch)" = "armv7" || test "$(arch)" = "armv8" || test "$(arch)" = "arm64"
@test "$(bits)" = "32" || test "$(bits)" = "64" @test "$(bits)" = "32" || test "$(bits)" = "64"
@test "$(prefetch)" = "yes" || test "$(prefetch)" = "no" @test "$(prefetch)" = "yes" || test "$(prefetch)" = "no"
@test "$(popcnt)" = "yes" || test "$(popcnt)" = "no" @test "$(popcnt)" = "yes" || test "$(popcnt)" = "no"
@test "$(pext)" = "yes" || test "$(pext)" = "no"
@test "$(sse)" = "yes" || test "$(sse)" = "no" @test "$(sse)" = "yes" || test "$(sse)" = "no"
@test "$(sse3)" = "yes" || test "$(sse3)" = "no" @test "$(mmx)" = "yes" || test "$(mmx)" = "no"
@test "$(sse2)" = "yes" || test "$(sse2)" = "no"
@test "$(ssse3)" = "yes" || test "$(ssse3)" = "no" @test "$(ssse3)" = "yes" || test "$(ssse3)" = "no"
@test "$(sse41)" = "yes" || test "$(sse41)" = "no" @test "$(sse41)" = "yes" || test "$(sse41)" = "no"
@test "$(sse42)" = "yes" || test "$(sse42)" = "no"
@test "$(avx2)" = "yes" || test "$(avx2)" = "no" @test "$(avx2)" = "yes" || test "$(avx2)" = "no"
@test "$(pext)" = "yes" || test "$(pext)" = "no"
@test "$(avx512)" = "yes" || test "$(avx512)" = "no" @test "$(avx512)" = "yes" || test "$(avx512)" = "no"
@test "$(vnni256)" = "yes" || test "$(vnni256)" = "no"
@test "$(vnni512)" = "yes" || test "$(vnni512)" = "no"
@test "$(neon)" = "yes" || test "$(neon)" = "no" @test "$(neon)" = "yes" || test "$(neon)" = "no"
@test "$(comp)" = "gcc" || test "$(comp)" = "icc" || test "$(comp)" = "mingw" || test "$(comp)" = "clang" @test "$(comp)" = "gcc" || test "$(comp)" = "icc" || test "$(comp)" = "mingw" || test "$(comp)" = "clang" \
|| test "$(comp)" = "armv7a-linux-androideabi16-clang" || test "$(comp)" = "aarch64-linux-android21-clang"
$(EXE): $(OBJS) $(EXE): $(OBJS)
+$(CXX) -o $@ $(OBJS) $(LDFLAGS) +$(CXX) -o $@ $(OBJS) $(LDFLAGS)
@@ -730,7 +858,7 @@ clang-profile-make:
all all
clang-profile-use: clang-profile-use:
llvm-profdata merge -output=stockfish.profdata *.profraw $(XCRUN) llvm-profdata merge -output=stockfish.profdata *.profraw
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
EXTRACXXFLAGS='-fprofile-instr-use=stockfish.profdata' \ EXTRACXXFLAGS='-fprofile-instr-use=stockfish.profdata' \
EXTRALDFLAGS='-fprofile-use ' \ EXTRALDFLAGS='-fprofile-use ' \
+11 -2
View File
@@ -95,8 +95,9 @@ const vector<string> Defaults = {
/// setup_bench() builds a list of UCI commands to be run by bench. There /// setup_bench() builds a list of UCI commands to be run by bench. There
/// are five parameters: TT size in MB, number of search threads that /// are five parameters: TT size in MB, number of search threads that
/// should be used, the limit value spent for each position, a file name /// should be used, the limit value spent for each position, a file name
/// where to look for positions in FEN format and the type of the limit: /// where to look for positions in FEN format, the type of the limit:
/// depth, perft, nodes and movetime (in millisecs). /// depth, perft, nodes and movetime (in millisecs), and evaluation type
/// mixed (default), classical, NNUE.
/// ///
/// bench -> search default positions up to depth 13 /// bench -> search default positions up to depth 13
/// bench 64 1 15 -> search default positions up to depth 15 (TT = 64MB) /// bench 64 1 15 -> search default positions up to depth 15 (TT = 64MB)
@@ -115,6 +116,7 @@ vector<string> setup_bench(const Position& current, istream& is) {
string limit = (is >> token) ? token : "13"; string limit = (is >> token) ? token : "13";
string fenFile = (is >> token) ? token : "default"; string fenFile = (is >> token) ? token : "default";
string limitType = (is >> token) ? token : "depth"; string limitType = (is >> token) ? token : "depth";
string evalType = (is >> token) ? token : "mixed";
go = limitType == "eval" ? "eval" : "go " + limitType + " " + limit; go = limitType == "eval" ? "eval" : "go " + limitType + " " + limit;
@@ -146,13 +148,20 @@ vector<string> setup_bench(const Position& current, istream& is) {
list.emplace_back("setoption name Hash value " + ttSize); list.emplace_back("setoption name Hash value " + ttSize);
list.emplace_back("ucinewgame"); list.emplace_back("ucinewgame");
size_t posCounter = 0;
for (const string& fen : fens) for (const string& fen : fens)
if (fen.find("setoption") != string::npos) if (fen.find("setoption") != string::npos)
list.emplace_back(fen); list.emplace_back(fen);
else else
{ {
if (evalType == "classical" || (evalType == "mixed" && posCounter % 2 == 0))
list.emplace_back("setoption name Use NNUE value false");
else if (evalType == "NNUE" || (evalType == "mixed" && posCounter % 2 != 0))
list.emplace_back("setoption name Use NNUE value true");
list.emplace_back("position fen " + fen); list.emplace_back("position fen " + fen);
list.emplace_back(go); list.emplace_back(go);
++posCounter;
} }
return list; return list;
+11 -1
View File
@@ -39,6 +39,16 @@ namespace {
Bitboard BishopTable[0x1480]; // To store bishop attacks Bitboard BishopTable[0x1480]; // To store bishop attacks
void init_magics(PieceType pt, Bitboard table[], Magic magics[]); void init_magics(PieceType pt, Bitboard table[], Magic magics[]);
}
/// safe_destination() returns the bitboard of target square for the given step
/// from the given square. If the step is off the board, returns empty bitboard.
inline Bitboard safe_destination(Square s, int step) {
Square to = Square(s + step);
return is_ok(to) && distance(s, to) <= 2 ? square_bb(to) : Bitboard(0);
} }
@@ -110,7 +120,7 @@ namespace {
Direction RookDirections[4] = {NORTH, SOUTH, EAST, WEST}; Direction RookDirections[4] = {NORTH, SOUTH, EAST, WEST};
Direction BishopDirections[4] = {NORTH_EAST, SOUTH_EAST, SOUTH_WEST, NORTH_WEST}; Direction BishopDirections[4] = {NORTH_EAST, SOUTH_EAST, SOUTH_WEST, NORTH_WEST};
for(Direction d : (pt == ROOK ? RookDirections : BishopDirections)) for (Direction d : (pt == ROOK ? RookDirections : BishopDirections))
{ {
Square s = sq; Square s = sq;
while(safe_destination(s, d) && !(occupied & s)) while(safe_destination(s, d) && !(occupied & s))
-10
View File
@@ -279,16 +279,6 @@ inline int edge_distance(File f) { return std::min(f, File(FILE_H - f)); }
inline int edge_distance(Rank r) { return std::min(r, Rank(RANK_8 - r)); } inline int edge_distance(Rank r) { return std::min(r, Rank(RANK_8 - r)); }
/// safe_destination() returns the bitboard of target square for the given step
/// from the given square. If the step is off the board, returns empty bitboard.
inline Bitboard safe_destination(Square s, int step)
{
Square to = Square(s + step);
return is_ok(to) && distance(s, to) <= 2 ? square_bb(to) : Bitboard(0);
}
/// attacks_bb(Square) returns the pseudo attacks of the give piece type /// attacks_bb(Square) returns the pseudo attacks of the give piece type
/// assuming an empty board. /// assuming an empty board.
+62 -52
View File
@@ -61,10 +61,11 @@ namespace Eval {
UCI::OptionsMap defaults; UCI::OptionsMap defaults;
UCI::init(defaults); UCI::init(defaults);
std::cerr << "NNUE evaluation used, but the network file " << eval_file << " was not loaded successfully. " sync_cout << "info string ERROR: NNUE evaluation used, but the network file " << eval_file << " was not loaded successfully." << sync_endl;
<< "These network evaluation parameters must be available, and compatible with this version of the code. " sync_cout << "info string ERROR: The UCI option EvalFile might need to specify the full path, including the directory/folder name, to the file." << sync_endl;
<< "The UCI option EvalFile might need to specify the full path, including the directory/folder name, to the file. " sync_cout << "info string ERROR: The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/"+std::string(defaults["EvalFile"]) << sync_endl;
<< "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/"+std::string(defaults["EvalFile"]) << std::endl; sync_cout << "info string ERROR: If the UCI option Use NNUE is set to true, network evaluation parameters compatible with the program must be available." << sync_endl;
sync_cout << "info string ERROR: The engine will be terminated now." << sync_endl;
std::exit(EXIT_FAILURE); std::exit(EXIT_FAILURE);
} }
@@ -122,7 +123,8 @@ namespace {
constexpr Value LazyThreshold1 = Value(1400); constexpr Value LazyThreshold1 = Value(1400);
constexpr Value LazyThreshold2 = Value(1300); constexpr Value LazyThreshold2 = Value(1300);
constexpr Value SpaceThreshold = Value(12222); constexpr Value SpaceThreshold = Value(12222);
constexpr Value NNUEThreshold = Value(460); constexpr Value NNUEThreshold1 = Value(550);
constexpr Value NNUEThreshold2 = Value(150);
// KingAttackWeights[PieceType] contains king attack weights by piece type // KingAttackWeights[PieceType] contains king attack weights by piece type
constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 }; constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 };
@@ -294,8 +296,8 @@ namespace {
attackedBy2[Us] = dblAttackByPawn | (attackedBy[Us][KING] & attackedBy[Us][PAWN]); attackedBy2[Us] = dblAttackByPawn | (attackedBy[Us][KING] & attackedBy[Us][PAWN]);
// Init our king safety tables // Init our king safety tables
Square s = make_square(Utility::clamp(file_of(ksq), FILE_B, FILE_G), Square s = make_square(std::clamp(file_of(ksq), FILE_B, FILE_G),
Utility::clamp(rank_of(ksq), RANK_2, RANK_7)); std::clamp(rank_of(ksq), RANK_2, RANK_7));
kingRing[Us] = attacks_bb<KING>(s) | s; kingRing[Us] = attacks_bb<KING>(s) | s;
kingAttackersCount[Them] = popcount(kingRing[Us] & pe->pawn_attacks(Them)); kingAttackersCount[Them] = popcount(kingRing[Us] & pe->pawn_attacks(Them));
@@ -692,8 +694,8 @@ namespace {
Square blockSq = s + Up; Square blockSq = s + Up;
// Adjust bonus based on the king's proximity // Adjust bonus based on the king's proximity
bonus += make_score(0, ( (king_proximity(Them, blockSq) * 19) / 4 bonus += make_score(0, ( king_proximity(Them, blockSq) * 19 / 4
- king_proximity(Us, blockSq) * 2) * w); - king_proximity(Us, blockSq) * 2) * w);
// If blockSq is not the queening square then consider also a second push // If blockSq is not the queening square then consider also a second push
if (r != RANK_7) if (r != RANK_7)
@@ -737,7 +739,7 @@ namespace {
// Evaluation::space() computes a space evaluation for a given side, aiming to improve game // Evaluation::space() computes a space evaluation for a given side, aiming to improve game
// play in the opening. It is based on the number of safe squares on the 4 central files // play in the opening. It is based on the number of safe squares on the four central files
// on ranks 2 to 4. Completely safe squares behind a friendly pawn are counted twice. // on ranks 2 to 4. Completely safe squares behind a friendly pawn are counted twice.
// Finally, the space bonus is multiplied by a weight which decreases according to occupancy. // Finally, the space bonus is multiplied by a weight which decreases according to occupancy.
@@ -810,7 +812,7 @@ namespace {
// Now apply the bonus: note that we find the attacking side by extracting the // Now apply the bonus: note that we find the attacking side by extracting the
// sign of the midgame or endgame values, and that we carefully cap the bonus // sign of the midgame or endgame values, and that we carefully cap the bonus
// so that the midgame and endgame scores do not change sign after the bonus. // so that the midgame and endgame scores do not change sign after the bonus.
int u = ((mg > 0) - (mg < 0)) * Utility::clamp(complexity + 50, -abs(mg), 0); int u = ((mg > 0) - (mg < 0)) * std::clamp(complexity + 50, -abs(mg), 0);
int v = ((eg > 0) - (eg < 0)) * std::max(complexity, -abs(eg)); int v = ((eg > 0) - (eg < 0)) * std::max(complexity, -abs(eg));
mg += u; mg += u;
@@ -935,9 +937,6 @@ make_v:
// Side to move point of view // Side to move point of view
v = (pos.side_to_move() == WHITE ? v : -v) + Tempo; v = (pos.side_to_move() == WHITE ? v : -v) + Tempo;
// Damp down the evaluation linearly when shuffling
v = v * (100 - pos.rule50_count()) / 100;
return v; return v;
} }
@@ -954,14 +953,21 @@ Value Eval::evaluate(const Position& pos) {
} }
#endif #endif
if (Eval::useNNUE) bool classical = !Eval::useNNUE
{ || abs(eg_value(pos.psq_score())) * 16 > NNUEThreshold1 * (16 + pos.rule50_count());
Value v = eg_value(pos.psq_score()); Value v = classical ? Evaluation<NO_TRACE>(pos).value()
// Take NNUE eval only on balanced positions : NNUE::evaluate(pos) * 5 / 4 + Tempo;
if (abs(v) < NNUEThreshold + 20 * pos.count<PAWN>())
return NNUE::evaluate(pos) + Tempo; if (classical && Eval::useNNUE && abs(v) * 16 < NNUEThreshold2 * (16 + pos.rule50_count()))
} v = NNUE::evaluate(pos) * 5 / 4 + Tempo;
return Evaluation<NO_TRACE>(pos).value();
// Damp down the evaluation linearly when shuffling
v = v * (100 - pos.rule50_count()) / 100;
// Guarantee evaluation does not hit the tablebase range
v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);
return v;
} }
/// trace() is like evaluate(), but instead of returning a value, it returns /// trace() is like evaluate(), but instead of returning a value, it returns
@@ -979,42 +985,46 @@ std::string Eval::trace(const Position& pos) {
Value v; Value v;
if (Eval::useNNUE) std::memset(scores, 0, sizeof(scores));
{
v = NNUE::evaluate(pos);
}
else
{
std::memset(scores, 0, sizeof(scores));
pos.this_thread()->contempt = SCORE_ZERO; // Reset any dynamic contempt pos.this_thread()->contempt = SCORE_ZERO; // Reset any dynamic contempt
v = Evaluation<TRACE>(pos).value(); v = Evaluation<TRACE>(pos).value();
ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2) ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2)
<< " Term | White | Black | Total \n" << " Term | White | Black | Total \n"
<< " | MG EG | MG EG | MG EG \n" << " | MG EG | MG EG | MG EG \n"
<< " ------------+-------------+-------------+------------\n" << " ------------+-------------+-------------+------------\n"
<< " Material | " << Term(MATERIAL) << " Material | " << Term(MATERIAL)
<< " Imbalance | " << Term(IMBALANCE) << " Imbalance | " << Term(IMBALANCE)
<< " Pawns | " << Term(PAWN) << " Pawns | " << Term(PAWN)
<< " Knights | " << Term(KNIGHT) << " Knights | " << Term(KNIGHT)
<< " Bishops | " << Term(BISHOP) << " Bishops | " << Term(BISHOP)
<< " Rooks | " << Term(ROOK) << " Rooks | " << Term(ROOK)
<< " Queens | " << Term(QUEEN) << " Queens | " << Term(QUEEN)
<< " Mobility | " << Term(MOBILITY) << " Mobility | " << Term(MOBILITY)
<< " King safety | " << Term(KING) << " King safety | " << Term(KING)
<< " Threats | " << Term(THREAT) << " Threats | " << Term(THREAT)
<< " Passed | " << Term(PASSED) << " Passed | " << Term(PASSED)
<< " Space | " << Term(SPACE) << " Space | " << Term(SPACE)
<< " Winnable | " << Term(WINNABLE) << " Winnable | " << Term(WINNABLE)
<< " ------------+-------------+-------------+------------\n" << " ------------+-------------+-------------+------------\n"
<< " Total | " << Term(TOTAL); << " Total | " << Term(TOTAL);
}
v = pos.side_to_move() == WHITE ? v : -v; v = pos.side_to_move() == WHITE ? v : -v;
ss << "\nFinal evaluation: " << to_cp(v) << " (white side)\n"; ss << "\nClassical evaluation: " << to_cp(v) << " (white side)\n";
if (Eval::useNNUE)
{
v = NNUE::evaluate(pos);
v = pos.side_to_move() == WHITE ? v : -v;
ss << "\nNNUE evaluation: " << to_cp(v) << " (white side)\n";
}
v = evaluate(pos);
v = pos.side_to_move() == WHITE ? v : -v;
ss << "\nFinal evaluation: " << to_cp(v) << " (white side)\n";
return ss.str(); return ss.str();
} }
+1 -1
View File
@@ -130,7 +130,7 @@ Entry* probe(const Position& pos) {
Value npm_w = pos.non_pawn_material(WHITE); Value npm_w = pos.non_pawn_material(WHITE);
Value npm_b = pos.non_pawn_material(BLACK); Value npm_b = pos.non_pawn_material(BLACK);
Value npm = Utility::clamp(npm_w + npm_b, EndgameLimit, MidgameLimit); Value npm = std::clamp(npm_w + npm_b, EndgameLimit, MidgameLimit);
// Map total non-pawn material into [PHASE_ENDGAME, PHASE_MIDGAME] // Map total non-pawn material into [PHASE_ENDGAME, PHASE_MIDGAME]
e->gamePhase = Phase(((npm - EndgameLimit) * PHASE_MIDGAME) / (MidgameLimit - EndgameLimit)); e->gamePhase = Phase(((npm - EndgameLimit) * PHASE_MIDGAME) / (MidgameLimit - EndgameLimit));
+34 -16
View File
@@ -51,6 +51,11 @@ typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY);
#include <sys/mman.h> #include <sys/mman.h>
#endif #endif
#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32))
#define POSIXALIGNEDALLOC
#include <stdlib.h>
#endif
#include "misc.h" #include "misc.h"
#include "thread.h" #include "thread.h"
@@ -214,26 +219,33 @@ const std::string compiler_info() {
compiler += "\nCompilation settings include: "; compiler += "\nCompilation settings include: ";
compiler += (Is64Bit ? " 64bit" : " 32bit"); compiler += (Is64Bit ? " 64bit" : " 32bit");
#if defined(USE_VNNI)
compiler += " VNNI";
#endif
#if defined(USE_AVX512) #if defined(USE_AVX512)
compiler += " AVX512"; compiler += " AVX512";
#endif #endif
compiler += (HasPext ? " BMI2" : "");
#if defined(USE_AVX2) #if defined(USE_AVX2)
compiler += " AVX2"; compiler += " AVX2";
#endif #endif
#if defined(USE_SSE42)
compiler += " SSE42";
#endif
#if defined(USE_SSE41) #if defined(USE_SSE41)
compiler += " SSE41"; compiler += " SSE41";
#endif #endif
#if defined(USE_SSSE3) #if defined(USE_SSSE3)
compiler += " SSSE3"; compiler += " SSSE3";
#endif #endif
#if defined(USE_SSE3) #if defined(USE_SSE2)
compiler += " SSE3"; compiler += " SSE2";
#endif #endif
compiler += (HasPext ? " BMI2" : ""); compiler += (HasPopCnt ? " POPCNT" : "");
compiler += (HasPopCnt ? " POPCNT" : ""); #if defined(USE_MMX)
compiler += " MMX";
#endif
#if defined(USE_NEON)
compiler += " NEON";
#endif
#if !defined(NDEBUG) #if !defined(NDEBUG)
compiler += " DEBUG"; compiler += " DEBUG";
#endif #endif
@@ -316,14 +328,17 @@ void prefetch(void* addr) {
#endif #endif
/// Wrappers for systems where the c++17 implementation doesn't guarantee the availability of aligned_alloc.
/// Memory allocated with std_aligned_alloc must be freed with std_aligned_free. /// std_aligned_alloc() is our wrapper for systems where the c++17 implementation
/// /// does not guarantee the availability of aligned_alloc(). Memory allocated with
/// std_aligned_alloc() must be freed with std_aligned_free().
void* std_aligned_alloc(size_t alignment, size_t size) { void* std_aligned_alloc(size_t alignment, size_t size) {
#if (defined(__APPLE__) && defined(_LIBCPP_HAS_C11_FEATURES)) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32))
return aligned_alloc(alignment, size); #if defined(POSIXALIGNEDALLOC)
#elif (defined(_WIN32) || (defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES))) void *mem;
return posix_memalign(&mem, alignment, size) ? nullptr : mem;
#elif defined(_WIN32)
return _mm_malloc(size, alignment); return _mm_malloc(size, alignment);
#else #else
return std::aligned_alloc(alignment, size); return std::aligned_alloc(alignment, size);
@@ -331,16 +346,17 @@ void* std_aligned_alloc(size_t alignment, size_t size) {
} }
void std_aligned_free(void* ptr) { void std_aligned_free(void* ptr) {
#if (defined(__APPLE__) && defined(_LIBCPP_HAS_C11_FEATURES)) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32))
#if defined(POSIXALIGNEDALLOC)
free(ptr); free(ptr);
#elif (defined(_WIN32) || (defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES))) #elif defined(_WIN32)
_mm_free(ptr); _mm_free(ptr);
#else #else
free(ptr); free(ptr);
#endif #endif
} }
/// aligned_ttmem_alloc() will return suitably aligned memory, and if possible use large pages. /// aligned_ttmem_alloc() will return suitably aligned memory, if possible using large pages.
/// The returned pointer is the aligned one, while the mem argument is the one that needs /// The returned pointer is the aligned one, while the mem argument is the one that needs
/// to be passed to free. With c++17 some of this functionality could be simplified. /// to be passed to free. With c++17 some of this functionality could be simplified.
@@ -352,7 +368,9 @@ void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
size_t size = ((allocSize + alignment - 1) / alignment) * alignment; // multiple of alignment size_t size = ((allocSize + alignment - 1) / alignment) * alignment; // multiple of alignment
if (posix_memalign(&mem, alignment, size)) if (posix_memalign(&mem, alignment, size))
mem = nullptr; mem = nullptr;
#if defined(MADV_HUGEPAGE)
madvise(mem, allocSize, MADV_HUGEPAGE); madvise(mem, allocSize, MADV_HUGEPAGE);
#endif
return mem; return mem;
} }
-8
View File
@@ -67,14 +67,6 @@ std::ostream& operator<<(std::ostream&, SyncCout);
#define sync_cout std::cout << IO_LOCK #define sync_cout std::cout << IO_LOCK
#define sync_endl std::endl << IO_UNLOCK #define sync_endl std::endl << IO_UNLOCK
namespace Utility {
/// Clamp a value between lo and hi. Available in c++17.
template<class T> constexpr const T& clamp(const T& v, const T& lo, const T& hi) {
return v < lo ? lo : v > hi ? hi : v;
}
}
/// xorshift64star Pseudo-Random Number Generator /// xorshift64star Pseudo-Random Number Generator
/// This class is based on original code written and dedicated /// This class is based on original code written and dedicated
+1 -1
View File
@@ -248,7 +248,7 @@ namespace {
*moveList++ = make_move(ksq, pop_lsb(&b)); *moveList++ = make_move(ksq, pop_lsb(&b));
if ((Type != CAPTURES) && pos.can_castle(Us & ANY_CASTLING)) if ((Type != CAPTURES) && pos.can_castle(Us & ANY_CASTLING))
for(CastlingRights cr : { Us & KING_SIDE, Us & QUEEN_SIDE } ) for (CastlingRights cr : { Us & KING_SIDE, Us & QUEEN_SIDE } )
if (!pos.castling_impeded(cr) && pos.can_castle(cr)) if (!pos.castling_impeded(cr) && pos.can_castle(cr))
*moveList++ = make<CASTLING>(ksq, pos.castling_rook_square(cr)); *moveList++ = make<CASTLING>(ksq, pos.castling_rook_square(cr));
} }
+7 -7
View File
@@ -182,7 +182,7 @@ top:
--endMoves; --endMoves;
++stage; ++stage;
/* fallthrough */ [[fallthrough]];
case REFUTATION: case REFUTATION:
if (select<Next>([&](){ return *cur != MOVE_NONE if (select<Next>([&](){ return *cur != MOVE_NONE
@@ -190,7 +190,7 @@ top:
&& pos.pseudo_legal(*cur); })) && pos.pseudo_legal(*cur); }))
return *(cur - 1); return *(cur - 1);
++stage; ++stage;
/* fallthrough */ [[fallthrough]];
case QUIET_INIT: case QUIET_INIT:
if (!skipQuiets) if (!skipQuiets)
@@ -203,7 +203,7 @@ top:
} }
++stage; ++stage;
/* fallthrough */ [[fallthrough]];
case QUIET: case QUIET:
if ( !skipQuiets if ( !skipQuiets
@@ -217,7 +217,7 @@ top:
endMoves = endBadCaptures; endMoves = endBadCaptures;
++stage; ++stage;
/* fallthrough */ [[fallthrough]];
case BAD_CAPTURE: case BAD_CAPTURE:
return select<Next>([](){ return true; }); return select<Next>([](){ return true; });
@@ -228,7 +228,7 @@ top:
score<EVASIONS>(); score<EVASIONS>();
++stage; ++stage;
/* fallthrough */ [[fallthrough]];
case EVASION: case EVASION:
return select<Best>([](){ return true; }); return select<Best>([](){ return true; });
@@ -246,14 +246,14 @@ top:
return MOVE_NONE; return MOVE_NONE;
++stage; ++stage;
/* fallthrough */ [[fallthrough]];
case QCHECK_INIT: case QCHECK_INIT:
cur = moves; cur = moves;
endMoves = generate<QUIET_CHECKS>(pos, cur); endMoves = generate<QUIET_CHECKS>(pos, cur);
++stage; ++stage;
/* fallthrough */ [[fallthrough]];
case QCHECK: case QCHECK:
return select<Next>([](){ return true; }); return select<Next>([](){ return true; });
+3 -3
View File
@@ -86,9 +86,9 @@ enum StatsType { NoCaptures, Captures };
/// the move's from and to squares, see www.chessprogramming.org/Butterfly_Boards /// the move's from and to squares, see www.chessprogramming.org/Butterfly_Boards
typedef Stats<int16_t, 10692, COLOR_NB, int(SQUARE_NB) * int(SQUARE_NB)> ButterflyHistory; typedef Stats<int16_t, 10692, COLOR_NB, int(SQUARE_NB) * int(SQUARE_NB)> ButterflyHistory;
/// At higher depths LowPlyHistory records successful quiet moves near the root and quiet /// At higher depths LowPlyHistory records successful quiet moves near the root
/// moves which are/were in the PV (ttPv) /// and quiet moves which are/were in the PV (ttPv). It is cleared with each new
/// It is cleared with each new search and filled during iterative deepening /// search and filled during iterative deepening.
constexpr int MAX_LPH = 4; constexpr int MAX_LPH = 4;
typedef Stats<int16_t, 10692, MAX_LPH, int(SQUARE_NB) * int(SQUARE_NB)> LowPlyHistory; typedef Stats<int16_t, 10692, MAX_LPH, int(SQUARE_NB) * int(SQUARE_NB)> LowPlyHistory;
+29 -33
View File
@@ -29,30 +29,29 @@
#include "evaluate_nnue.h" #include "evaluate_nnue.h"
ExtPieceSquare kpp_board_index[PIECE_NB] = {
// convention: W - us, B - them
// viewed from other side, W and B are reversed
{ PS_NONE, PS_NONE },
{ PS_W_PAWN, PS_B_PAWN },
{ PS_W_KNIGHT, PS_B_KNIGHT },
{ PS_W_BISHOP, PS_B_BISHOP },
{ PS_W_ROOK, PS_B_ROOK },
{ PS_W_QUEEN, PS_B_QUEEN },
{ PS_W_KING, PS_B_KING },
{ PS_NONE, PS_NONE },
{ PS_NONE, PS_NONE },
{ PS_B_PAWN, PS_W_PAWN },
{ PS_B_KNIGHT, PS_W_KNIGHT },
{ PS_B_BISHOP, PS_W_BISHOP },
{ PS_B_ROOK, PS_W_ROOK },
{ PS_B_QUEEN, PS_W_QUEEN },
{ PS_B_KING, PS_W_KING },
{ PS_NONE, PS_NONE }
};
namespace Eval::NNUE { namespace Eval::NNUE {
uint32_t kpp_board_index[PIECE_NB][COLOR_NB] = {
// convention: W - us, B - them
// viewed from other side, W and B are reversed
{ PS_NONE, PS_NONE },
{ PS_W_PAWN, PS_B_PAWN },
{ PS_W_KNIGHT, PS_B_KNIGHT },
{ PS_W_BISHOP, PS_B_BISHOP },
{ PS_W_ROOK, PS_B_ROOK },
{ PS_W_QUEEN, PS_B_QUEEN },
{ PS_W_KING, PS_B_KING },
{ PS_NONE, PS_NONE },
{ PS_NONE, PS_NONE },
{ PS_B_PAWN, PS_W_PAWN },
{ PS_B_KNIGHT, PS_W_KNIGHT },
{ PS_B_BISHOP, PS_W_BISHOP },
{ PS_B_ROOK, PS_W_ROOK },
{ PS_B_QUEEN, PS_W_QUEEN },
{ PS_B_KING, PS_W_KING },
{ PS_NONE, PS_NONE }
};
// Input feature converter // Input feature converter
AlignedPtr<FeatureTransformer> feature_transformer; AlignedPtr<FeatureTransformer> feature_transformer;
@@ -86,7 +85,7 @@ namespace Eval::NNUE {
bool ReadParameters(std::istream& stream, const AlignedPtr<T>& pointer) { bool ReadParameters(std::istream& stream, const AlignedPtr<T>& pointer) {
std::uint32_t header; std::uint32_t header;
stream.read(reinterpret_cast<char*>(&header), sizeof(header)); header = read_little_endian<std::uint32_t>(stream);
if (!stream || header != T::GetHashValue()) return false; if (!stream || header != T::GetHashValue()) return false;
return pointer->ReadParameters(stream); return pointer->ReadParameters(stream);
} }
@@ -109,13 +108,13 @@ namespace Eval::NNUE {
} }
// Read network header // Read network header
bool ReadHeader(std::istream& stream, bool ReadHeader(std::istream& stream, std::uint32_t* hash_value, std::string* architecture)
std::uint32_t* hash_value, std::string* architecture) { {
std::uint32_t version, size; std::uint32_t version, size;
stream.read(reinterpret_cast<char*>(&version), sizeof(version));
stream.read(reinterpret_cast<char*>(hash_value), sizeof(*hash_value)); version = read_little_endian<std::uint32_t>(stream);
stream.read(reinterpret_cast<char*>(&size), sizeof(size)); *hash_value = read_little_endian<std::uint32_t>(stream);
size = read_little_endian<std::uint32_t>(stream);
if (!stream || version != kVersion) return false; if (!stream || version != kVersion) return false;
architecture->resize(size); architecture->resize(size);
stream.read(&(*architecture)[0], size); stream.read(&(*architecture)[0], size);
@@ -202,10 +201,7 @@ namespace Eval::NNUE {
// Evaluation function. Perform differential calculation. // Evaluation function. Perform differential calculation.
Value evaluate(const Position& pos) { Value evaluate(const Position& pos) {
Value v = ComputeScore(pos, false); return ComputeScore(pos, false);
v = Utility::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);
return v;
} }
// Evaluation function. Perform full calculation. // Evaluation function. Perform full calculation.
+1 -2
View File
@@ -106,8 +106,7 @@ namespace Eval::NNUE::Features {
reset[perspective] = false; reset[perspective] = false;
switch (trigger) { switch (trigger) {
case TriggerEvent::kFriendKingMoved: case TriggerEvent::kFriendKingMoved:
reset[perspective] = reset[perspective] = dp.piece[0] == make_piece(perspective, KING);
dp.pieceId[0] == PIECE_ID_KING + perspective;
break; break;
default: default:
assert(false); assert(false);
+19 -39
View File
@@ -23,25 +23,17 @@
namespace Eval::NNUE::Features { namespace Eval::NNUE::Features {
// Find the index of the feature quantity from the king position and PieceSquare // Orient a square according to perspective (rotates by 180 for black)
template <Side AssociatedKing> inline Square orient(Color perspective, Square s) {
inline IndexType HalfKP<AssociatedKing>::MakeIndex(Square sq_k, PieceSquare p) { return Square(int(s) ^ (bool(perspective) * 63));
return static_cast<IndexType>(PS_END) * static_cast<IndexType>(sq_k) + p;
} }
// Get pieces information // Find the index of the feature quantity from the king position and PieceSquare
template <Side AssociatedKing> template <Side AssociatedKing>
inline void HalfKP<AssociatedKing>::GetPieces( inline IndexType HalfKP<AssociatedKing>::MakeIndex(
const Position& pos, Color perspective, Color perspective, Square s, Piece pc, Square ksq) {
PieceSquare** pieces, Square* sq_target_k) {
*pieces = (perspective == BLACK) ? return IndexType(orient(perspective, s) + kpp_board_index[pc][perspective] + PS_END * ksq);
pos.eval_list()->piece_list_fb() :
pos.eval_list()->piece_list_fw();
const PieceId target = (AssociatedKing == Side::kFriend) ?
static_cast<PieceId>(PIECE_ID_KING + perspective) :
static_cast<PieceId>(PIECE_ID_KING + ~perspective);
*sq_target_k = static_cast<Square>(((*pieces)[target] - PS_W_KING) % SQUARE_NB);
} }
// Get a list of indices for active features // Get a list of indices for active features
@@ -49,16 +41,11 @@ namespace Eval::NNUE::Features {
void HalfKP<AssociatedKing>::AppendActiveIndices( void HalfKP<AssociatedKing>::AppendActiveIndices(
const Position& pos, Color perspective, IndexList* active) { const Position& pos, Color perspective, IndexList* active) {
// Do nothing if array size is small to avoid compiler warning Square ksq = orient(perspective, pos.square<KING>(perspective));
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return; Bitboard bb = pos.pieces() & ~pos.pieces(KING);
while (bb) {
PieceSquare* pieces; Square s = pop_lsb(&bb);
Square sq_target_k; active->push_back(MakeIndex(perspective, s, pos.piece_on(s), ksq));
GetPieces(pos, perspective, &pieces, &sq_target_k);
for (PieceId i = PIECE_ID_ZERO; i < PIECE_ID_KING; ++i) {
if (pieces[i] != PS_NONE) {
active->push_back(MakeIndex(sq_target_k, pieces[i]));
}
} }
} }
@@ -68,22 +55,15 @@ namespace Eval::NNUE::Features {
const Position& pos, Color perspective, const Position& pos, Color perspective,
IndexList* removed, IndexList* added) { IndexList* removed, IndexList* added) {
PieceSquare* pieces; Square ksq = orient(perspective, pos.square<KING>(perspective));
Square sq_target_k;
GetPieces(pos, perspective, &pieces, &sq_target_k);
const auto& dp = pos.state()->dirtyPiece; const auto& dp = pos.state()->dirtyPiece;
for (int i = 0; i < dp.dirty_num; ++i) { for (int i = 0; i < dp.dirty_num; ++i) {
if (dp.pieceId[i] >= PIECE_ID_KING) continue; Piece pc = dp.piece[i];
const auto old_p = static_cast<PieceSquare>( if (type_of(pc) == KING) continue;
dp.old_piece[i].from[perspective]); if (dp.from[i] != SQ_NONE)
if (old_p != PS_NONE) { removed->push_back(MakeIndex(perspective, dp.from[i], pc, ksq));
removed->push_back(MakeIndex(sq_target_k, old_p)); if (dp.to[i] != SQ_NONE)
} added->push_back(MakeIndex(perspective, dp.to[i], pc, ksq));
const auto new_p = static_cast<PieceSquare>(
dp.new_piece[i].from[perspective]);
if (new_p != PS_NONE) {
added->push_back(MakeIndex(sq_target_k, new_p));
}
} }
} }
+3 -7
View File
@@ -41,7 +41,7 @@ namespace Eval::NNUE::Features {
static constexpr IndexType kDimensions = static constexpr IndexType kDimensions =
static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_END); static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_END);
// Maximum number of simultaneously active features // Maximum number of simultaneously active features
static constexpr IndexType kMaxActiveDimensions = PIECE_ID_KING; static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count
// Trigger for full calculation instead of difference calculation // Trigger for full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kFriendKingMoved; static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kFriendKingMoved;
@@ -53,13 +53,9 @@ namespace Eval::NNUE::Features {
static void AppendChangedIndices(const Position& pos, Color perspective, static void AppendChangedIndices(const Position& pos, Color perspective,
IndexList* removed, IndexList* added); IndexList* removed, IndexList* added);
// Index of a feature for a given king position and another piece on some square
static IndexType MakeIndex(Square sq_k, PieceSquare p);
private: private:
// Get pieces information // Index of a feature for a given king position and another piece on some square
static void GetPieces(const Position& pos, Color perspective, static IndexType MakeIndex(Color perspective, Square s, Piece pc, Square sq_k);
PieceSquare** pieces, Square* sq_target_k);
}; };
} // namespace Eval::NNUE::Features } // namespace Eval::NNUE::Features
+106 -36
View File
@@ -70,11 +70,10 @@ namespace Eval::NNUE::Layers {
// Read network parameters // Read network parameters
bool ReadParameters(std::istream& stream) { bool ReadParameters(std::istream& stream) {
if (!previous_layer_.ReadParameters(stream)) return false; if (!previous_layer_.ReadParameters(stream)) return false;
stream.read(reinterpret_cast<char*>(biases_), for (std::size_t i = 0; i < kOutputDimensions; ++i)
kOutputDimensions * sizeof(BiasType)); biases_[i] = read_little_endian<BiasType>(stream);
stream.read(reinterpret_cast<char*>(weights_), for (std::size_t i = 0; i < kOutputDimensions * kPaddedInputDimensions; ++i)
kOutputDimensions * kPaddedInputDimensions * weights_[i] = read_little_endian<WeightType>(stream);
sizeof(WeightType));
return !stream.fail(); return !stream.fail();
} }
@@ -98,19 +97,32 @@ namespace Eval::NNUE::Layers {
#if defined(USE_AVX512) #if defined(USE_AVX512)
constexpr IndexType kNumChunks = kPaddedInputDimensions / (kSimdWidth * 2); constexpr IndexType kNumChunks = kPaddedInputDimensions / (kSimdWidth * 2);
const __m512i kOnes = _mm512_set1_epi16(1);
const auto input_vector = reinterpret_cast<const __m512i*>(input); const auto input_vector = reinterpret_cast<const __m512i*>(input);
#if !defined(USE_VNNI)
const __m512i kOnes = _mm512_set1_epi16(1);
#endif
#elif defined(USE_AVX2) #elif defined(USE_AVX2)
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
const __m256i kOnes = _mm256_set1_epi16(1);
const auto input_vector = reinterpret_cast<const __m256i*>(input); const auto input_vector = reinterpret_cast<const __m256i*>(input);
#if !defined(USE_VNNI)
const __m256i kOnes = _mm256_set1_epi16(1);
#endif
#elif defined(USE_SSSE3) #elif defined(USE_SSE2)
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
#ifndef USE_SSSE3
const __m128i kZeros = _mm_setzero_si128();
#else
const __m128i kOnes = _mm_set1_epi16(1); const __m128i kOnes = _mm_set1_epi16(1);
#endif
const auto input_vector = reinterpret_cast<const __m128i*>(input); const auto input_vector = reinterpret_cast<const __m128i*>(input);
#elif defined(USE_MMX)
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
const __m64 kZeros = _mm_setzero_si64();
const auto input_vector = reinterpret_cast<const __m64*>(input);
#elif defined(USE_NEON) #elif defined(USE_NEON)
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
const auto input_vector = reinterpret_cast<const int8x8_t*>(input); const auto input_vector = reinterpret_cast<const int8x8_t*>(input);
@@ -123,60 +135,115 @@ namespace Eval::NNUE::Layers {
__m512i sum = _mm512_setzero_si512(); __m512i sum = _mm512_setzero_si512();
const auto row = reinterpret_cast<const __m512i*>(&weights_[offset]); const auto row = reinterpret_cast<const __m512i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) { for (IndexType j = 0; j < kNumChunks; ++j) {
__m512i product = _mm512_maddubs_epi16( #if defined(USE_VNNI)
_mm512_load_si512(&input_vector[j]), _mm512_load_si512(&row[j])); sum = _mm512_dpbusd_epi32(sum, _mm512_loadA_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
#else
__m512i product = _mm512_maddubs_epi16(_mm512_loadA_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
product = _mm512_madd_epi16(product, kOnes); product = _mm512_madd_epi16(product, kOnes);
sum = _mm512_add_epi32(sum, product); sum = _mm512_add_epi32(sum, product);
#endif
} }
output[i] = _mm512_reduce_add_epi32(sum) + biases_[i];
// Note: Changing kMaxSimdWidth from 32 to 64 breaks loading existing networks. // Note: Changing kMaxSimdWidth from 32 to 64 breaks loading existing networks.
// As a result kPaddedInputDimensions may not be an even multiple of 64(512bit) // As a result kPaddedInputDimensions may not be an even multiple of 64(512bit)
// and we have to do one more 256bit chunk. // and we have to do one more 256bit chunk.
if (kPaddedInputDimensions != kNumChunks * kSimdWidth * 2) if (kPaddedInputDimensions != kNumChunks * kSimdWidth * 2)
{ {
const auto iv_256 = reinterpret_cast<const __m256i*>(input); const auto iv256 = reinterpret_cast<const __m256i*>(&input_vector[kNumChunks]);
const auto row_256 = reinterpret_cast<const __m256i*>(&weights_[offset]); const auto row256 = reinterpret_cast<const __m256i*>(&row[kNumChunks]);
int j = kNumChunks * 2; #if defined(USE_VNNI)
__m256i product256 = _mm256_dpbusd_epi32(
__m256i sum256 = _mm256_maddubs_epi16( _mm512_castsi512_si256(sum), _mm256_loadA_si256(&iv256[0]), _mm256_load_si256(&row256[0]));
_mm256_load_si256(&iv_256[j]), _mm256_load_si256(&row_256[j])); sum = _mm512_inserti32x8(sum, product256, 0);
sum256 = _mm256_madd_epi16(sum256, _mm256_set1_epi16(1)); #else
sum256 = _mm256_hadd_epi32(sum256, sum256); __m256i product256 = _mm256_maddubs_epi16(_mm256_loadA_si256(&iv256[0]), _mm256_load_si256(&row256[0]));
sum256 = _mm256_hadd_epi32(sum256, sum256); sum = _mm512_add_epi32(sum, _mm512_cvtepi16_epi32(product256));
const __m128i lo = _mm256_extracti128_si256(sum256, 0); #endif
const __m128i hi = _mm256_extracti128_si256(sum256, 1);
output[i] += _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi);
} }
output[i] = _mm512_reduce_add_epi32(sum) + biases_[i];
#elif defined(USE_AVX2) #elif defined(USE_AVX2)
__m256i sum = _mm256_setzero_si256(); __m256i sum = _mm256_setzero_si256();
const auto row = reinterpret_cast<const __m256i*>(&weights_[offset]); const auto row = reinterpret_cast<const __m256i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) { for (IndexType j = 0; j < kNumChunks; ++j) {
__m256i product = _mm256_maddubs_epi16( #if defined(USE_VNNI)
_mm256_load_si256(&input_vector[j]), _mm256_load_si256(&row[j])); sum = _mm256_dpbusd_epi32(sum, _mm256_loadA_si256(&input_vector[j]), _mm256_load_si256(&row[j]));
#else
__m256i product = _mm256_maddubs_epi16(_mm256_loadA_si256(&input_vector[j]), _mm256_load_si256(&row[j]));
product = _mm256_madd_epi16(product, kOnes); product = _mm256_madd_epi16(product, kOnes);
sum = _mm256_add_epi32(sum, product); sum = _mm256_add_epi32(sum, product);
#endif
} }
sum = _mm256_hadd_epi32(sum, sum); __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1));
sum = _mm256_hadd_epi32(sum, sum); sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC));
const __m128i lo = _mm256_extracti128_si256(sum, 0); sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB));
const __m128i hi = _mm256_extracti128_si256(sum, 1); output[i] = _mm_cvtsi128_si32(sum128) + biases_[i];
output[i] = _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi) + biases_[i];
#elif defined(USE_SSSE3) #elif defined(USE_SSSE3)
__m128i sum = _mm_cvtsi32_si128(biases_[i]); __m128i sum = _mm_setzero_si128();
const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]); const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) { for (int j = 0; j < (int)kNumChunks - 1; j += 2) {
__m128i product = _mm_maddubs_epi16( __m128i product0 = _mm_maddubs_epi16(_mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j]));
_mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j])); product0 = _mm_madd_epi16(product0, kOnes);
sum = _mm_add_epi32(sum, product0);
__m128i product1 = _mm_maddubs_epi16(_mm_load_si128(&input_vector[j+1]), _mm_load_si128(&row[j+1]));
product1 = _mm_madd_epi16(product1, kOnes);
sum = _mm_add_epi32(sum, product1);
}
if (kNumChunks & 0x1) {
__m128i product = _mm_maddubs_epi16(_mm_load_si128(&input_vector[kNumChunks-1]), _mm_load_si128(&row[kNumChunks-1]));
product = _mm_madd_epi16(product, kOnes); product = _mm_madd_epi16(product, kOnes);
sum = _mm_add_epi32(sum, product); sum = _mm_add_epi32(sum, product);
} }
sum = _mm_hadd_epi32(sum, sum); sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC
sum = _mm_hadd_epi32(sum, sum); sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB
output[i] = _mm_cvtsi128_si32(sum) + biases_[i];
#elif defined(USE_SSE2)
__m128i sum_lo = _mm_cvtsi32_si128(biases_[i]);
__m128i sum_hi = kZeros;
const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m128i row_j = _mm_load_si128(&row[j]);
__m128i input_j = _mm_load_si128(&input_vector[j]);
__m128i row_signs = _mm_cmpgt_epi8(kZeros, row_j);
__m128i extended_row_lo = _mm_unpacklo_epi8(row_j, row_signs);
__m128i extended_row_hi = _mm_unpackhi_epi8(row_j, row_signs);
__m128i extended_input_lo = _mm_unpacklo_epi8(input_j, kZeros);
__m128i extended_input_hi = _mm_unpackhi_epi8(input_j, kZeros);
__m128i product_lo = _mm_madd_epi16(extended_row_lo, extended_input_lo);
__m128i product_hi = _mm_madd_epi16(extended_row_hi, extended_input_hi);
sum_lo = _mm_add_epi32(sum_lo, product_lo);
sum_hi = _mm_add_epi32(sum_hi, product_hi);
}
__m128i sum = _mm_add_epi32(sum_lo, sum_hi);
__m128i sum_high_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2));
sum = _mm_add_epi32(sum, sum_high_64);
__m128i sum_second_32 = _mm_shufflelo_epi16(sum, _MM_SHUFFLE(1, 0, 3, 2));
sum = _mm_add_epi32(sum, sum_second_32);
output[i] = _mm_cvtsi128_si32(sum); output[i] = _mm_cvtsi128_si32(sum);
#elif defined(USE_MMX)
__m64 sum_lo = _mm_cvtsi32_si64(biases_[i]);
__m64 sum_hi = kZeros;
const auto row = reinterpret_cast<const __m64*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m64 row_j = row[j];
__m64 input_j = input_vector[j];
__m64 row_signs = _mm_cmpgt_pi8(kZeros, row_j);
__m64 extended_row_lo = _mm_unpacklo_pi8(row_j, row_signs);
__m64 extended_row_hi = _mm_unpackhi_pi8(row_j, row_signs);
__m64 extended_input_lo = _mm_unpacklo_pi8(input_j, kZeros);
__m64 extended_input_hi = _mm_unpackhi_pi8(input_j, kZeros);
__m64 product_lo = _mm_madd_pi16(extended_row_lo, extended_input_lo);
__m64 product_hi = _mm_madd_pi16(extended_row_hi, extended_input_hi);
sum_lo = _mm_add_pi32(sum_lo, product_lo);
sum_hi = _mm_add_pi32(sum_hi, product_hi);
}
__m64 sum = _mm_add_pi32(sum_lo, sum_hi);
sum = _mm_add_pi32(sum, _mm_unpackhi_pi32(sum, sum));
output[i] = _mm_cvtsi64_si32(sum);
#elif defined(USE_NEON) #elif defined(USE_NEON)
int32x4_t sum = {biases_[i]}; int32x4_t sum = {biases_[i]};
const auto row = reinterpret_cast<const int8x8_t*>(&weights_[offset]); const auto row = reinterpret_cast<const int8x8_t*>(&weights_[offset]);
@@ -196,6 +263,9 @@ namespace Eval::NNUE::Layers {
#endif #endif
} }
#if defined(USE_MMX)
_mm_empty();
#endif
return output; return output;
} }
+24 -7
View File
@@ -86,18 +86,17 @@ namespace Eval::NNUE::Layers {
const auto out = reinterpret_cast<__m256i*>(output); const auto out = reinterpret_cast<__m256i*>(output);
for (IndexType i = 0; i < kNumChunks; ++i) { for (IndexType i = 0; i < kNumChunks; ++i) {
const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32( const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32(
_mm256_load_si256(&in[i * 4 + 0]), _mm256_loadA_si256(&in[i * 4 + 0]),
_mm256_load_si256(&in[i * 4 + 1])), kWeightScaleBits); _mm256_loadA_si256(&in[i * 4 + 1])), kWeightScaleBits);
const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32( const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32(
_mm256_load_si256(&in[i * 4 + 2]), _mm256_loadA_si256(&in[i * 4 + 2]),
_mm256_load_si256(&in[i * 4 + 3])), kWeightScaleBits); _mm256_loadA_si256(&in[i * 4 + 3])), kWeightScaleBits);
_mm256_store_si256( _mm256_storeA_si256(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
_mm256_packs_epi16(words0, words1), kZero), kOffsets)); _mm256_packs_epi16(words0, words1), kZero), kOffsets));
} }
constexpr IndexType kStart = kNumChunks * kSimdWidth; constexpr IndexType kStart = kNumChunks * kSimdWidth;
#elif defined(USE_SSSE3) #elif defined(USE_SSE2)
constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth; constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
#ifdef USE_SSE41 #ifdef USE_SSE41
@@ -128,6 +127,24 @@ namespace Eval::NNUE::Layers {
} }
constexpr IndexType kStart = kNumChunks * kSimdWidth; constexpr IndexType kStart = kNumChunks * kSimdWidth;
#elif defined(USE_MMX)
constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
const __m64 k0x80s = _mm_set1_pi8(-128);
const auto in = reinterpret_cast<const __m64*>(input);
const auto out = reinterpret_cast<__m64*>(output);
for (IndexType i = 0; i < kNumChunks; ++i) {
const __m64 words0 = _mm_srai_pi16(
_mm_packs_pi32(in[i * 4 + 0], in[i * 4 + 1]),
kWeightScaleBits);
const __m64 words1 = _mm_srai_pi16(
_mm_packs_pi32(in[i * 4 + 2], in[i * 4 + 3]),
kWeightScaleBits);
const __m64 packedbytes = _mm_packs_pi16(words0, words1);
out[i] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
}
_mm_empty();
constexpr IndexType kStart = kNumChunks * kSimdWidth;
#elif defined(USE_NEON) #elif defined(USE_NEON)
constexpr IndexType kNumChunks = kInputDimensions / (kSimdWidth / 2); constexpr IndexType kNumChunks = kInputDimensions / (kSimdWidth / 2);
const int8x8_t kZero = {0}; const int8x8_t kZero = {0};
+1 -1
View File
@@ -26,7 +26,7 @@
namespace Eval::NNUE { namespace Eval::NNUE {
// Class that holds the result of affine transformation of input features // Class that holds the result of affine transformation of input features
struct alignas(32) Accumulator { struct alignas(kCacheLineSize) Accumulator {
std::int16_t std::int16_t
accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions]; accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
Value score; Value score;
+72 -1
View File
@@ -21,6 +21,9 @@
#ifndef NNUE_COMMON_H_INCLUDED #ifndef NNUE_COMMON_H_INCLUDED
#define NNUE_COMMON_H_INCLUDED #define NNUE_COMMON_H_INCLUDED
#include <cstring>
#include <iostream>
#if defined(USE_AVX2) #if defined(USE_AVX2)
#include <immintrin.h> #include <immintrin.h>
@@ -33,10 +36,36 @@
#elif defined(USE_SSE2) #elif defined(USE_SSE2)
#include <emmintrin.h> #include <emmintrin.h>
#elif defined(USE_MMX)
#include <mmintrin.h>
#elif defined(USE_NEON) #elif defined(USE_NEON)
#include <arm_neon.h> #include <arm_neon.h>
#endif #endif
// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Otherwise a binary
// compiled with older g++ crashes because the output memory is not aligned
// even though alignas is specified.
#if defined(USE_AVX2)
#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32) && !defined(__clang__)
#define _mm256_loadA_si256 _mm256_loadu_si256
#define _mm256_storeA_si256 _mm256_storeu_si256
#else
#define _mm256_loadA_si256 _mm256_load_si256
#define _mm256_storeA_si256 _mm256_store_si256
#endif
#endif
#if defined(USE_AVX512)
#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32) && !defined(__clang__)
#define _mm512_loadA_si512 _mm512_loadu_si512
#define _mm512_storeA_si512 _mm512_storeu_si512
#else
#define _mm512_loadA_si512 _mm512_load_si512
#define _mm512_storeA_si512 _mm512_store_si512
#endif
#endif
namespace Eval::NNUE { namespace Eval::NNUE {
// Version of the evaluation file // Version of the evaluation file
@@ -56,12 +85,36 @@ namespace Eval::NNUE {
#elif defined(USE_SSE2) #elif defined(USE_SSE2)
constexpr std::size_t kSimdWidth = 16; constexpr std::size_t kSimdWidth = 16;
#elif defined(USE_MMX)
constexpr std::size_t kSimdWidth = 8;
#elif defined(USE_NEON) #elif defined(USE_NEON)
constexpr std::size_t kSimdWidth = 16; constexpr std::size_t kSimdWidth = 16;
#endif #endif
constexpr std::size_t kMaxSimdWidth = 32; constexpr std::size_t kMaxSimdWidth = 32;
// unique number for each piece type on each square
enum {
PS_NONE = 0,
PS_W_PAWN = 1,
PS_B_PAWN = 1 * SQUARE_NB + 1,
PS_W_KNIGHT = 2 * SQUARE_NB + 1,
PS_B_KNIGHT = 3 * SQUARE_NB + 1,
PS_W_BISHOP = 4 * SQUARE_NB + 1,
PS_B_BISHOP = 5 * SQUARE_NB + 1,
PS_W_ROOK = 6 * SQUARE_NB + 1,
PS_B_ROOK = 7 * SQUARE_NB + 1,
PS_W_QUEEN = 8 * SQUARE_NB + 1,
PS_B_QUEEN = 9 * SQUARE_NB + 1,
PS_W_KING = 10 * SQUARE_NB + 1,
PS_END = PS_W_KING, // pieces without kings (pawns included)
PS_B_KING = 11 * SQUARE_NB + 1,
PS_END2 = 12 * SQUARE_NB + 1
};
extern uint32_t kpp_board_index[PIECE_NB][COLOR_NB];
// Type of input feature after conversion // Type of input feature after conversion
using TransformedFeatureType = std::uint8_t; using TransformedFeatureType = std::uint8_t;
using IndexType = std::uint32_t; using IndexType = std::uint32_t;
@@ -73,7 +126,25 @@ namespace Eval::NNUE {
// Round n up to be a multiple of base // Round n up to be a multiple of base
template <typename IntType> template <typename IntType>
constexpr IntType CeilToMultiple(IntType n, IntType base) { constexpr IntType CeilToMultiple(IntType n, IntType base) {
return (n + base - 1) / base * base; return (n + base - 1) / base * base;
}
// read_little_endian() is our utility to read an integer (signed or unsigned, any size)
// from a stream in little-endian order. We swap the byte order after the read if
// necessary to return a result with the byte ordering of the compiling machine.
template <typename IntType>
inline IntType read_little_endian(std::istream& stream) {
IntType result;
std::uint8_t u[sizeof(IntType)];
typename std::make_unsigned<IntType>::type v = 0;
stream.read(reinterpret_cast<char*>(u), sizeof(IntType));
for (std::size_t i = 0; i < sizeof(IntType); ++i)
v = (v << 8) | u[sizeof(IntType) - i - 1];
std::memcpy(&result, &v, sizeof(IntType));
return result;
} }
} // namespace Eval::NNUE } // namespace Eval::NNUE
+73 -22
View File
@@ -62,10 +62,10 @@ namespace Eval::NNUE {
// Read network parameters // Read network parameters
bool ReadParameters(std::istream& stream) { bool ReadParameters(std::istream& stream) {
stream.read(reinterpret_cast<char*>(biases_), for (std::size_t i = 0; i < kHalfDimensions; ++i)
kHalfDimensions * sizeof(BiasType)); biases_[i] = read_little_endian<BiasType>(stream);
stream.read(reinterpret_cast<char*>(weights_), for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i)
kHalfDimensions * kInputDimensions * sizeof(WeightType)); weights_[i] = read_little_endian<WeightType>(stream);
return !stream.fail(); return !stream.fail();
} }
@@ -104,7 +104,7 @@ namespace Eval::NNUE {
constexpr int kControl = 0b11011000; constexpr int kControl = 0b11011000;
const __m256i kZero = _mm256_setzero_si256(); const __m256i kZero = _mm256_setzero_si256();
#elif defined(USE_SSSE3) #elif defined(USE_SSE2)
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
#ifdef USE_SSE41 #ifdef USE_SSE41
@@ -113,6 +113,10 @@ namespace Eval::NNUE {
const __m128i k0x80s = _mm_set1_epi8(-128); const __m128i k0x80s = _mm_set1_epi8(-128);
#endif #endif
#elif defined(USE_MMX)
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
const __m64 k0x80s = _mm_set1_pi8(-128);
#elif defined(USE_NEON) #elif defined(USE_NEON)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
const int8x8_t kZero = {0}; const int8x8_t kZero = {0};
@@ -125,17 +129,15 @@ namespace Eval::NNUE {
#if defined(USE_AVX2) #if defined(USE_AVX2)
auto out = reinterpret_cast<__m256i*>(&output[offset]); auto out = reinterpret_cast<__m256i*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) { for (IndexType j = 0; j < kNumChunks; ++j) {
__m256i sum0 = __m256i sum0 = _mm256_loadA_si256(
_mm256_load_si256(&reinterpret_cast<const __m256i*>( &reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
accumulation[perspectives[p]][0])[j * 2 + 0]); __m256i sum1 = _mm256_loadA_si256(
__m256i sum1 = &reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
_mm256_load_si256(&reinterpret_cast<const __m256i*>( _mm256_storeA_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
accumulation[perspectives[p]][0])[j * 2 + 1]);
_mm256_store_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
_mm256_packs_epi16(sum0, sum1), kZero), kControl)); _mm256_packs_epi16(sum0, sum1), kZero), kControl));
} }
#elif defined(USE_SSSE3) #elif defined(USE_SSE2)
auto out = reinterpret_cast<__m128i*>(&output[offset]); auto out = reinterpret_cast<__m128i*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) { for (IndexType j = 0; j < kNumChunks; ++j) {
__m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>( __m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
@@ -155,6 +157,17 @@ namespace Eval::NNUE {
); );
} }
#elif defined(USE_MMX)
auto out = reinterpret_cast<__m64*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m64 sum0 = *(&reinterpret_cast<const __m64*>(
accumulation[perspectives[p]][0])[j * 2 + 0]);
__m64 sum1 = *(&reinterpret_cast<const __m64*>(
accumulation[perspectives[p]][0])[j * 2 + 1]);
const __m64 packedbytes = _mm_packs_pi16(sum0, sum1);
out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
}
#elif defined(USE_NEON) #elif defined(USE_NEON)
const auto out = reinterpret_cast<int8x8_t*>(&output[offset]); const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) { for (IndexType j = 0; j < kNumChunks; ++j) {
@@ -172,6 +185,9 @@ namespace Eval::NNUE {
#endif #endif
} }
#if defined(USE_MMX)
_mm_empty();
#endif
} }
private: private:
@@ -187,23 +203,37 @@ namespace Eval::NNUE {
kHalfDimensions * sizeof(BiasType)); kHalfDimensions * sizeof(BiasType));
for (const auto index : active_indices[perspective]) { for (const auto index : active_indices[perspective]) {
const IndexType offset = kHalfDimensions * index; const IndexType offset = kHalfDimensions * index;
#if defined(USE_AVX512)
auto accumulation = reinterpret_cast<__m512i*>(
&accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const __m512i*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
for (IndexType j = 0; j < kNumChunks; ++j)
_mm512_storeA_si512(&accumulation[j], _mm512_add_epi16(_mm512_loadA_si512(&accumulation[j]), column[j]));
#if defined(USE_AVX2) #elif defined(USE_AVX2)
auto accumulation = reinterpret_cast<__m256i*>( auto accumulation = reinterpret_cast<__m256i*>(
&accumulator.accumulation[perspective][i][0]); &accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const __m256i*>(&weights_[offset]); auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
for (IndexType j = 0; j < kNumChunks; ++j) { for (IndexType j = 0; j < kNumChunks; ++j)
accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]); _mm256_storeA_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadA_si256(&accumulation[j]), column[j]));
}
#elif defined(USE_SSE2) #elif defined(USE_SSE2)
auto accumulation = reinterpret_cast<__m128i*>( auto accumulation = reinterpret_cast<__m128i*>(
&accumulator.accumulation[perspective][i][0]); &accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const __m128i*>(&weights_[offset]); auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
for (IndexType j = 0; j < kNumChunks; ++j) { for (IndexType j = 0; j < kNumChunks; ++j)
accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
#elif defined(USE_MMX)
auto accumulation = reinterpret_cast<__m64*>(
&accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = _mm_add_pi16(accumulation[j], column[j]);
} }
#elif defined(USE_NEON) #elif defined(USE_NEON)
@@ -211,18 +241,19 @@ namespace Eval::NNUE {
&accumulator.accumulation[perspective][i][0]); &accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]); auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
for (IndexType j = 0; j < kNumChunks; ++j) { for (IndexType j = 0; j < kNumChunks; ++j)
accumulation[j] = vaddq_s16(accumulation[j], column[j]); accumulation[j] = vaddq_s16(accumulation[j], column[j]);
}
#else #else
for (IndexType j = 0; j < kHalfDimensions; ++j) { for (IndexType j = 0; j < kHalfDimensions; ++j)
accumulator.accumulation[perspective][i][j] += weights_[offset + j]; accumulator.accumulation[perspective][i][j] += weights_[offset + j];
}
#endif #endif
} }
} }
#if defined(USE_MMX)
_mm_empty();
#endif
accumulator.computed_accumulation = true; accumulator.computed_accumulation = true;
accumulator.computed_score = false; accumulator.computed_score = false;
@@ -249,6 +280,11 @@ namespace Eval::NNUE {
auto accumulation = reinterpret_cast<__m128i*>( auto accumulation = reinterpret_cast<__m128i*>(
&accumulator.accumulation[perspective][i][0]); &accumulator.accumulation[perspective][i][0]);
#elif defined(USE_MMX)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
auto accumulation = reinterpret_cast<__m64*>(
&accumulator.accumulation[perspective][i][0]);
#elif defined(USE_NEON) #elif defined(USE_NEON)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
auto accumulation = reinterpret_cast<int16x8_t*>( auto accumulation = reinterpret_cast<int16x8_t*>(
@@ -278,6 +314,12 @@ namespace Eval::NNUE {
accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]); accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]);
} }
#elif defined(USE_MMX)
auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = _mm_sub_pi16(accumulation[j], column[j]);
}
#elif defined(USE_NEON) #elif defined(USE_NEON)
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]); auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) { for (IndexType j = 0; j < kNumChunks; ++j) {
@@ -309,6 +351,12 @@ namespace Eval::NNUE {
accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
} }
#elif defined(USE_MMX)
auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = _mm_add_pi16(accumulation[j], column[j]);
}
#elif defined(USE_NEON) #elif defined(USE_NEON)
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]); auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) { for (IndexType j = 0; j < kNumChunks; ++j) {
@@ -325,6 +373,9 @@ namespace Eval::NNUE {
} }
} }
} }
#if defined(USE_MMX)
_mm_empty();
#endif
accumulator.computed_accumulation = true; accumulator.computed_accumulation = true;
accumulator.computed_score = false; accumulator.computed_score = false;
+1 -1
View File
@@ -219,7 +219,7 @@ Score Entry::evaluate_shelter(const Position& pos, Square ksq) const {
Score bonus = make_score(5, 5); Score bonus = make_score(5, 5);
File center = Utility::clamp(file_of(ksq), FILE_B, FILE_G); File center = std::clamp(file_of(ksq), FILE_B, FILE_G);
for (File f = File(center - 1); f <= File(center + 1); ++f) for (File f = File(center - 1); f <= File(center + 1); ++f)
{ {
b = ourPawns & file_bb(f); b = ourPawns & file_bb(f);
+25 -74
View File
@@ -198,9 +198,6 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th
std::fill_n(&pieceList[0][0], sizeof(pieceList) / sizeof(Square), SQ_NONE); std::fill_n(&pieceList[0][0], sizeof(pieceList) / sizeof(Square), SQ_NONE);
st = si; st = si;
// Each piece on board gets a unique ID used to track the piece later
PieceId piece_id, next_piece_id = PIECE_ID_ZERO;
ss >> std::noskipws; ss >> std::noskipws;
// 1. Piece placement // 1. Piece placement
@@ -212,21 +209,8 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th
else if (token == '/') else if (token == '/')
sq += 2 * SOUTH; sq += 2 * SOUTH;
else if ((idx = PieceToChar.find(token)) != string::npos) else if ((idx = PieceToChar.find(token)) != string::npos) {
{ put_piece(Piece(idx), sq);
auto pc = Piece(idx);
put_piece(pc, sq);
if (Eval::useNNUE)
{
// Kings get a fixed ID, other pieces get ID in order of placement
piece_id =
(idx == W_KING) ? PIECE_ID_WKING :
(idx == B_KING) ? PIECE_ID_BKING :
next_piece_id++;
evalList.put_piece(piece_id, sq, pc);
}
++sq; ++sq;
} }
} }
@@ -721,8 +705,6 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
// Used by NNUE // Used by NNUE
st->accumulator.computed_accumulation = false; st->accumulator.computed_accumulation = false;
st->accumulator.computed_score = false; st->accumulator.computed_score = false;
PieceId dp0 = PIECE_ID_NONE;
PieceId dp1 = PIECE_ID_NONE;
auto& dp = st->dirtyPiece; auto& dp = st->dirtyPiece;
dp.dirty_num = 1; dp.dirty_num = 1;
@@ -775,12 +757,10 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
if (Eval::useNNUE) if (Eval::useNNUE)
{ {
dp.dirty_num = 2; // 2 pieces moved dp.dirty_num = 2; // 1 piece moved, 1 piece captured
dp1 = piece_id_on(capsq); dp.piece[1] = captured;
dp.pieceId[1] = dp1; dp.from[1] = capsq;
dp.old_piece[1] = evalList.piece_with_id(dp1); dp.to[1] = SQ_NONE;
evalList.put_piece(dp1, capsq, NO_PIECE);
dp.new_piece[1] = evalList.piece_with_id(dp1);
} }
// Update board and piece lists // Update board and piece lists
@@ -821,11 +801,9 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
{ {
if (Eval::useNNUE) if (Eval::useNNUE)
{ {
dp0 = piece_id_on(from); dp.piece[0] = pc;
dp.pieceId[0] = dp0; dp.from[0] = from;
dp.old_piece[0] = evalList.piece_with_id(dp0); dp.to[0] = to;
evalList.put_piece(dp0, to, pc);
dp.new_piece[0] = evalList.piece_with_id(dp0);
} }
move_piece(from, to); move_piece(from, to);
@@ -854,9 +832,12 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
if (Eval::useNNUE) if (Eval::useNNUE)
{ {
dp0 = piece_id_on(to); // Promoting pawn to SQ_NONE, promoted piece from SQ_NONE
evalList.put_piece(dp0, to, promotion); dp.to[0] = SQ_NONE;
dp.new_piece[0] = evalList.piece_with_id(dp0); dp.piece[dp.dirty_num] = promotion;
dp.from[dp.dirty_num] = SQ_NONE;
dp.to[dp.dirty_num] = to;
dp.dirty_num++;
} }
// Update hash keys // Update hash keys
@@ -950,12 +931,6 @@ void Position::undo_move(Move m) {
{ {
move_piece(to, from); // Put the piece back at the source square move_piece(to, from); // Put the piece back at the source square
if (Eval::useNNUE)
{
PieceId dp0 = st->dirtyPiece.pieceId[0];
evalList.put_piece(dp0, from, pc);
}
if (st->capturedPiece) if (st->capturedPiece)
{ {
Square capsq = to; Square capsq = to;
@@ -972,14 +947,6 @@ void Position::undo_move(Move m) {
} }
put_piece(st->capturedPiece, capsq); // Restore the captured piece put_piece(st->capturedPiece, capsq); // Restore the captured piece
if (Eval::useNNUE)
{
PieceId dp1 = st->dirtyPiece.pieceId[1];
assert(evalList.piece_with_id(dp1).from[WHITE] == PS_NONE);
assert(evalList.piece_with_id(dp1).from[BLACK] == PS_NONE);
evalList.put_piece(dp1, capsq, st->capturedPiece);
}
} }
} }
@@ -1001,32 +968,16 @@ void Position::do_castling(Color us, Square from, Square& to, Square& rfrom, Squ
rto = relative_square(us, kingSide ? SQ_F1 : SQ_D1); rto = relative_square(us, kingSide ? SQ_F1 : SQ_D1);
to = relative_square(us, kingSide ? SQ_G1 : SQ_C1); to = relative_square(us, kingSide ? SQ_G1 : SQ_C1);
if (Eval::useNNUE) if (Do && Eval::useNNUE)
{ {
PieceId dp0, dp1;
auto& dp = st->dirtyPiece; auto& dp = st->dirtyPiece;
dp.dirty_num = 2; // 2 pieces moved dp.piece[0] = make_piece(us, KING);
dp.from[0] = from;
if (Do) dp.to[0] = to;
{ dp.piece[1] = make_piece(us, ROOK);
dp0 = piece_id_on(from); dp.from[1] = rfrom;
dp1 = piece_id_on(rfrom); dp.to[1] = rto;
dp.pieceId[0] = dp0; dp.dirty_num = 2;
dp.old_piece[0] = evalList.piece_with_id(dp0);
evalList.put_piece(dp0, to, make_piece(us, KING));
dp.new_piece[0] = evalList.piece_with_id(dp0);
dp.pieceId[1] = dp1;
dp.old_piece[1] = evalList.piece_with_id(dp1);
evalList.put_piece(dp1, rto, make_piece(us, ROOK));
dp.new_piece[1] = evalList.piece_with_id(dp1);
}
else
{
dp0 = piece_id_on(to);
dp1 = piece_id_on(rto);
evalList.put_piece(dp0, from, make_piece(us, KING));
evalList.put_piece(dp1, rfrom, make_piece(us, ROOK));
}
} }
// Remove both pieces first since squares could overlap in Chess960 // Remove both pieces first since squares could overlap in Chess960
@@ -1145,8 +1096,8 @@ bool Position::see_ge(Move m, Value threshold) const {
// Don't allow pinned pieces to attack (except the king) as long as // Don't allow pinned pieces to attack (except the king) as long as
// there are pinners on their original square. // there are pinners on their original square.
if (st->pinners[~stm] & occupied) if (pinners(~stm) & occupied)
stmAttackers &= ~st->blockersForKing[stm]; stmAttackers &= ~blockers_for_king(stm);
if (!stmAttackers) if (!stmAttackers)
break; break;
+5 -23
View File
@@ -116,6 +116,7 @@ public:
Bitboard checkers() const; Bitboard checkers() const;
Bitboard blockers_for_king(Color c) const; Bitboard blockers_for_king(Color c) const;
Bitboard check_squares(PieceType pt) const; Bitboard check_squares(PieceType pt) const;
Bitboard pinners(Color c) const;
bool is_discovery_check_on_king(Color c, Move m) const; bool is_discovery_check_on_king(Color c, Move m) const;
// Attacks to/from a given square // Attacks to/from a given square
@@ -173,7 +174,6 @@ public:
// Used by NNUE // Used by NNUE
StateInfo* state() const; StateInfo* state() const;
const EvalList* eval_list() const;
#if defined(EVAL_LEARN) #if defined(EVAL_LEARN)
// --sfenization helper // --sfenization helper
@@ -208,9 +208,6 @@ private:
template<bool Do> template<bool Do>
void do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto); void do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto);
// ID of a piece on a given square
PieceId piece_id_on(Square sq) const;
// Data members // Data members
Piece board[SQUARE_NB]; Piece board[SQUARE_NB];
Bitboard byTypeBB[PIECE_TYPE_NB]; Bitboard byTypeBB[PIECE_TYPE_NB];
@@ -227,9 +224,6 @@ private:
Thread* thisThread; Thread* thisThread;
StateInfo* st; StateInfo* st;
bool chess960; bool chess960;
// List of pieces used in NNUE evaluation function
EvalList evalList;
}; };
namespace PSQT { namespace PSQT {
@@ -332,6 +326,10 @@ inline Bitboard Position::blockers_for_king(Color c) const {
return st->blockersForKing[c]; return st->blockersForKing[c];
} }
inline Bitboard Position::pinners(Color c) const {
return st->pinners[c];
}
inline Bitboard Position::check_squares(PieceType pt) const { inline Bitboard Position::check_squares(PieceType pt) const {
return st->checkSquares[pt]; return st->checkSquares[pt];
} }
@@ -469,20 +467,4 @@ inline StateInfo* Position::state() const {
return st; return st;
} }
inline const EvalList* Position::eval_list() const {
return &evalList;
}
inline PieceId Position::piece_id_on(Square sq) const
{
assert(piece_on(sq) != NO_PIECE);
PieceId pid = evalList.piece_id_list[sq];
assert(is_ok(pid));
return pid;
}
#endif // #ifndef POSITION_H_INCLUDED #endif // #ifndef POSITION_H_INCLUDED
+76 -77
View File
@@ -63,9 +63,9 @@ namespace {
constexpr uint64_t TtHitAverageResolution = 1024; constexpr uint64_t TtHitAverageResolution = 1024;
// Razor and futility margins // Razor and futility margins
constexpr int RazorMargin = 527; constexpr int RazorMargin = 510;
Value futility_margin(Depth d, bool improving) { Value futility_margin(Depth d, bool improving) {
return Value(227 * (d - improving)); return Value(223 * (d - improving));
} }
bool training; bool training;
@@ -75,7 +75,7 @@ namespace {
Depth reduction(bool i, Depth d, int mn) { Depth reduction(bool i, Depth d, int mn) {
int r = Reductions[d] * Reductions[mn]; int r = Reductions[d] * Reductions[mn];
return (r + 570) / 1024 + (!i && r > 1018); return (r + 509) / 1024 + (!i && r > 894);
} }
constexpr int futility_move_count(bool improving, Depth depth) { constexpr int futility_move_count(bool improving, Depth depth) {
@@ -84,7 +84,7 @@ namespace {
// History and stats update bonus, based on depth // History and stats update bonus, based on depth
int stat_bonus(Depth d) { int stat_bonus(Depth d) {
return d > 15 ? 27 : 17 * d * d + 133 * d - 134; return d > 13 ? 29 : 17 * d * d + 134 * d - 134;
} }
// Add a small random component to draw evaluations to avoid 3fold-blindness // Add a small random component to draw evaluations to avoid 3fold-blindness
@@ -194,7 +194,7 @@ namespace {
void Search::init() { void Search::init() {
for (int i = 1; i < MAX_MOVES; ++i) for (int i = 1; i < MAX_MOVES; ++i)
Reductions[i] = int((24.8 + std::log(Threads.size())) * std::log(i)); Reductions[i] = int((22.0 + std::log(Threads.size())) * std::log(i));
training = Options["Training"]; training = Options["Training"];
} }
@@ -339,7 +339,7 @@ void Thread::search() {
// for match (TC 60+0.6) results spanning a wide range of k values. // for match (TC 60+0.6) results spanning a wide range of k values.
PRNG rng(now()); PRNG rng(now());
double floatLevel = Options["UCI_LimitStrength"] ? double floatLevel = Options["UCI_LimitStrength"] ?
Utility::clamp(std::pow((Options["UCI_Elo"] - 1346.6) / 143.4, 1 / 0.806), 0.0, 20.0) : std::clamp(std::pow((Options["UCI_Elo"] - 1346.6) / 143.4, 1 / 0.806), 0.0, 20.0) :
double(Options["Skill Level"]); double(Options["Skill Level"]);
int intLevel = int(floatLevel) + int intLevel = int(floatLevel) +
((floatLevel - int(floatLevel)) * 1024 > rng.rand<unsigned>() % 1024 ? 1 : 0); ((floatLevel - int(floatLevel)) * 1024 > rng.rand<unsigned>() % 1024 ? 1 : 0);
@@ -407,12 +407,12 @@ void Thread::search() {
if (rootDepth >= 4) if (rootDepth >= 4)
{ {
Value prev = rootMoves[pvIdx].previousScore; Value prev = rootMoves[pvIdx].previousScore;
delta = Value(19); delta = Value(17);
alpha = std::max(prev - delta,-VALUE_INFINITE); alpha = std::max(prev - delta,-VALUE_INFINITE);
beta = std::min(prev + delta, VALUE_INFINITE); beta = std::min(prev + delta, VALUE_INFINITE);
// Adjust contempt based on root move's previousScore (dynamic contempt) // Adjust contempt based on root move's previousScore (dynamic contempt)
int dct = ct + (110 - ct / 2) * prev / (abs(prev) + 140); int dct = ct + (105 - ct / 2) * prev / (abs(prev) + 149);
contempt = (us == WHITE ? make_score(dct, dct / 2) contempt = (us == WHITE ? make_score(dct, dct / 2)
: -make_score(dct, dct / 2)); : -make_score(dct, dct / 2));
@@ -510,13 +510,13 @@ void Thread::search() {
&& !Threads.stop && !Threads.stop
&& !mainThread->stopOnPonderhit) && !mainThread->stopOnPonderhit)
{ {
double fallingEval = (296 + 6 * (mainThread->bestPreviousScore - bestValue) double fallingEval = (318 + 6 * (mainThread->bestPreviousScore - bestValue)
+ 6 * (mainThread->iterValue[iterIdx] - bestValue)) / 725.0; + 6 * (mainThread->iterValue[iterIdx] - bestValue)) / 825.0;
fallingEval = Utility::clamp(fallingEval, 0.5, 1.5); fallingEval = std::clamp(fallingEval, 0.5, 1.5);
// If the bestMove is stable over several iterations, reduce time accordingly // If the bestMove is stable over several iterations, reduce time accordingly
timeReduction = lastBestMoveDepth + 10 < completedDepth ? 1.92 : 0.95; timeReduction = lastBestMoveDepth + 9 < completedDepth ? 1.92 : 0.95;
double reduction = (1.47 + mainThread->previousTimeReduction) / (2.22 * timeReduction); double reduction = (1.47 + mainThread->previousTimeReduction) / (2.32 * timeReduction);
// Use part of the gained time from a previous stable move for the current move // Use part of the gained time from a previous stable move for the current move
for (Thread* th : Threads) for (Thread* th : Threads)
@@ -541,7 +541,7 @@ void Thread::search() {
} }
else if ( Threads.increaseDepth else if ( Threads.increaseDepth
&& !mainThread->ponder && !mainThread->ponder
&& Time.elapsed() > totalTime * 0.56) && Time.elapsed() > totalTime * 0.58)
Threads.increaseDepth = false; Threads.increaseDepth = false;
else else
Threads.increaseDepth = true; Threads.increaseDepth = true;
@@ -600,7 +600,7 @@ namespace {
Key posKey; Key posKey;
Move ttMove, move, excludedMove, bestMove; Move ttMove, move, excludedMove, bestMove;
Depth extension, newDepth; Depth extension, newDepth;
Value bestValue, value, ttValue, eval, maxValue, probcutBeta; Value bestValue, value, ttValue, eval, maxValue, probCutBeta;
bool ttHit, ttPv, formerPv, givesCheck, improving, didLMR, priorCapture; bool ttHit, ttPv, formerPv, givesCheck, improving, didLMR, priorCapture;
bool captureOrPromotion, doFullDepthSearch, moveCountPruning, bool captureOrPromotion, doFullDepthSearch, moveCountPruning,
ttCapture, singularQuietLMR; ttCapture, singularQuietLMR;
@@ -798,11 +798,7 @@ namespace {
else else
{ {
if ((ss-1)->currentMove != MOVE_NULL) if ((ss-1)->currentMove != MOVE_NULL)
{ ss->staticEval = eval = evaluate(pos);
int bonus = -(ss-1)->statScore / 512;
ss->staticEval = eval = evaluate(pos) + bonus;
}
else else
ss->staticEval = eval = -(ss-1)->staticEval + 2 * Tempo; ss->staticEval = eval = -(ss-1)->staticEval + 2 * Tempo;
@@ -815,8 +811,9 @@ namespace {
&& eval <= alpha - RazorMargin) && eval <= alpha - RazorMargin)
return qsearch<NT>(pos, ss, alpha, beta); return qsearch<NT>(pos, ss, alpha, beta);
improving = (ss-2)->staticEval == VALUE_NONE ? (ss->staticEval > (ss-4)->staticEval improving = (ss-2)->staticEval == VALUE_NONE
|| (ss-4)->staticEval == VALUE_NONE) : ss->staticEval > (ss-2)->staticEval; ? ss->staticEval > (ss-4)->staticEval || (ss-4)->staticEval == VALUE_NONE
: ss->staticEval > (ss-2)->staticEval;
// Step 8. Futility pruning: child node (~50 Elo) // Step 8. Futility pruning: child node (~50 Elo)
if ( !PvNode if ( !PvNode
@@ -828,10 +825,10 @@ namespace {
// Step 9. Null move search with verification search (~40 Elo) // Step 9. Null move search with verification search (~40 Elo)
if ( !PvNode if ( !PvNode
&& (ss-1)->currentMove != MOVE_NULL && (ss-1)->currentMove != MOVE_NULL
&& (ss-1)->statScore < 23824 && (ss-1)->statScore < 22977
&& eval >= beta && eval >= beta
&& eval >= ss->staticEval && eval >= ss->staticEval
&& ss->staticEval >= beta - 28 * depth - 28 * improving + 94 * ttPv + 200 && ss->staticEval >= beta - 30 * depth - 28 * improving + 84 * ttPv + 182
&& !excludedMove && !excludedMove
&& pos.non_pawn_material(us) && pos.non_pawn_material(us)
&& (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor)) && (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor))
@@ -839,7 +836,7 @@ namespace {
assert(eval - beta >= 0); assert(eval - beta >= 0);
// Null move dynamic reduction based on depth and value // Null move dynamic reduction based on depth and value
Depth R = (737 + 77 * depth) / 246 + std::min(int(eval - beta) / 192, 3); Depth R = (817 + 71 * depth) / 213 + std::min(int(eval - beta) / 192, 3);
ss->currentMove = MOVE_NULL; ss->currentMove = MOVE_NULL;
ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0]; ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0];
@@ -875,7 +872,7 @@ namespace {
} }
} }
probcutBeta = beta + 176 - 49 * improving; probCutBeta = beta + 176 - 49 * improving;
// Step 10. ProbCut (~10 Elo) // Step 10. ProbCut (~10 Elo)
// If we have a good enough capture and a reduced search returns a value // If we have a good enough capture and a reduced search returns a value
@@ -883,21 +880,27 @@ namespace {
if ( !PvNode if ( !PvNode
&& depth > 4 && depth > 4
&& abs(beta) < VALUE_TB_WIN_IN_MAX_PLY && abs(beta) < VALUE_TB_WIN_IN_MAX_PLY
// if value from transposition table is lower than probCutBeta, don't attempt probCut
// there and in further interactions with transposition table cutoff depth is set to depth - 3
// because probCut search has depth set to depth - 4 but we also do a move before it
// so effective depth is equal to depth - 3
&& !( ttHit && !( ttHit
&& tte->depth() >= depth - 3 && tte->depth() >= depth - 3
&& ttValue != VALUE_NONE && ttValue != VALUE_NONE
&& ttValue < probcutBeta)) && ttValue < probCutBeta))
{ {
// if ttMove is a capture and value from transposition table is good enough produce probCut
// cutoff without digging into actual probCut search
if ( ttHit if ( ttHit
&& tte->depth() >= depth - 3 && tte->depth() >= depth - 3
&& ttValue != VALUE_NONE && ttValue != VALUE_NONE
&& ttValue >= probcutBeta && ttValue >= probCutBeta
&& ttMove && ttMove
&& pos.capture_or_promotion(ttMove)) && pos.capture_or_promotion(ttMove))
return probcutBeta; return probCutBeta;
assert(probcutBeta < VALUE_INFINITE); assert(probCutBeta < VALUE_INFINITE);
MovePicker mp(pos, ttMove, probcutBeta - ss->staticEval, &captureHistory); MovePicker mp(pos, ttMove, probCutBeta - ss->staticEval, &captureHistory);
int probCutCount = 0; int probCutCount = 0;
while ( (move = mp.next_move()) != MOVE_NONE while ( (move = mp.next_move()) != MOVE_NONE
@@ -919,16 +922,17 @@ namespace {
pos.do_move(move, st); pos.do_move(move, st);
// Perform a preliminary qsearch to verify that the move holds // Perform a preliminary qsearch to verify that the move holds
value = -qsearch<NonPV>(pos, ss+1, -probcutBeta, -probcutBeta+1); value = -qsearch<NonPV>(pos, ss+1, -probCutBeta, -probCutBeta+1);
// If the qsearch held, perform the regular search // If the qsearch held, perform the regular search
if (value >= probcutBeta) if (value >= probCutBeta)
value = -search<NonPV>(pos, ss+1, -probcutBeta, -probcutBeta+1, depth - 4, !cutNode); value = -search<NonPV>(pos, ss+1, -probCutBeta, -probCutBeta+1, depth - 4, !cutNode);
pos.undo_move(move); pos.undo_move(move);
if (value >= probcutBeta) if (value >= probCutBeta)
{ {
// if transposition table doesn't have equal or more deep info write probCut data into it
if ( !(ttHit if ( !(ttHit
&& tte->depth() >= depth - 3 && tte->depth() >= depth - 3
&& ttValue != VALUE_NONE)) && ttValue != VALUE_NONE))
@@ -940,16 +944,6 @@ namespace {
} }
} }
// Step 11. Internal iterative deepening (~1 Elo)
if (depth >= 7 && !ttMove)
{
search<NT>(pos, ss, alpha, beta, depth - 7, cutNode);
tte = TT.probe(posKey, ttHit);
ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE;
ttMove = ttHit ? tte->move() : MOVE_NONE;
}
moves_loop: // When in check, search starts from here moves_loop: // When in check, search starts from here
const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory, const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory,
@@ -973,7 +967,7 @@ moves_loop: // When in check, search starts from here
// Mark this node as being searched // Mark this node as being searched
ThreadHolding th(thisThread, posKey, ss->ply); ThreadHolding th(thisThread, posKey, ss->ply);
// Step 12. Loop through all pseudo-legal moves until no moves remain // Step 11. Loop through all pseudo-legal moves until no moves remain
// or a beta cutoff occurs. // or a beta cutoff occurs.
while ((move = mp.next_move(moveCountPruning)) != MOVE_NONE) while ((move = mp.next_move(moveCountPruning)) != MOVE_NONE)
{ {
@@ -1015,7 +1009,7 @@ moves_loop: // When in check, search starts from here
// Calculate new depth for this move // Calculate new depth for this move
newDepth = depth - 1; newDepth = depth - 1;
// Step 13. Pruning at shallow depth (~200 Elo) // Step 12. Pruning at shallow depth (~200 Elo)
if ( !rootNode if ( !rootNode
&& !(training && PvNode) && !(training && PvNode)
&& pos.non_pawn_material(us) && pos.non_pawn_material(us)
@@ -1037,17 +1031,17 @@ moves_loop: // When in check, search starts from here
continue; continue;
// Futility pruning: parent node (~5 Elo) // Futility pruning: parent node (~5 Elo)
if ( lmrDepth < 8 if ( lmrDepth < 7
&& !ss->inCheck && !ss->inCheck
&& ss->staticEval + 284 + 188 * lmrDepth <= alpha && ss->staticEval + 283 + 170 * lmrDepth <= alpha
&& (*contHist[0])[movedPiece][to_sq(move)] && (*contHist[0])[movedPiece][to_sq(move)]
+ (*contHist[1])[movedPiece][to_sq(move)] + (*contHist[1])[movedPiece][to_sq(move)]
+ (*contHist[3])[movedPiece][to_sq(move)] + (*contHist[3])[movedPiece][to_sq(move)]
+ (*contHist[5])[movedPiece][to_sq(move)] / 2 < 28388) + (*contHist[5])[movedPiece][to_sq(move)] / 2 < 27376)
continue; continue;
// Prune moves with negative SEE (~20 Elo) // Prune moves with negative SEE (~20 Elo)
if (!pos.see_ge(move, Value(-(29 - std::min(lmrDepth, 17)) * lmrDepth * lmrDepth))) if (!pos.see_ge(move, Value(-(29 - std::min(lmrDepth, 18)) * lmrDepth * lmrDepth)))
continue; continue;
} }
else else
@@ -1064,17 +1058,17 @@ moves_loop: // When in check, search starts from here
&& !(PvNode && abs(bestValue) < 2) && !(PvNode && abs(bestValue) < 2)
&& PieceValue[MG][type_of(movedPiece)] >= PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] && PieceValue[MG][type_of(movedPiece)] >= PieceValue[MG][type_of(pos.piece_on(to_sq(move)))]
&& !ss->inCheck && !ss->inCheck
&& ss->staticEval + 178 + 261 * lmrDepth && ss->staticEval + 169 + 244 * lmrDepth
+ PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] <= alpha) + PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] <= alpha)
continue; continue;
// See based pruning // See based pruning
if (!pos.see_ge(move, Value(-202) * depth)) // (~25 Elo) if (!pos.see_ge(move, Value(-221) * depth)) // (~25 Elo)
continue; continue;
} }
} }
// Step 14. Extensions (~75 Elo) // Step 13. Extensions (~75 Elo)
// Singular extension search (~70 Elo). If all moves but one fail low on a // Singular extension search (~70 Elo). If all moves but one fail low on a
// search of (alpha-s, beta-s), and just one fails high on (alpha, beta), // search of (alpha-s, beta-s), and just one fails high on (alpha, beta),
@@ -1128,19 +1122,14 @@ moves_loop: // When in check, search starts from here
&& (pos.is_discovery_check_on_king(~us, move) || pos.see_ge(move))) && (pos.is_discovery_check_on_king(~us, move) || pos.see_ge(move)))
extension = 1; extension = 1;
// Passed pawn extension
else if ( move == ss->killers[0]
&& pos.advanced_pawn_push(move)
&& pos.pawn_passed(us, to_sq(move)))
extension = 1;
// Last captures extension // Last captures extension
else if ( PieceValue[EG][pos.captured_piece()] > PawnValueEg else if ( PieceValue[EG][pos.captured_piece()] > PawnValueEg
&& pos.non_pawn_material() <= 2 * RookValueMg) && pos.non_pawn_material() <= 2 * RookValueMg)
extension = 1; extension = 1;
// Castling extension // Castling extension
if (type_of(move) == CASTLING) if ( type_of(move) == CASTLING
&& popcount(pos.pieces(us) & ~pos.pieces(PAWN) & (to_sq(move) & KingSide ? KingSide : QueenSide)) <= 2)
extension = 1; extension = 1;
// Late irreversible move extension // Late irreversible move extension
@@ -1162,10 +1151,10 @@ moves_loop: // When in check, search starts from here
[movedPiece] [movedPiece]
[to_sq(move)]; [to_sq(move)];
// Step 15. Make the move // Step 14. Make the move
pos.do_move(move, st, givesCheck); pos.do_move(move, st, givesCheck);
// Step 16. Reduced depth search (LMR, ~200 Elo). If the move fails high it will be // Step 15. Reduced depth search (LMR, ~200 Elo). If the move fails high it will be
// re-searched at full depth. // re-searched at full depth.
if ( depth >= 3 if ( depth >= 3
&& moveCount > 1 + 2 * rootNode + 2 * (PvNode && abs(bestValue) < 2) && moveCount > 1 + 2 * rootNode + 2 * (PvNode && abs(bestValue) < 2)
@@ -1174,7 +1163,7 @@ moves_loop: // When in check, search starts from here
|| moveCountPruning || moveCountPruning
|| ss->staticEval + PieceValue[EG][pos.captured_piece()] <= alpha || ss->staticEval + PieceValue[EG][pos.captured_piece()] <= alpha
|| cutNode || cutNode
|| thisThread->ttHitAverage < 415 * TtHitAverageResolution * TtHitAverageWindow / 1024)) || thisThread->ttHitAverage < 427 * TtHitAverageResolution * TtHitAverageWindow / 1024))
{ {
Depth r = reduction(improving, depth, moveCount); Depth r = reduction(improving, depth, moveCount);
@@ -1186,7 +1175,7 @@ moves_loop: // When in check, search starts from here
r--; r--;
// Decrease reduction if the ttHit running average is large // Decrease reduction if the ttHit running average is large
if (thisThread->ttHitAverage > 473 * TtHitAverageResolution * TtHitAverageWindow / 1024) if (thisThread->ttHitAverage > 509 * TtHitAverageResolution * TtHitAverageWindow / 1024)
r--; r--;
// Reduction if other threads are searching this position // Reduction if other threads are searching this position
@@ -1229,17 +1218,17 @@ moves_loop: // When in check, search starts from here
+ (*contHist[0])[movedPiece][to_sq(move)] + (*contHist[0])[movedPiece][to_sq(move)]
+ (*contHist[1])[movedPiece][to_sq(move)] + (*contHist[1])[movedPiece][to_sq(move)]
+ (*contHist[3])[movedPiece][to_sq(move)] + (*contHist[3])[movedPiece][to_sq(move)]
- 4826; - 5287;
// Decrease/increase reduction by comparing opponent's stat score (~10 Elo) // Decrease/increase reduction by comparing opponent's stat score (~10 Elo)
if (ss->statScore >= -100 && (ss-1)->statScore < -112) if (ss->statScore >= -106 && (ss-1)->statScore < -104)
r--; r--;
else if ((ss-1)->statScore >= -125 && ss->statScore < -138) else if ((ss-1)->statScore >= -119 && ss->statScore < -140)
r++; r++;
// Decrease/increase reduction for moves with a good/bad history (~30 Elo) // Decrease/increase reduction for moves with a good/bad history (~30 Elo)
r -= ss->statScore / 14615; r -= ss->statScore / 14884;
} }
else else
{ {
@@ -1249,11 +1238,11 @@ moves_loop: // When in check, search starts from here
// Unless giving check, this capture is likely bad // Unless giving check, this capture is likely bad
if ( !givesCheck if ( !givesCheck
&& ss->staticEval + PieceValue[EG][pos.captured_piece()] + 211 * depth <= alpha) && ss->staticEval + PieceValue[EG][pos.captured_piece()] + 213 * depth <= alpha)
r++; r++;
} }
Depth d = Utility::clamp(newDepth - r, 1, newDepth); Depth d = std::clamp(newDepth - r, 1, newDepth);
value = -search<NonPV>(pos, ss+1, -(alpha+1), -alpha, d, true); value = -search<NonPV>(pos, ss+1, -(alpha+1), -alpha, d, true);
@@ -1268,7 +1257,7 @@ moves_loop: // When in check, search starts from here
didLMR = false; didLMR = false;
} }
// Step 17. Full depth search when LMR is skipped or fails high // Step 16. Full depth search when LMR is skipped or fails high
if (doFullDepthSearch) if (doFullDepthSearch)
{ {
value = -search<NonPV>(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode); value = -search<NonPV>(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode);
@@ -1296,12 +1285,12 @@ moves_loop: // When in check, search starts from here
value = -search<PV>(pos, ss+1, -beta, -alpha, newDepth, false); value = -search<PV>(pos, ss+1, -beta, -alpha, newDepth, false);
} }
// Step 18. Undo move // Step 17. Undo move
pos.undo_move(move); pos.undo_move(move);
assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); assert(value > -VALUE_INFINITE && value < VALUE_INFINITE);
// Step 19. Check for a new best move // Step 18. Check for a new best move
// Finished searching the move. If a stop occurred, the return value of // Finished searching the move. If a stop occurred, the return value of
// the search cannot be trusted, and we return immediately without // the search cannot be trusted, and we return immediately without
// updating best move, PV and TT. // updating best move, PV and TT.
@@ -1378,7 +1367,7 @@ moves_loop: // When in check, search starts from here
return VALUE_DRAW; return VALUE_DRAW;
*/ */
// Step 20. Check for mate and stalemate // Step 19. Check for mate and stalemate
// All legal moves have been searched and if there are no legal moves, it // All legal moves have been searched and if there are no legal moves, it
// must be a mate or a stalemate. If we are in a singular extension search then // must be a mate or a stalemate. If we are in a singular extension search then
// return a fail low score. // return a fail low score.
@@ -1511,7 +1500,7 @@ moves_loop: // When in check, search starts from here
if (PvNode && bestValue > alpha) if (PvNode && bestValue > alpha)
alpha = bestValue; alpha = bestValue;
futilityBase = bestValue + 141; futilityBase = bestValue + 145;
} }
const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory, const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory,
@@ -1545,6 +1534,10 @@ moves_loop: // When in check, search starts from here
{ {
assert(type_of(move) != ENPASSANT); // Due to !pos.advanced_pawn_push assert(type_of(move) != ENPASSANT); // Due to !pos.advanced_pawn_push
// moveCount pruning
if (moveCount > 2)
continue;
futilityValue = futilityBase + PieceValue[EG][pos.piece_on(to_sq(move))]; futilityValue = futilityBase + PieceValue[EG][pos.piece_on(to_sq(move))];
if (futilityValue <= alpha) if (futilityValue <= alpha)
@@ -1586,6 +1579,12 @@ moves_loop: // When in check, search starts from here
[pos.moved_piece(move)] [pos.moved_piece(move)]
[to_sq(move)]; [to_sq(move)];
if ( !captureOrPromotion
&& moveCount >= abs(depth) + 1
&& (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold
&& (*contHist[1])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold)
continue;
// Make and search the move // Make and search the move
pos.do_move(move, st, givesCheck); pos.do_move(move, st, givesCheck);
value = -qsearch<NT>(pos, ss+1, -beta, -alpha, depth - 1); value = -qsearch<NT>(pos, ss+1, -beta, -alpha, depth - 1);
@@ -1768,7 +1767,7 @@ moves_loop: // When in check, search starts from here
} }
if (depth > 11 && ss->ply < MAX_LPH) if (depth > 11 && ss->ply < MAX_LPH)
thisThread->lowPlyHistory[ss->ply][from_to(move)] << stat_bonus(depth - 6); thisThread->lowPlyHistory[ss->ply][from_to(move)] << stat_bonus(depth - 7);
} }
// When playing with strength handicap, choose best move among a set of RootMoves // When playing with strength handicap, choose best move among a set of RootMoves
+1 -1
View File
@@ -27,7 +27,7 @@
/// The implementation calls pthread_create() with the stack size parameter /// The implementation calls pthread_create() with the stack size parameter
/// equal to the linux 8MB default, on platforms that support it. /// equal to the linux 8MB default, on platforms that support it.
#if defined(__APPLE__) || defined(__MINGW32__) || defined(__MINGW64__) #if defined(__APPLE__) || defined(__MINGW32__) || defined(__MINGW64__) || defined(USE_PTHREADS)
#include <pthread.h> #include <pthread.h>
+9 -9
View File
@@ -38,9 +38,9 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) {
TimePoint slowMover = TimePoint(Options["Slow Mover"]); TimePoint slowMover = TimePoint(Options["Slow Mover"]);
TimePoint npmsec = TimePoint(Options["nodestime"]); TimePoint npmsec = TimePoint(Options["nodestime"]);
// opt_scale is a percentage of available time to use for the current move. // optScale is a percentage of available time to use for the current move.
// max_scale is a multiplier applied to optimumTime. // maxScale is a multiplier applied to optimumTime.
double opt_scale, max_scale; double optScale, maxScale;
// If we have to play in 'nodes as time' mode, then convert from time // If we have to play in 'nodes as time' mode, then convert from time
// to nodes, and use resulting values in time management formulas. // to nodes, and use resulting values in time management formulas.
@@ -75,22 +75,22 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) {
// game time for the current move, so also cap to 20% of available game time. // game time for the current move, so also cap to 20% of available game time.
if (limits.movestogo == 0) if (limits.movestogo == 0)
{ {
opt_scale = std::min(0.008 + std::pow(ply + 3.0, 0.5) / 250.0, optScale = std::min(0.008 + std::pow(ply + 3.0, 0.5) / 250.0,
0.2 * limits.time[us] / double(timeLeft)); 0.2 * limits.time[us] / double(timeLeft));
max_scale = std::min(7.0, 4.0 + ply / 12.0); maxScale = std::min(7.0, 4.0 + ply / 12.0);
} }
// x moves in y seconds (+ z increment) // x moves in y seconds (+ z increment)
else else
{ {
opt_scale = std::min((0.8 + ply / 128.0) / mtg, optScale = std::min((0.8 + ply / 128.0) / mtg,
0.8 * limits.time[us] / double(timeLeft)); 0.8 * limits.time[us] / double(timeLeft));
max_scale = std::min(6.3, 1.5 + 0.11 * mtg); maxScale = std::min(6.3, 1.5 + 0.11 * mtg);
} }
// Never use more than 80% of the available time for this move // Never use more than 80% of the available time for this move
optimumTime = TimePoint(opt_scale * timeLeft); optimumTime = TimePoint(optScale * timeLeft);
maximumTime = TimePoint(std::min(0.8 * limits.time[us] - moveOverhead, max_scale * optimumTime)); maximumTime = TimePoint(std::min(0.8 * limits.time[us] - moveOverhead, maxScale * optimumTime));
if (Options["Ponder"]) if (Options["Ponder"])
optimumTime += optimumTime / 4; optimumTime += optimumTime / 4;
+11 -10
View File
@@ -37,18 +37,19 @@ void TTEntry::save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev)
if (m || (uint16_t)k != key16) if (m || (uint16_t)k != key16)
move16 = (uint16_t)m; move16 = (uint16_t)m;
// Overwrite less valuable entries // Overwrite less valuable entries (cheapest checks first)
if ((uint16_t)k != key16 if (b == BOUND_EXACT
|| d - DEPTH_OFFSET > depth8 - 4 || (uint16_t)k != key16
|| b == BOUND_EXACT) || d - DEPTH_OFFSET > depth8 - 4)
{ {
assert(d >= DEPTH_OFFSET); assert(d > DEPTH_OFFSET);
assert(d < 256 + DEPTH_OFFSET);
key16 = (uint16_t)k; key16 = (uint16_t)k;
depth8 = (uint8_t)(d - DEPTH_OFFSET);
genBound8 = (uint8_t)(TT.generation8 | uint8_t(pv) << 2 | b);
value16 = (int16_t)v; value16 = (int16_t)v;
eval16 = (int16_t)ev; eval16 = (int16_t)ev;
genBound8 = (uint8_t)(TT.generation8 | uint8_t(pv) << 2 | b);
depth8 = (uint8_t)(d - DEPTH_OFFSET);
} }
} }
@@ -119,11 +120,11 @@ TTEntry* TranspositionTable::probe(const Key key, bool& found) const {
const uint16_t key16 = (uint16_t)key; // Use the low 16 bits as key inside the cluster const uint16_t key16 = (uint16_t)key; // Use the low 16 bits as key inside the cluster
for (int i = 0; i < ClusterSize; ++i) for (int i = 0; i < ClusterSize; ++i)
if (!tte[i].key16 || tte[i].key16 == key16) if (tte[i].key16 == key16 || !tte[i].depth8)
{ {
tte[i].genBound8 = uint8_t(generation8 | (tte[i].genBound8 & 0x7)); // Refresh tte[i].genBound8 = uint8_t(generation8 | (tte[i].genBound8 & 0x7)); // Refresh
return found = (bool)tte[i].key16, &tte[i]; return found = (bool)tte[i].depth8, &tte[i];
} }
// Find an entry to be replaced according to the replacement strategy // Find an entry to be replaced according to the replacement strategy
@@ -149,7 +150,7 @@ int TranspositionTable::hashfull() const {
int cnt = 0; int cnt = 0;
for (int i = 0; i < 1000; ++i) for (int i = 0; i < 1000; ++i)
for (int j = 0; j < ClusterSize; ++j) for (int j = 0; j < ClusterSize; ++j)
cnt += (table[i].entry[j].genBound8 & 0xF8) == generation8; cnt += table[i].entry[j].depth8 && (table[i].entry[j].genBound8 & 0xF8) == generation8;
return cnt / ClusterSize; return cnt / ClusterSize;
} }
+6 -6
View File
@@ -25,13 +25,13 @@
/// TTEntry struct is the 10 bytes transposition table entry, defined as below: /// TTEntry struct is the 10 bytes transposition table entry, defined as below:
/// ///
/// key 16 bit /// key 16 bit
/// move 16 bit /// depth 8 bit
/// value 16 bit
/// eval value 16 bit
/// generation 5 bit /// generation 5 bit
/// pv node 1 bit /// pv node 1 bit
/// bound type 2 bit /// bound type 2 bit
/// depth 8 bit /// move 16 bit
/// value 16 bit
/// eval value 16 bit
struct TTEntry { struct TTEntry {
@@ -47,11 +47,11 @@ private:
friend class TranspositionTable; friend class TranspositionTable;
uint16_t key16; uint16_t key16;
uint8_t depth8;
uint8_t genBound8;
uint16_t move16; uint16_t move16;
int16_t value16; int16_t value16;
int16_t eval16; int16_t eval16;
uint8_t genBound8;
uint8_t depth8;
}; };
+10 -134
View File
@@ -203,22 +203,6 @@ enum Piece {
PIECE_NB = 16 PIECE_NB = 16
}; };
// An ID used to track the pieces. Max. 32 pieces on board.
enum PieceId {
PIECE_ID_ZERO = 0,
PIECE_ID_KING = 30,
PIECE_ID_WKING = 30,
PIECE_ID_BKING = 31,
PIECE_ID_NONE = 32
};
inline PieceId operator++(PieceId& d, int) {
PieceId x = d;
d = PieceId(int(d) + 1);
return x;
}
constexpr Value PieceValue[PHASE_NB][PIECE_NB] = { constexpr Value PieceValue[PHASE_NB][PIECE_NB] = {
{ VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO, { VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO,
VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO }, VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO },
@@ -234,7 +218,8 @@ enum : int {
DEPTH_QS_RECAPTURES = -5, DEPTH_QS_RECAPTURES = -5,
DEPTH_NONE = -6, DEPTH_NONE = -6,
DEPTH_OFFSET = DEPTH_NONE
DEPTH_OFFSET = -7 // value used only for TT entry occupancy check
}; };
enum Square : int { enum Square : int {
@@ -272,118 +257,20 @@ enum Rank : int {
RANK_1, RANK_2, RANK_3, RANK_4, RANK_5, RANK_6, RANK_7, RANK_8, RANK_NB RANK_1, RANK_2, RANK_3, RANK_4, RANK_5, RANK_6, RANK_7, RANK_8, RANK_NB
}; };
// unique number for each piece type on each square // Keep track of what a move changes on the board (used by NNUE)
enum PieceSquare : uint32_t {
PS_NONE = 0,
PS_W_PAWN = 1,
PS_B_PAWN = 1 * SQUARE_NB + 1,
PS_W_KNIGHT = 2 * SQUARE_NB + 1,
PS_B_KNIGHT = 3 * SQUARE_NB + 1,
PS_W_BISHOP = 4 * SQUARE_NB + 1,
PS_B_BISHOP = 5 * SQUARE_NB + 1,
PS_W_ROOK = 6 * SQUARE_NB + 1,
PS_B_ROOK = 7 * SQUARE_NB + 1,
PS_W_QUEEN = 8 * SQUARE_NB + 1,
PS_B_QUEEN = 9 * SQUARE_NB + 1,
PS_W_KING = 10 * SQUARE_NB + 1,
PS_END = PS_W_KING, // pieces without kings (pawns included)
PS_B_KING = 11 * SQUARE_NB + 1,
PS_END2 = 12 * SQUARE_NB + 1,
PS_NOT_INIT = PS_END2 + 1,
};
struct ExtPieceSquare {
PieceSquare from[COLOR_NB];
};
// Array for finding the PieceSquare corresponding to the piece on the board
extern ExtPieceSquare kpp_board_index[PIECE_NB];
constexpr bool is_ok(PieceId pid);
constexpr Square rotate180(Square sq);
class Position;
// Structure holding which tracked piece (PieceId) is where (PieceSquare)
class EvalList {
public:
// Max. number of pieces without kings is 30 but must be a multiple of 4 in AVX2
static const int MAX_LENGTH = 32;
// Array that holds the piece id for the pieces on the board
PieceId piece_id_list[SQUARE_NB];
// List of pieces, separate from White and Black POV
PieceSquare* piece_list_fw() const { return const_cast<PieceSquare*>(pieceListFw); }
PieceSquare* piece_list_fb() const { return const_cast<PieceSquare*>(pieceListFb); }
// Place the piece pc with piece_id on the square sq on the board
void put_piece(PieceId piece_id, Square sq, Piece pc)
{
assert(is_ok(piece_id));
if (pc != NO_PIECE)
{
pieceListFw[piece_id] = PieceSquare(kpp_board_index[pc].from[WHITE] + sq);
pieceListFb[piece_id] = PieceSquare(kpp_board_index[pc].from[BLACK] + rotate180(sq));
piece_id_list[sq] = piece_id;
}
else
{
pieceListFw[piece_id] = PS_NONE;
pieceListFb[piece_id] = PS_NONE;
piece_id_list[sq] = piece_id;
}
}
// Convert the specified piece_id piece to ExtPieceSquare type and return it
ExtPieceSquare piece_with_id(PieceId piece_id) const
{
ExtPieceSquare eps;
eps.from[WHITE] = pieceListFw[piece_id];
eps.from[BLACK] = pieceListFb[piece_id];
return eps;
}
// Initialize the pieceList.
// Set the value of unused pieces to PieceSquare::PS_NONE in case you want to deal with dropped pieces.
// A normal evaluation function can be used as an evaluation function for missing frames.
// piece_no_list is initialized with PieceId::PIECE_ID_NONE to facilitate debugging.
void clear()
{
for (auto& p : pieceListFw)
p = PieceSquare::PS_NONE;
for (auto& p : pieceListFb)
p = PieceSquare::PS_NONE;
for (auto& v : piece_id_list)
v = PieceId::PIECE_ID_NONE;
}
// Check whether the pieceListFw[] held internally is a correct BonaPiece.
// Note: For debugging. slow.
bool is_valid(const Position& pos);
private:
PieceSquare pieceListFw[MAX_LENGTH];
PieceSquare pieceListFb[MAX_LENGTH];
};
// For differential evaluation of pieces that changed since last turn
struct DirtyPiece { struct DirtyPiece {
// Number of changed pieces // Number of changed pieces
int dirty_num; int dirty_num;
// The ids of changed pieces, max. 2 pieces can change in one move // Max 3 pieces can change in one move. A promotion with capture moves
PieceId pieceId[2]; // both the pawn and the captured piece to SQ_NONE and the piece promoted
// to from SQ_NONE to the capture square.
Piece piece[3];
// What changed from the piece with that piece number // From and to squares, which may be SQ_NONE
ExtPieceSquare old_piece[2]; Square from[3];
ExtPieceSquare new_piece[2]; Square to[3];
}; };
/// Score enum stores a middlegame and an endgame value in a single integer (enum). /// Score enum stores a middlegame and an endgame value in a single integer (enum).
@@ -433,8 +320,6 @@ ENABLE_FULL_OPERATORS_ON(Value)
ENABLE_FULL_OPERATORS_ON(Direction) ENABLE_FULL_OPERATORS_ON(Direction)
ENABLE_INCR_OPERATORS_ON(Piece) ENABLE_INCR_OPERATORS_ON(Piece)
ENABLE_INCR_OPERATORS_ON(PieceSquare)
ENABLE_INCR_OPERATORS_ON(PieceId)
ENABLE_INCR_OPERATORS_ON(PieceType) ENABLE_INCR_OPERATORS_ON(PieceType)
ENABLE_INCR_OPERATORS_ON(Square) ENABLE_INCR_OPERATORS_ON(Square)
ENABLE_INCR_OPERATORS_ON(File) ENABLE_INCR_OPERATORS_ON(File)
@@ -523,10 +408,6 @@ inline Color color_of(Piece pc) {
return Color(pc >> 3); return Color(pc >> 3);
} }
constexpr bool is_ok(PieceId pid) {
return pid < PIECE_ID_NONE;
}
constexpr bool is_ok(Square s) { constexpr bool is_ok(Square s) {
return s >= SQ_A1 && s <= SQ_H8; return s >= SQ_A1 && s <= SQ_H8;
} }
@@ -563,11 +444,6 @@ constexpr Square to_sq(Move m) {
return Square(m & 0x3F); return Square(m & 0x3F);
} }
// Return relative square when turning the board 180 degrees
constexpr Square rotate180(Square sq) {
return (Square)(sq ^ 0x3F);
}
constexpr int from_to(Move m) { constexpr int from_to(Move m) {
return m & 0xFFF; return m & 0xFFF;
} }
+1 -1
View File
@@ -260,7 +260,7 @@ double UCI::win_rate_model_double(double v, int ply) {
double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3]; double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3];
// Transform eval to centipawns with limited range // Transform eval to centipawns with limited range
double x = Utility::clamp(double(100 * v) / PawnValueEg, -1000.0, 1000.0); double x = std::clamp(double(100 * v) / PawnValueEg, -1000.0, 1000.0);
// Return win rate in per mille // Return win rate in per mille
return 1000.0 / (1 + std::exp((a - x) / b)); return 1000.0 / (1 + std::exp((a - x) / b));
+4 -2
View File
@@ -79,8 +79,10 @@ void init(OptionsMap& o) {
o["SyzygyProbeDepth"] << Option(1, 1, 100); o["SyzygyProbeDepth"] << Option(1, 1, 100);
o["Syzygy50MoveRule"] << Option(true); o["Syzygy50MoveRule"] << Option(true);
o["SyzygyProbeLimit"] << Option(7, 0, 7); o["SyzygyProbeLimit"] << Option(7, 0, 7);
o["Use NNUE"] << Option(false, on_use_NNUE); o["Use NNUE"] << Option(true, on_use_NNUE);
o["EvalFile"] << Option("nn-9931db908a9b.nnue", on_eval_file); // The default must follow the format nn-[SHA256 first 12 digits].nnue
// for the build process (profile-build and fishtest) to work.
o["EvalFile"] << Option("nn-82215d0fd0df.nnue", on_eval_file);
#ifdef EVAL_NNUE #ifdef EVAL_NNUE
// When the evaluation function is loaded at the ucinewgame timing, it is necessary to convert the new evaluation function. // When the evaluation function is loaded at the ucinewgame timing, it is necessary to convert the new evaluation function.
// I want to hit the test eval convert command, but there is no new evaluation function // I want to hit the test eval convert command, but there is no new evaluation function
+4 -4
View File
@@ -70,7 +70,7 @@ for args in "eval" \
"go depth 10" \ "go depth 10" \
"go movetime 1000" \ "go movetime 1000" \
"go wtime 8000 btime 8000 winc 500 binc 500" \ "go wtime 8000 btime 8000 winc 500 binc 500" \
"bench 128 $threads 10 default depth" "bench 128 $threads 8 default depth"
do do
echo "$prefix $exeprefix ./stockfish $args $postfix" echo "$prefix $exeprefix ./stockfish $args $postfix"
@@ -80,7 +80,7 @@ done
# more general testing, following an uci protocol exchange # more general testing, following an uci protocol exchange
cat << EOF > game.exp cat << EOF > game.exp
set timeout 10 set timeout 240
spawn $exeprefix ./stockfish spawn $exeprefix ./stockfish
send "uci\n" send "uci\n"
@@ -98,7 +98,7 @@ cat << EOF > game.exp
expect "bestmove" expect "bestmove"
send "position fen 5rk1/1K4p1/8/8/3B4/8/8/8 b - - 0 1\n" send "position fen 5rk1/1K4p1/8/8/3B4/8/8/8 b - - 0 1\n"
send "go depth 30\n" send "go depth 20\n"
expect "bestmove" expect "bestmove"
send "quit\n" send "quit\n"
@@ -121,7 +121,7 @@ cat << EOF > syzygy.exp
send "uci\n" send "uci\n"
send "setoption name SyzygyPath value ../tests/syzygy/\n" send "setoption name SyzygyPath value ../tests/syzygy/\n"
expect "info string Found 35 tablebases" {} timeout {exit 1} expect "info string Found 35 tablebases" {} timeout {exit 1}
send "bench 128 1 10 default depth\n" send "bench 128 1 8 default depth\n"
send "quit\n" send "quit\n"
expect eof expect eof