Merge pull request #94 from nodchip/nnue-player-merge-2020-08-28

Nnue player merge 2020 08 28
This commit is contained in:
nodchip
2020-08-30 09:27:12 +09:00
committed by GitHub
47 changed files with 1046 additions and 2276 deletions
+29 -8
View File
@@ -43,26 +43,47 @@ before_script:
- cd src - cd src
script: script:
# Download net
- make net
# Obtain bench reference from git log # Obtain bench reference from git log
- git log HEAD | grep "\b[Bb]ench[ :]\+[0-9]\{7\}" | head -n 1 | sed "s/[^0-9]*\([0-9]*\).*/\1/g" > git_sig - git log HEAD | grep "\b[Bb]ench[ :]\+[0-9]\{7\}" | head -n 1 | sed "s/[^0-9]*\([0-9]*\).*/\1/g" > git_sig
- export benchref=$(cat git_sig) - export benchref=$(cat git_sig)
- echo "Reference bench:" $benchref - echo "Reference bench:" $benchref
#
# Compiler version string # Compiler version string
- $COMPILER -v - $COMPILER -v
# # test help target
- make help
# Verify bench number against various builds # Verify bench number against various builds
- export CXXFLAGS="-Werror -D_GLIBCXX_DEBUG" - export CXXFLAGS="-Werror -D_GLIBCXX_DEBUG"
- make clean && make -j2 ARCH=x86-64 optimize=no debug=yes build && ../tests/signature.sh $benchref - make clean && make -j2 ARCH=x86-64-modern optimize=no debug=yes build && ../tests/signature.sh $benchref
- export CXXFLAGS="-Werror"
- make clean && make -j2 ARCH=x86-64-modern build && ../tests/signature.sh $benchref
- make clean && make -j2 ARCH=x86-64-ssse3 build && ../tests/signature.sh $benchref
- make clean && make -j2 ARCH=x86-64-sse3-popcnt build && ../tests/signature.sh $benchref
- make clean && make -j2 ARCH=x86-64 build && ../tests/signature.sh $benchref
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=general-64 build && ../tests/signature.sh $benchref; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 optimize=no debug=yes build && ../tests/signature.sh $benchref; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 optimize=no debug=yes build && ../tests/signature.sh $benchref; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32-sse41-popcnt build && ../tests/signature.sh $benchref; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32-sse2 build && ../tests/signature.sh $benchref; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 build && ../tests/signature.sh $benchref; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 build && ../tests/signature.sh $benchref; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=general-32 build && ../tests/signature.sh $benchref; fi
# workaround: exclude a custom version of llvm+clang, which doesn't find llvm-profdata on ubuntu
- if [[ "$TRAVIS_OS_NAME" != "linux" || "$COMP" == "gcc" ]]; then make clean && make -j2 ARCH=x86-64-modern profile-build && ../tests/signature.sh $benchref; fi
# compile-only checks for some more advanced architectures (the resulting binaries might not run on the Travis machines)
- make clean && make -j2 ARCH=x86-64-avx2 build
- make clean && make -j2 ARCH=x86-64-bmi2 build
- make clean && make -j2 ARCH=x86-64-avx512 build
- make clean && make -j2 ARCH=x86-64-vnni512 build
- make clean && make -j2 ARCH=x86-64-vnni256 build
# #
# Check perft and reproducible search # Check perft and reproducible search
- export CXXFLAGS="-Werror" - make clean && make -j2 ARCH=x86-64-modern build
- make clean && make -j2 ARCH=x86-64 build
- ../tests/perft.sh - ../tests/perft.sh
- ../tests/reprosearch.sh - ../tests/reprosearch.sh
@@ -70,11 +91,11 @@ script:
# Valgrind # Valgrind
# #
- export CXXFLAGS="-O1 -fno-inline" - export CXXFLAGS="-O1 -fno-inline"
- if [ -x "$(command -v valgrind )" ]; then make clean && make -j2 ARCH=x86-64 debug=yes optimize=no build > /dev/null && ../tests/instrumented.sh --valgrind; fi - if [ -x "$(command -v valgrind )" ]; then make clean && make -j2 ARCH=x86-64-modern debug=yes optimize=no build > /dev/null && ../tests/instrumented.sh --valgrind; fi
- if [ -x "$(command -v valgrind )" ]; then ../tests/instrumented.sh --valgrind-thread; fi - if [ -x "$(command -v valgrind )" ]; then ../tests/instrumented.sh --valgrind-thread; fi
# #
# Sanitizer # Sanitizer
# #
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64 sanitize=undefined optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-undefined; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64-modern sanitize=undefined optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-undefined; fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64 sanitize=thread optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-thread; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64-modern sanitize=thread optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-thread; fi
+3
View File
@@ -53,11 +53,13 @@ Ernesto Gatti
Linmiao Xu (linrock) Linmiao Xu (linrock)
Fabian Beuke (madnight) Fabian Beuke (madnight)
Fabian Fichter (ianfab) Fabian Fichter (ianfab)
Fanael Linithien (Fanael)
fanon fanon
Fauzi Akram Dabat (FauziAkram) Fauzi Akram Dabat (FauziAkram)
Felix Wittmann Felix Wittmann
gamander gamander
Gary Heckman (gheckman) Gary Heckman (gheckman)
George Sobala (gsobala)
gguliash gguliash
Gian-Carlo Pascutto (gcp) Gian-Carlo Pascutto (gcp)
Gontran Lemaire (gonlem) Gontran Lemaire (gonlem)
@@ -126,6 +128,7 @@ Niklas Fiekas (niklasf)
Nikolay Kostov (NikolayIT) Nikolay Kostov (NikolayIT)
Nguyen Pham (nguyenpham) Nguyen Pham (nguyenpham)
Norman Schmidt (FireFather) Norman Schmidt (FireFather)
notruck
Ondrej Mosnáček (WOnder93) Ondrej Mosnáček (WOnder93)
Oskar Werkelin Ahlin Oskar Werkelin Ahlin
Pablo Vazquez Pablo Vazquez
+14
View File
@@ -61,6 +61,20 @@ before_build:
build_script: build_script:
- cmake --build . --config %CONFIGURATION% -- /verbosity:minimal - cmake --build . --config %CONFIGURATION% -- /verbosity:minimal
- ps: |
# Download default NNUE net from fishtest
$nnuenet = Get-Content -Path src\ucioption.cpp | Select-String -CaseSensitive -Pattern "Option" | Select-String -CaseSensitive -Pattern "nn-[a-z0-9]{12}.nnue"
$dummy = $nnuenet -match "(?<nnuenet>nn-[a-z0-9]{12}.nnue)"
$nnuenet = $Matches.nnuenet
Write-Host "Default net:" $nnuenet
$nnuedownloadurl = "https://tests.stockfishchess.org/api/nn/$nnuenet"
$nnuefilepath = "src\${env:CONFIGURATION}\$nnuenet"
if (Test-Path -Path $nnuefilepath) {
Write-Host "Already available."
} else {
Write-Host "Downloading $nnuedownloadurl to $nnuefilepath"
Invoke-WebRequest -Uri $nnuedownloadurl -OutFile $nnuefilepath
}
before_test: before_test:
- cd src/%CONFIGURATION% - cd src/%CONFIGURATION%
+297 -170
View File
@@ -40,7 +40,6 @@ PGOGENSFEN = ./$(EXE) gensfen depth 3 loop 100000
SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp \ SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp \
material.cpp misc.cpp movegen.cpp movepick.cpp pawns.cpp position.cpp psqt.cpp \ material.cpp misc.cpp movegen.cpp movepick.cpp pawns.cpp position.cpp psqt.cpp \
search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \ search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \
eval/evaluate_mir_inv_tools.cpp \
nnue/evaluate_nnue.cpp \ nnue/evaluate_nnue.cpp \
nnue/evaluate_nnue_learner.cpp \ nnue/evaluate_nnue_learner.cpp \
nnue/features/half_kp.cpp \ nnue/features/half_kp.cpp \
@@ -82,14 +81,16 @@ endif
# bits = 64/32 --- -DIS_64BIT --- 64-/32-bit operating system # bits = 64/32 --- -DIS_64BIT --- 64-/32-bit operating system
# prefetch = yes/no --- -DUSE_PREFETCH --- Use prefetch asm-instruction # prefetch = yes/no --- -DUSE_PREFETCH --- Use prefetch asm-instruction
# popcnt = yes/no --- -DUSE_POPCNT --- Use popcnt asm-instruction # popcnt = yes/no --- -DUSE_POPCNT --- Use popcnt asm-instruction
# pext = yes/no --- -DUSE_PEXT --- Use pext x86_64 asm-instruction
# sse = yes/no --- -msse --- Use Intel Streaming SIMD Extensions # sse = yes/no --- -msse --- Use Intel Streaming SIMD Extensions
# sse3 = yes/no --- -msse3 --- Use Intel Streaming SIMD Extensions 3 # mmx = yes/no --- -mmmx --- Use Intel MMX instructions
# sse2 = yes/no --- -msse2 --- Use Intel Streaming SIMD Extensions 2
# ssse3 = yes/no --- -mssse3 --- Use Intel Supplemental Streaming SIMD Extensions 3 # ssse3 = yes/no --- -mssse3 --- Use Intel Supplemental Streaming SIMD Extensions 3
# sse41 = yes/no --- -msse4.1 --- Use Intel Streaming SIMD Extensions 4.1 # sse41 = yes/no --- -msse4.1 --- Use Intel Streaming SIMD Extensions 4.1
# sse42 = yes/no --- -msse4.2 --- Use Intel Streaming SIMD Extensions 4.2
# avx2 = yes/no --- -mavx2 --- Use Intel Advanced Vector Extensions 2 # avx2 = yes/no --- -mavx2 --- Use Intel Advanced Vector Extensions 2
# pext = yes/no --- -DUSE_PEXT --- Use pext x86_64 asm-instruction
# avx512 = yes/no --- -mavx512bw --- Use Intel Advanced Vector Extensions 512 # avx512 = yes/no --- -mavx512bw --- Use Intel Advanced Vector Extensions 512
# vnni256 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 256
# vnni512 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 512
# neon = yes/no --- -DUSE_NEON --- Use ARM SIMD architecture # neon = yes/no --- -DUSE_NEON --- Use ARM SIMD architecture
# #
# Note that Makefile is space sensitive, so when adding new architectures # Note that Makefile is space sensitive, so when adding new architectures
@@ -97,152 +98,184 @@ endif
# at the end of the line for flag values. # at the end of the line for flag values.
### 2.1. General and architecture defaults ### 2.1. General and architecture defaults
# Explicitly check ARCH against the list of supported architectures (as listed by `make help`).
# The user can override this check with `make ARCH=x86-32-vnni256 SUPPORTED_ARCH=true`.
ifeq ($(ARCH),$(filter $(ARCH),x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-bmi2 x86-64-avx2 \
x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \
x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 \
armv7 armv7-neon armv8 apple-silicon general-64 general-32))
SUPPORTED_ARCH=true
else
SUPPORTED_ARCH=false
endif
optimize = yes optimize = yes
debug = no debug = no
sanitize = no sanitize = no
bits = 64 bits = 64
prefetch = no prefetch = no
popcnt = no popcnt = no
pext = no
sse = no sse = no
sse3 = no mmx = no
sse2 = no
ssse3 = no ssse3 = no
sse41 = no sse41 = no
sse42 = no
avx2 = no avx2 = no
pext = no
avx512 = no avx512 = no
vnni256 = no
vnni512 = no
neon = no neon = no
ARCH = x86-64-modern ARCH = x86-64-modern
STRIP = strip
### 2.2 Architecture specific ### 2.2 Architecture specific
ifeq ($(findstring x86,$(ARCH)),x86)
# x86-32/64
ifeq ($(findstring x86-32,$(ARCH)),x86-32)
arch = i386
bits = 32
sse = yes
mmx = yes
else
arch = x86_64
sse = yes
sse2 = yes
endif
ifeq ($(findstring -sse,$(ARCH)),-sse)
sse = yes
endif
ifeq ($(findstring -popcnt,$(ARCH)),-popcnt)
popcnt = yes
endif
ifeq ($(findstring -mmx,$(ARCH)),-mmx)
mmx = yes
endif
ifeq ($(findstring -sse2,$(ARCH)),-sse2)
sse = yes
sse2 = yes
endif
ifeq ($(findstring -ssse3,$(ARCH)),-ssse3)
sse = yes
sse2 = yes
ssse3 = yes
endif
ifeq ($(findstring -sse41,$(ARCH)),-sse41)
sse = yes
sse2 = yes
ssse3 = yes
sse41 = yes
endif
ifeq ($(findstring -modern,$(ARCH)),-modern)
popcnt = yes
sse = yes
sse2 = yes
ssse3 = yes
sse41 = yes
endif
ifeq ($(findstring -avx2,$(ARCH)),-avx2)
popcnt = yes
sse = yes
sse2 = yes
ssse3 = yes
sse41 = yes
avx2 = yes
endif
ifeq ($(findstring -bmi2,$(ARCH)),-bmi2)
popcnt = yes
sse = yes
sse2 = yes
ssse3 = yes
sse41 = yes
avx2 = yes
pext = yes
endif
ifeq ($(findstring -avx512,$(ARCH)),-avx512)
popcnt = yes
sse = yes
sse2 = yes
ssse3 = yes
sse41 = yes
avx2 = yes
pext = yes
avx512 = yes
endif
ifeq ($(findstring -vnni256,$(ARCH)),-vnni256)
popcnt = yes
sse = yes
sse2 = yes
ssse3 = yes
sse41 = yes
avx2 = yes
pext = yes
vnni256 = yes
endif
ifeq ($(findstring -vnni512,$(ARCH)),-vnni512)
popcnt = yes
sse = yes
sse2 = yes
ssse3 = yes
sse41 = yes
avx2 = yes
pext = yes
avx512 = yes
vnni512 = yes
endif
ifeq ($(sse),yes)
prefetch = yes
endif
# 64-bit pext is not available on x86-32
ifeq ($(bits),32)
pext = no
endif
else
# all other architectures
ifeq ($(ARCH),general-32) ifeq ($(ARCH),general-32)
arch = any arch = any
bits = 32 bits = 32
endif endif
ifeq ($(ARCH),x86-32-old)
arch = i386
bits = 32
endif
ifeq ($(ARCH),x86-32)
arch = i386
bits = 32
prefetch = yes
sse = yes
endif
ifeq ($(ARCH),general-64) ifeq ($(ARCH),general-64)
arch = any arch = any
endif endif
ifeq ($(ARCH),x86-64)
arch = x86_64
prefetch = yes
sse = yes
endif
ifeq ($(ARCH),x86-64-sse3)
arch = x86_64
prefetch = yes
sse = yes
sse3 = yes
endif
ifeq ($(ARCH),x86-64-sse3-popcnt)
arch = x86_64
prefetch = yes
sse = yes
sse3 = yes
popcnt = yes
endif
ifeq ($(ARCH),x86-64-ssse3)
arch = x86_64
prefetch = yes
sse = yes
sse3 = yes
ssse3 = yes
endif
ifeq ($(ARCH),x86-64-sse41)
arch = x86_64
prefetch = yes
popcnt = yes
sse = yes
sse3 = yes
ssse3 = yes
sse41 = yes
endif
ifeq ($(ARCH),x86-64-modern)
arch = x86_64
prefetch = yes
popcnt = yes
sse = yes
sse3 = yes
ssse3 = yes
sse41 = yes
endif
ifeq ($(ARCH),x86-64-sse42)
arch = x86_64
prefetch = yes
popcnt = yes
sse = yes
sse3 = yes
ssse3 = yes
sse41 = yes
sse42 = yes
endif
ifeq ($(ARCH),x86-64-avx2)
arch = x86_64
prefetch = yes
popcnt = yes
sse = yes
sse3 = yes
ssse3 = yes
sse41 = yes
sse42 = yes
avx2 = yes
endif
ifeq ($(ARCH),x86-64-bmi2)
arch = x86_64
prefetch = yes
popcnt = yes
sse = yes
sse3 = yes
ssse3 = yes
sse41 = yes
sse42 = yes
avx2 = yes
pext = yes
endif
ifeq ($(ARCH),x86-64-avx512)
arch = x86_64
prefetch = yes
popcnt = yes
sse = yes
sse3 = yes
ssse3 = yes
sse41 = yes
sse42 = yes
avx2 = yes
pext = yes
avx512 = yes
endif
ifeq ($(ARCH),armv7) ifeq ($(ARCH),armv7)
arch = armv7 arch = armv7
prefetch = yes prefetch = yes
bits = 32 bits = 32
endif endif
ifeq ($(ARCH),armv7-neon)
arch = armv7
prefetch = yes
popcnt = yes
neon = yes
bits = 32
endif
ifeq ($(ARCH),armv8) ifeq ($(ARCH),armv8)
arch = armv8-a arch = armv8
prefetch = yes prefetch = yes
popcnt = yes popcnt = yes
neon = yes neon = yes
@@ -266,6 +299,8 @@ ifeq ($(ARCH),ppc-64)
prefetch = yes prefetch = yes
endif endif
endif
### ========================================================================== ### ==========================================================================
### Section 3. Low-level Configuration ### Section 3. Low-level Configuration
### ========================================================================== ### ==========================================================================
@@ -284,7 +319,7 @@ ifeq ($(COMP),gcc)
CXX=g++ CXX=g++
CXXFLAGS += -pedantic -Wextra -Wshadow CXXFLAGS += -pedantic -Wextra -Wshadow
ifeq ($(ARCH),$(filter $(ARCH),armv7 armv8)) ifeq ($(arch),$(filter $(arch),armv7 armv8))
ifeq ($(OS),Android) ifeq ($(OS),Android)
CXXFLAGS += -m$(bits) CXXFLAGS += -m$(bits)
LDFLAGS += -m$(bits) LDFLAGS += -m$(bits)
@@ -294,12 +329,13 @@ ifeq ($(COMP),gcc)
LDFLAGS += -m$(bits) LDFLAGS += -m$(bits)
endif endif
ifeq ($(arch),$(filter $(arch),armv7))
LDFLAGS += -latomic
endif
ifneq ($(KERNEL),Darwin) ifneq ($(KERNEL),Darwin)
LDFLAGS += -Wl,--no-as-needed LDFLAGS += -Wl,--no-as-needed
endif endif
gccversion = $(shell $(CXX) --version)
gccisclang = $(findstring clang,$(gccversion))
endif endif
ifeq ($(COMP),mingw) ifeq ($(COMP),mingw)
@@ -344,7 +380,7 @@ ifeq ($(COMP),clang)
endif endif
endif endif
ifeq ($(ARCH),$(filter $(ARCH),armv7 armv8)) ifeq ($(arch),$(filter $(arch),armv7 armv8))
ifeq ($(OS),Android) ifeq ($(OS),Android)
CXXFLAGS += -m$(bits) CXXFLAGS += -m$(bits)
LDFLAGS += -m$(bits) LDFLAGS += -m$(bits)
@@ -371,6 +407,26 @@ endif
ifeq ($(KERNEL),Darwin) ifeq ($(KERNEL),Darwin)
CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.14 CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.14
LDFLAGS += -arch $(arch) -mmacosx-version-min=10.14 LDFLAGS += -arch $(arch) -mmacosx-version-min=10.14
XCRUN = xcrun
endif
# To cross-compile for Android, NDK version r21 or later is recommended.
# In earlier NDK versions, you'll need to pass -fno-addrsig if using GNU binutils.
# We do not yet know how to make PGO builds with the NDK.
ifeq ($(COMP),ndk)
CXXFLAGS += -stdlib=libc++ -fPIE
ifeq ($(arch),armv7)
comp=armv7a-linux-androideabi16-clang
CXX=armv7a-linux-androideabi16-clang++
CXXFLAGS += -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=neon
STRIP=arm-linux-androideabi-strip
endif
ifeq ($(arch),armv8)
comp=aarch64-linux-android21-clang
CXX=aarch64-linux-android21-clang++
STRIP=aarch64-linux-android-strip
endif
LDFLAGS += -static-libstdc++ -pie -lm -latomic
endif endif
### Travis CI script uses COMPILER to overwrite CXX ### Travis CI script uses COMPILER to overwrite CXX
@@ -383,16 +439,29 @@ ifdef COMPCXX
CXX=$(COMPCXX) CXX=$(COMPCXX)
endif endif
### Sometimes gcc is really clang
ifeq ($(COMP),gcc)
gccversion = $(shell $(CXX) --version)
gccisclang = $(findstring clang,$(gccversion))
ifneq ($(gccisclang),)
profile_make = clang-profile-make
profile_use = clang-profile-use
endif
endif
### On mingw use Windows threads, otherwise POSIX ### On mingw use Windows threads, otherwise POSIX
ifneq ($(comp),mingw) ifneq ($(comp),mingw)
CXXFLAGS += -DUSE_PTHREADS
# On Android Bionic's C library comes with its own pthread implementation bundled in # On Android Bionic's C library comes with its own pthread implementation bundled in
ifneq ($(OS),Android) ifneq ($(OS),Android)
# Haiku has pthreads in its libroot, so only link it in on other platforms # Haiku has pthreads in its libroot, so only link it in on other platforms
ifneq ($(KERNEL),Haiku) ifneq ($(KERNEL),Haiku)
ifneq ($(COMP),ndk)
LDFLAGS += -lpthread LDFLAGS += -lpthread
endif endif
endif endif
endif endif
endif
### 3.2.1 Debugging ### 3.2.1 Debugging
ifeq ($(debug),no) ifeq ($(debug),no)
@@ -434,7 +503,6 @@ endif
ifeq ($(prefetch),yes) ifeq ($(prefetch),yes)
ifeq ($(sse),yes) ifeq ($(sse),yes)
CXXFLAGS += -msse CXXFLAGS += -msse
DEPENDFLAGS += -msse
endif endif
else else
CXXFLAGS += -DNO_PREFETCH CXXFLAGS += -DNO_PREFETCH
@@ -442,7 +510,7 @@ endif
### 3.6 popcnt ### 3.6 popcnt
ifeq ($(popcnt),yes) ifeq ($(popcnt),yes)
ifeq ($(arch),$(filter $(arch),ppc64 armv8-a arm64)) ifeq ($(arch),$(filter $(arch),ppc64 armv7 armv8 arm64))
CXXFLAGS += -DUSE_POPCNT CXXFLAGS += -DUSE_POPCNT
else ifeq ($(comp),icc) else ifeq ($(comp),icc)
CXXFLAGS += -msse3 -DUSE_POPCNT CXXFLAGS += -msse3 -DUSE_POPCNT
@@ -451,6 +519,7 @@ ifeq ($(popcnt),yes)
endif endif
endif endif
ifeq ($(avx2),yes) ifeq ($(avx2),yes)
CXXFLAGS += -DUSE_AVX2 CXXFLAGS += -DUSE_AVX2
ifeq ($(comp),$(filter $(comp),gcc clang mingw)) ifeq ($(comp),$(filter $(comp),gcc clang mingw))
@@ -461,14 +530,21 @@ endif
ifeq ($(avx512),yes) ifeq ($(avx512),yes)
CXXFLAGS += -DUSE_AVX512 CXXFLAGS += -DUSE_AVX512
ifeq ($(comp),$(filter $(comp),gcc clang mingw)) ifeq ($(comp),$(filter $(comp),gcc clang mingw))
CXXFLAGS += -mavx512bw CXXFLAGS += -mavx512f -mavx512bw
endif endif
endif endif
ifeq ($(sse42),yes) ifeq ($(vnni256),yes)
CXXFLAGS += -DUSE_SSE42 CXXFLAGS += -DUSE_VNNI
ifeq ($(comp),$(filter $(comp),gcc clang mingw)) ifeq ($(comp),$(filter $(comp),gcc clang mingw))
CXXFLAGS += -msse4.2 CXXFLAGS += -mavx512f -mavx512bw -mavx512vnni -mavx512dq -mavx512vl -mprefer-vector-width=256
endif
endif
ifeq ($(vnni512),yes)
CXXFLAGS += -DUSE_VNNI
ifeq ($(comp),$(filter $(comp),gcc clang mingw))
CXXFLAGS += -mavx512vnni -mavx512dq -mavx512vl
endif endif
endif endif
@@ -486,19 +562,29 @@ ifeq ($(ssse3),yes)
endif endif
endif endif
ifeq ($(sse3),yes) ifeq ($(sse2),yes)
CXXFLAGS += -DUSE_SSE3 CXXFLAGS += -DUSE_SSE2
ifeq ($(comp),$(filter $(comp),gcc clang mingw)) ifeq ($(comp),$(filter $(comp),gcc clang mingw))
CXXFLAGS += -msse3 CXXFLAGS += -msse2
endif
endif
ifeq ($(mmx),yes)
CXXFLAGS += -DUSE_MMX
ifeq ($(comp),$(filter $(comp),gcc clang mingw))
CXXFLAGS += -mmmx
endif endif
endif endif
ifeq ($(neon),yes) ifeq ($(neon),yes)
CXXFLAGS += -DUSE_NEON CXXFLAGS += -DUSE_NEON
ifeq ($(KERNEL),Linux)
ifneq ($(COMP),ndk)
ifneq ($(arch),armv8)
CXXFLAGS += -mfpu=neon
endif
endif
endif endif
ifeq ($(arch),x86_64)
CXXFLAGS += -DUSE_SSE2
endif endif
### 3.7 pext ### 3.7 pext
@@ -514,7 +600,10 @@ endif
### needs access to the optimization flags. ### needs access to the optimization flags.
ifeq ($(optimize),yes) ifeq ($(optimize),yes)
ifeq ($(debug), no) ifeq ($(debug), no)
ifeq ($(comp),clang) ifeq ($(COMP),ndk)
CXXFLAGS += -flto=thin
LDFLAGS += $(CXXFLAGS)
else ifeq ($(comp),clang)
CXXFLAGS += -flto=thin CXXFLAGS += -flto=thin
LDFLAGS += $(CXXFLAGS) LDFLAGS += $(CXXFLAGS)
@@ -524,13 +613,18 @@ ifeq ($(debug), no)
ifeq ($(gccisclang),) ifeq ($(gccisclang),)
CXXFLAGS += -flto CXXFLAGS += -flto
LDFLAGS += $(CXXFLAGS) -flto=jobserver LDFLAGS += $(CXXFLAGS) -flto=jobserver
ifneq ($(findstring MINGW,$(KERNEL)),)
LDFLAGS += -save-temps
else ifneq ($(findstring MSYS,$(KERNEL)),)
LDFLAGS += -save-temps
endif
else else
CXXFLAGS += -flto=thin CXXFLAGS += -flto=thin
LDFLAGS += $(CXXFLAGS) LDFLAGS += $(CXXFLAGS)
endif endif
# To use LTO and static linking on windows, the tool chain requires a recent gcc: # To use LTO and static linking on windows, the tool chain requires a recent gcc:
# gcc version 10.1 in msys2 or TDM-GCC version 9.2 are know to work, older might not. # gcc version 10.1 in msys2 or TDM-GCC version 9.2 are known to work, older might not.
# So, only enable it for a cross from Linux by default. # So, only enable it for a cross from Linux by default.
else ifeq ($(comp),mingw) else ifeq ($(comp),mingw)
ifeq ($(KERNEL),Linux) ifeq ($(KERNEL),Linux)
@@ -552,6 +646,7 @@ endif
### Section 4. Public Targets ### Section 4. Public Targets
### ========================================================================== ### ==========================================================================
help: help:
@echo "" @echo ""
@echo "To compile stockfish, type: " @echo "To compile stockfish, type: "
@@ -560,31 +655,34 @@ help:
@echo "" @echo ""
@echo "Supported targets:" @echo "Supported targets:"
@echo "" @echo ""
@echo "help > Display architecture details"
@echo "build > Standard build" @echo "build > Standard build"
@echo "profile-build > Standard build with PGO" @echo "net > Download the default nnue net"
@echo "profile-build > Faster build (with profile-guided optimization)"
@echo "strip > Strip executable" @echo "strip > Strip executable"
@echo "install > Install executable" @echo "install > Install executable"
@echo "clean > Clean up" @echo "clean > Clean up"
@echo "net > Download the default nnue net"
@echo "" @echo ""
@echo "Supported archs:" @echo "Supported archs:"
@echo "" @echo ""
@echo "x86-64-vnni512 > x86 64-bit with vnni support 512bit wide"
@echo "x86-64-vnni256 > x86 64-bit with vnni support 256bit wide"
@echo "x86-64-avx512 > x86 64-bit with avx512 support" @echo "x86-64-avx512 > x86 64-bit with avx512 support"
@echo "x86-64-bmi2 > x86 64-bit with bmi2 support" @echo "x86-64-bmi2 > x86 64-bit with bmi2 support"
@echo "x86-64-avx2 > x86 64-bit with avx2 support" @echo "x86-64-avx2 > x86 64-bit with avx2 support"
@echo "x86-64-sse42 > x86 64-bit with sse42 support" @echo "x86-64-sse41-popcnt > x86 64-bit with sse41 and popcnt support"
@echo "x86-64-modern > x86 64-bit with sse41 support (x86-64-sse41)" @echo "x86-64-modern > common modern CPU, currently x86-64-sse41-popcnt"
@echo "x86-64-sse41 > x86 64-bit with sse41 support"
@echo "x86-64-ssse3 > x86 64-bit with ssse3 support" @echo "x86-64-ssse3 > x86 64-bit with ssse3 support"
@echo "x86-64-sse3-popcnt > x86 64-bit with sse3 and popcnt support" @echo "x86-64-sse3-popcnt > x86 64-bit with sse3 and popcnt support"
@echo "x86-64-sse3 > x86 64-bit with sse3 support" @echo "x86-64 > x86 64-bit generic (with sse2 support)"
@echo "x86-64 > x86 64-bit generic" @echo "x86-32-sse41-popcnt > x86 32-bit with sse41 and popcnt support"
@echo "x86-32 > x86 32-bit (also enables SSE)" @echo "x86-32-sse2 > x86 32-bit with sse2 support"
@echo "x86-32-old > x86 32-bit fall back for old hardware" @echo "x86-32 > x86 32-bit generic (with mmx and sse support)"
@echo "ppc-64 > PPC 64-bit" @echo "ppc-64 > PPC 64-bit"
@echo "ppc-32 > PPC 32-bit" @echo "ppc-32 > PPC 32-bit"
@echo "armv7 > ARMv7 32-bit" @echo "armv7 > ARMv7 32-bit"
@echo "armv8 > ARMv8 64-bit" @echo "armv7-neon > ARMv7 32-bit with popcnt and neon"
@echo "armv8 > ARMv8 64-bit with popcnt and neon"
@echo "apple-silicon > Apple silicon ARM64" @echo "apple-silicon > Apple silicon ARM64"
@echo "general-64 > unspecified 64-bit" @echo "general-64 > unspecified 64-bit"
@echo "general-32 > unspecified 32-bit" @echo "general-32 > unspecified 32-bit"
@@ -595,20 +693,26 @@ help:
@echo "mingw > Gnu compiler with MinGW under Windows" @echo "mingw > Gnu compiler with MinGW under Windows"
@echo "clang > LLVM Clang compiler" @echo "clang > LLVM Clang compiler"
@echo "icc > Intel compiler" @echo "icc > Intel compiler"
@echo "ndk > Google NDK to cross-compile for Android"
@echo "" @echo ""
@echo "Simple examples. If you don't know what to do, you likely want to run: " @echo "Simple examples. If you don't know what to do, you likely want to run: "
@echo "" @echo ""
@echo "make -j build ARCH=x86-64 (This is for 64-bit systems)" @echo "make -j build ARCH=x86-64 (A portable, slow compile for 64-bit systems)"
@echo "make -j build ARCH=x86-32 (This is for 32-bit systems)" @echo "make -j build ARCH=x86-32 (A portable, slow compile for 32-bit systems)"
@echo "" @echo ""
@echo "Advanced examples, for experienced users: " @echo "Advanced examples, for experienced users looking for performance: "
@echo "" @echo ""
@echo "make -j build ARCH=x86-64-modern COMP=clang" @echo "make help ARCH=x86-64-bmi2"
@echo "make -j profile-build ARCH=x86-64-bmi2 COMP=gcc COMPCXX=g++-4.8" @echo "make -j profile-build ARCH=x86-64-bmi2 COMP=gcc COMPCXX=g++-9.0"
@echo "" @echo "make -j build ARCH=x86-64-ssse3 COMP=clang"
@echo "The selected architecture $(ARCH) enables the following configuration: "
@echo "" @echo ""
@echo "-------------------------------"
ifeq ($(SUPPORTED_ARCH), true)
@echo "The selected architecture $(ARCH) will enable the following configuration: "
@$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity @$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity
else
@echo "Specify a supported architecture with the ARCH option for more details"
endif
.PHONY: help build profile-build strip install clean net objclean profileclean \ .PHONY: help build profile-build strip install clean net objclean profileclean \
@@ -618,7 +722,7 @@ help:
build: config-sanity build: config-sanity
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) all $(MAKE) ARCH=$(ARCH) COMP=$(COMP) all
profile-build: config-sanity objclean profileclean profile-build: net config-sanity objclean profileclean
@echo "" @echo ""
@echo "Step 1/4. Building instrumented executable ..." @echo "Step 1/4. Building instrumented executable ..."
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make) $(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make)
@@ -634,7 +738,7 @@ profile-build: config-sanity objclean profileclean
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) profileclean $(MAKE) ARCH=$(ARCH) COMP=$(COMP) profileclean
strip: strip:
strip $(EXE) $(STRIP) $(EXE)
install: install:
-mkdir -p -m 755 $(BINDIR) -mkdir -p -m 755 $(BINDIR)
@@ -649,17 +753,34 @@ net:
$(eval nnuenet := $(shell grep EvalFile ucioption.cpp | grep Option | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/')) $(eval nnuenet := $(shell grep EvalFile ucioption.cpp | grep Option | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/'))
@echo "Default net: $(nnuenet)" @echo "Default net: $(nnuenet)"
$(eval nnuedownloadurl := https://tests.stockfishchess.org/api/nn/$(nnuenet)) $(eval nnuedownloadurl := https://tests.stockfishchess.org/api/nn/$(nnuenet))
$(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -sL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi)) $(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -skL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi))
@if test -f "$(nnuenet)"; then echo "Already available."; else echo "Downloading $(nnuedownloadurl)"; $(curl_or_wget) $(nnuedownloadurl) > $(nnuenet); fi @if test -f "$(nnuenet)"; then \
echo "Already available."; \
else \
if [ "x$(curl_or_wget)" = "x" ]; then \
echo "Automatic download failed: neither curl nor wget is installed. Install one of these tools or download the net manually"; exit 1; \
else \
echo "Downloading $(nnuedownloadurl)"; $(curl_or_wget) $(nnuedownloadurl) > $(nnuenet);\
fi; \
fi;
$(eval shasum_command := $(shell if hash shasum 2>/dev/null; then echo "shasum -a 256 "; elif hash sha256sum 2>/dev/null; then echo "sha256sum "; fi))
@if [ "x$(shasum_command)" != "x" ]; then \
if [ "$(nnuenet)" != "nn-"`$(shasum_command) $(nnuenet) | cut -c1-12`".nnue" ]; then \
echo "Failed download or $(nnuenet) corrupted, please delete!"; exit 1; \
fi \
else \
echo "shasum / sha256sum not found, skipping net validation"; \
fi
# clean binaries and objects # clean binaries and objects
objclean: objclean:
@rm -f $(EXE) *.o ./syzygy/*.o ./learn/*.o ./extra/*.o ./eval/*.o ./nnue/*.o ./nnue/features/*.o @rm -f $(EXE) *.o ./syzygy/*.o ./nnue/*.o ./nnue/features/*.o ./learn/*.o ./extra/*.o ./eval/*.o
# clean auxiliary profiling files # clean auxiliary profiling files
profileclean: profileclean:
@rm -rf profdir @rm -rf profdir
@rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./learn/*.gcda ./extra/*.gcda ./eval/*.gcda ./nnue/*.gcda ./nnue/features/*.gcda @rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./nnue/*.gcda ./nnue/features/*.gcda *.s ./learn/*.gcda ./extra/*.gcda ./eval/*.gcda
@rm -f stockfish.profdata *.profraw @rm -f stockfish.profdata *.profraw
default: default:
@@ -683,14 +804,16 @@ config-sanity:
@echo "os: '$(OS)'" @echo "os: '$(OS)'"
@echo "prefetch: '$(prefetch)'" @echo "prefetch: '$(prefetch)'"
@echo "popcnt: '$(popcnt)'" @echo "popcnt: '$(popcnt)'"
@echo "pext: '$(pext)'"
@echo "sse: '$(sse)'" @echo "sse: '$(sse)'"
@echo "sse3: '$(sse3)'" @echo "mmx: '$(mmx)'"
@echo "sse2: '$(sse2)'"
@echo "ssse3: '$(ssse3)'" @echo "ssse3: '$(ssse3)'"
@echo "sse41: '$(sse41)'" @echo "sse41: '$(sse41)'"
@echo "sse42: '$(sse42)'"
@echo "avx2: '$(avx2)'" @echo "avx2: '$(avx2)'"
@echo "pext: '$(pext)'"
@echo "avx512: '$(avx512)'" @echo "avx512: '$(avx512)'"
@echo "vnni256: '$(vnni256)'"
@echo "vnni512: '$(vnni512)'"
@echo "neon: '$(neon)'" @echo "neon: '$(neon)'"
@echo "" @echo ""
@echo "Flags:" @echo "Flags:"
@@ -703,22 +826,26 @@ config-sanity:
@test "$(debug)" = "yes" || test "$(debug)" = "no" @test "$(debug)" = "yes" || test "$(debug)" = "no"
@test "$(sanitize)" = "undefined" || test "$(sanitize)" = "thread" || test "$(sanitize)" = "address" || test "$(sanitize)" = "no" @test "$(sanitize)" = "undefined" || test "$(sanitize)" = "thread" || test "$(sanitize)" = "address" || test "$(sanitize)" = "no"
@test "$(optimize)" = "yes" || test "$(optimize)" = "no" @test "$(optimize)" = "yes" || test "$(optimize)" = "no"
@test "$(SUPPORTED_ARCH)" = "true"
@test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \ @test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \
test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || \ test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || \
test "$(arch)" = "armv7" || test "$(arch)" = "armv8-a" || test "$(arch)" = "arm64" test "$(arch)" = "armv7" || test "$(arch)" = "armv8" || test "$(arch)" = "arm64"
@test "$(bits)" = "32" || test "$(bits)" = "64" @test "$(bits)" = "32" || test "$(bits)" = "64"
@test "$(prefetch)" = "yes" || test "$(prefetch)" = "no" @test "$(prefetch)" = "yes" || test "$(prefetch)" = "no"
@test "$(popcnt)" = "yes" || test "$(popcnt)" = "no" @test "$(popcnt)" = "yes" || test "$(popcnt)" = "no"
@test "$(pext)" = "yes" || test "$(pext)" = "no"
@test "$(sse)" = "yes" || test "$(sse)" = "no" @test "$(sse)" = "yes" || test "$(sse)" = "no"
@test "$(sse3)" = "yes" || test "$(sse3)" = "no" @test "$(mmx)" = "yes" || test "$(mmx)" = "no"
@test "$(sse2)" = "yes" || test "$(sse2)" = "no"
@test "$(ssse3)" = "yes" || test "$(ssse3)" = "no" @test "$(ssse3)" = "yes" || test "$(ssse3)" = "no"
@test "$(sse41)" = "yes" || test "$(sse41)" = "no" @test "$(sse41)" = "yes" || test "$(sse41)" = "no"
@test "$(sse42)" = "yes" || test "$(sse42)" = "no"
@test "$(avx2)" = "yes" || test "$(avx2)" = "no" @test "$(avx2)" = "yes" || test "$(avx2)" = "no"
@test "$(pext)" = "yes" || test "$(pext)" = "no"
@test "$(avx512)" = "yes" || test "$(avx512)" = "no" @test "$(avx512)" = "yes" || test "$(avx512)" = "no"
@test "$(vnni256)" = "yes" || test "$(vnni256)" = "no"
@test "$(vnni512)" = "yes" || test "$(vnni512)" = "no"
@test "$(neon)" = "yes" || test "$(neon)" = "no" @test "$(neon)" = "yes" || test "$(neon)" = "no"
@test "$(comp)" = "gcc" || test "$(comp)" = "icc" || test "$(comp)" = "mingw" || test "$(comp)" = "clang" @test "$(comp)" = "gcc" || test "$(comp)" = "icc" || test "$(comp)" = "mingw" || test "$(comp)" = "clang" \
|| test "$(comp)" = "armv7a-linux-androideabi16-clang" || test "$(comp)" = "aarch64-linux-android21-clang"
$(EXE): $(OBJS) $(EXE): $(OBJS)
+$(CXX) -o $@ $(OBJS) $(LDFLAGS) +$(CXX) -o $@ $(OBJS) $(LDFLAGS)
@@ -730,7 +857,7 @@ clang-profile-make:
all all
clang-profile-use: clang-profile-use:
llvm-profdata merge -output=stockfish.profdata *.profraw $(XCRUN) llvm-profdata merge -output=stockfish.profdata *.profraw
$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
EXTRACXXFLAGS='-fprofile-instr-use=stockfish.profdata' \ EXTRACXXFLAGS='-fprofile-instr-use=stockfish.profdata' \
EXTRALDFLAGS='-fprofile-use ' \ EXTRALDFLAGS='-fprofile-use ' \
+11 -2
View File
@@ -95,8 +95,9 @@ const vector<string> Defaults = {
/// setup_bench() builds a list of UCI commands to be run by bench. There /// setup_bench() builds a list of UCI commands to be run by bench. There
/// are five parameters: TT size in MB, number of search threads that /// are five parameters: TT size in MB, number of search threads that
/// should be used, the limit value spent for each position, a file name /// should be used, the limit value spent for each position, a file name
/// where to look for positions in FEN format and the type of the limit: /// where to look for positions in FEN format, the type of the limit:
/// depth, perft, nodes and movetime (in millisecs). /// depth, perft, nodes and movetime (in millisecs), and evaluation type
/// mixed (default), classical, NNUE.
/// ///
/// bench -> search default positions up to depth 13 /// bench -> search default positions up to depth 13
/// bench 64 1 15 -> search default positions up to depth 15 (TT = 64MB) /// bench 64 1 15 -> search default positions up to depth 15 (TT = 64MB)
@@ -115,6 +116,7 @@ vector<string> setup_bench(const Position& current, istream& is) {
string limit = (is >> token) ? token : "13"; string limit = (is >> token) ? token : "13";
string fenFile = (is >> token) ? token : "default"; string fenFile = (is >> token) ? token : "default";
string limitType = (is >> token) ? token : "depth"; string limitType = (is >> token) ? token : "depth";
string evalType = (is >> token) ? token : "mixed";
go = limitType == "eval" ? "eval" : "go " + limitType + " " + limit; go = limitType == "eval" ? "eval" : "go " + limitType + " " + limit;
@@ -146,13 +148,20 @@ vector<string> setup_bench(const Position& current, istream& is) {
list.emplace_back("setoption name Hash value " + ttSize); list.emplace_back("setoption name Hash value " + ttSize);
list.emplace_back("ucinewgame"); list.emplace_back("ucinewgame");
size_t posCounter = 0;
for (const string& fen : fens) for (const string& fen : fens)
if (fen.find("setoption") != string::npos) if (fen.find("setoption") != string::npos)
list.emplace_back(fen); list.emplace_back(fen);
else else
{ {
if (evalType == "classical" || (evalType == "mixed" && posCounter % 2 == 0))
list.emplace_back("setoption name Use NNUE value false");
else if (evalType == "NNUE" || (evalType == "mixed" && posCounter % 2 != 0))
list.emplace_back("setoption name Use NNUE value true");
list.emplace_back("position fen " + fen); list.emplace_back("position fen " + fen);
list.emplace_back(go); list.emplace_back(go);
++posCounter;
} }
return list; return list;
+10
View File
@@ -39,6 +39,16 @@ namespace {
Bitboard BishopTable[0x1480]; // To store bishop attacks Bitboard BishopTable[0x1480]; // To store bishop attacks
void init_magics(PieceType pt, Bitboard table[], Magic magics[]); void init_magics(PieceType pt, Bitboard table[], Magic magics[]);
}
/// safe_destination() returns the bitboard of target square for the given step
/// from the given square. If the step is off the board, returns empty bitboard.
inline Bitboard safe_destination(Square s, int step) {
Square to = Square(s + step);
return is_ok(to) && distance(s, to) <= 2 ? square_bb(to) : Bitboard(0);
} }
-10
View File
@@ -279,16 +279,6 @@ inline int edge_distance(File f) { return std::min(f, File(FILE_H - f)); }
inline int edge_distance(Rank r) { return std::min(r, Rank(RANK_8 - r)); } inline int edge_distance(Rank r) { return std::min(r, Rank(RANK_8 - r)); }
/// safe_destination() maps a square plus a step offset to the bitboard of the
/// square reached. An empty bitboard is returned when the target is not a valid
/// square or lies more than distance 2 away from the origin (i.e. the step
/// wrapped around the board edge).
inline Bitboard safe_destination(Square s, int step)
{
    const Square target = Square(s + step);

    // Reject targets that are off the board or that wrapped around an edge.
    if (!is_ok(target) || distance(s, target) > 2)
        return Bitboard(0);

    return square_bb(target);
}
/// attacks_bb(Square) returns the pseudo attacks of the given piece type /// attacks_bb(Square) returns the pseudo attacks of the given piece type
/// assuming an empty board. /// assuming an empty board.
-190
View File
@@ -1,190 +0,0 @@
#if defined(EVAL_NNUE) || defined(EVAL_LEARN)
#include "evaluate_mir_inv_tools.h"
namespace Eval
{
// --- tables
// Value when a certain PieceSquare is seen from the other side
// BONA_PIECE_INIT is -1, so it must be a signed type.
// Even if KPPT is expanded, PieceSquare will not exceed 2^15 for the time being, so int16_t is good.
int16_t inv_piece_[PieceSquare::PS_END];
// Returns the one at the position where a PieceSquare on the board is mirrored.
int16_t mir_piece_[PieceSquare::PS_END];
// --- methods
// Looks up, in inv_piece_, the PieceSquare that p becomes when it is seen
// from the other side.
PieceSquare inv_piece(PieceSquare p)
{
    return static_cast<PieceSquare>(inv_piece_[p]);
}
// Looks up, in mir_piece_, the PieceSquare at the horizontally mirrored
// position of the board piece p.
PieceSquare mir_piece(PieceSquare p)
{
    return static_cast<PieceSquare>(mir_piece_[p]);
}
std::function<void()> mir_piece_init_function;
// Fills the inv_piece_ (view from the other side) and mir_piece_ (file-mirrored)
// lookup tables, then runs the optional user hook mir_piece_init_function and
// sanity-checks the result with asserts.
// The work is performed on the first call only; later calls return immediately.
void init_mir_inv_tables()
{
    // Initialize the mirror and inverse tables.
    // Initialization is limited to once.
    static bool first = true;
    if (!first) return;
    first = false;

    // Pairs of (white base index, black base index), one pair per piece type.
    // A PieceSquare p belongs to pair i when t[i] <= p < t[i + 1].
    int t[] = {
        PieceSquare::PS_W_PAWN   , PieceSquare::PS_B_PAWN   ,
        PieceSquare::PS_W_KNIGHT , PieceSquare::PS_B_KNIGHT ,
        PieceSquare::PS_W_BISHOP , PieceSquare::PS_B_BISHOP ,
        PieceSquare::PS_W_ROOK   , PieceSquare::PS_B_ROOK   ,
        PieceSquare::PS_W_QUEEN  , PieceSquare::PS_B_QUEEN  ,
    };

    // Real number of entries in t. The pair search below must never index
    // past this: the previous hard-coded bound of 32 read beyond the end of
    // the array for every p that matched no pair.
    const int t_size = static_cast<int>(sizeof(t) / sizeof(t[0]));

    // Mark every entry as "not initialized" first.
    for (PieceSquare p = PieceSquare::PS_NONE; p < PieceSquare::PS_END; ++p)
    {
        inv_piece_[p] = PieceSquare::PS_NOT_INIT;

        // Entries below PS_W_PAWN have no mirror image; they map to themselves.
        mir_piece_[p] = (p < PieceSquare::PS_W_PAWN) ? p : PieceSquare::PS_NOT_INIT;
    }

    for (PieceSquare p = PieceSquare::PS_NONE; p < PieceSquare::PS_END; ++p)
    {
        for (int i = 0; i + 1 < t_size; i += 2)
        {
            if (t[i] <= p && p < t[i + 1])
            {
                Square sq = (Square)(p - t[i]);

                // found!!
                PieceSquare q = (p < PieceSquare::PS_W_PAWN) ? PieceSquare(sq + t[i + 1]) : (PieceSquare)(rotate180(sq) + t[i + 1]);
                inv_piece_[p] = q;
                inv_piece_[q] = p;

                /*
                    p lies in [t[i], t[i + 1]), i.e. it is the first (white)
                    entry of pair i on square sq = p - t[i].
                    q = rotate180(sq) + t[i + 1] is the second (black) entry of
                    the same pair on the board rotated by 180 degrees, so
                    inv_piece_ is filled in both directions by swapping p and q.
                */

                // Entries below PS_W_PAWN have no mirror image.
                if (p < PieceSquare::PS_W_PAWN)
                    continue;

                // Mirror of the first entry of the pair ...
                PieceSquare r1 = (PieceSquare)(flip_file(sq) + t[i]);
                mir_piece_[p] = r1;
                mir_piece_[r1] = p;

                // ... and of the second entry of the pair on the same square.
                PieceSquare p2 = (PieceSquare)(sq + t[i + 1]);
                PieceSquare r2 = (PieceSquare)(flip_file(sq) + t[i + 1]);
                mir_piece_[p2] = r2;
                mir_piece_[r2] = p2;

                break;
            }
        }
    }

    // Give user code the chance to initialize any entries it added itself.
    if (mir_piece_init_function)
        mir_piece_init_function();

    for (PieceSquare p = PieceSquare::PS_NONE; p < PieceSquare::PS_END; ++p)
    {
        // An entry that is still PS_NOT_INIT means the initialization above is incomplete.
        assert(mir_piece_[p] != PieceSquare::PS_NOT_INIT && mir_piece_[p] < PieceSquare::PS_END);
        assert(inv_piece_[p] != PieceSquare::PS_NOT_INIT && inv_piece_[p] < PieceSquare::PS_END);

        // mir and inv return to their original coordinates after being applied twice.
        assert(mir_piece_[mir_piece_[p]] == p);
        assert(inv_piece_[inv_piece_[p]] == p);

        // mir->inv->mir->inv must be the original location.
        assert(p == inv_piece(mir_piece(inv_piece(mir_piece(p)))));

        // inv->mir->inv->mir must be the original location.
        assert(p == mir_piece(inv_piece(mir_piece(inv_piece(p)))));
    }

#if 0
    // Disabled pre-verification that the evaluation tables can be mirrored.
    // NOTE(review): this block refers to identifiers (f_hand_pawn, f_lance,
    // FILE_9, kpp_write, kkp_write, ...) that are not defined in the visible
    // code; it appears to be left over from the code base this file was
    // derived from and would not compile if enabled.
    std::unordered_set<PieceSquare> s;
    vector<int> a = {
        f_hand_pawn - 1,e_hand_pawn - 1,
        f_hand_lance - 1, e_hand_lance - 1,
        f_hand_knight - 1, e_hand_knight - 1,
        f_hand_silver - 1, e_hand_silver - 1,
        f_hand_gold - 1, e_hand_gold - 1,
        f_hand_bishop - 1, e_hand_bishop - 1,
        f_hand_rook - 1, e_hand_rook - 1,
    };
    for (auto b : a)
        s.insert((PieceSquare)b);

    // Exclude board placements that can never occur.
    for (Rank r = RANK_1; r <= RANK_2; ++r)
        for (File f = FILE_1; f <= FILE_9; ++f)
        {
            if (r == RANK_1)
            {
                // pawn on the first rank
                PieceSquare b1 = PieceSquare(PieceSquare::PS_W_PAWN + (f | r));
                s.insert(b1);
                s.insert(inv_piece[b1]);

                // lance on the first rank
                PieceSquare b2 = PieceSquare(f_lance + (f | r));
                s.insert(b2);
                s.insert(inv_piece[b2]);
            }

            // knight on the first and second ranks
            PieceSquare b = PieceSquare(PieceSquare::PS_W_KNIGHT + (f | r));
            s.insert(b);
            s.insert(inv_piece[b]);
        }

    cout << "\nchecking kpp_write()..";
    for (auto sq : SQ)
    {
        cout << sq << ' ';
        for (PieceSquare p1 = PieceSquare::PS_NONE; p1 < PieceSquare::PS_END; ++p1)
            for (PieceSquare p2 = PieceSquare::PS_NONE; p2 < PieceSquare::PS_END; ++p2)
                if (!s.count(p1) && !s.count(p2))
                    kpp_write(sq, p1, p2, kpp[sq][p1][p2]);
    }
    cout << "\nchecking kkp_write()..";
    for (auto sq1 : SQ)
    {
        cout << sq1 << ' ';
        for (auto sq2 : SQ)
            for (PieceSquare p1 = PieceSquare::PS_NONE; p1 < PieceSquare::PS_END; ++p1)
                if (!s.count(p1))
                    kkp_write(sq1, sq2, p1, kkp[sq1][sq2][p1]);
    }
    cout << "..done!" << endl;
#endif
}
}
#endif // defined(EVAL_NNUE) || defined(EVAL_LEARN)
-47
View File
@@ -1,47 +0,0 @@
#ifndef _EVALUATE_MIR_INV_TOOLS_
#define _EVALUATE_MIR_INV_TOOLS_
#if defined(EVAL_NNUE) || defined(EVAL_LEARN)
// PieceSquare's mirror (horizontal flip) and inverse (180° on the board) tools to get pieces.
#include "../types.h"
#include "../evaluate.h"
#include <functional>
namespace Eval
{
// -------------------------------------------------
// tables
// -------------------------------------------------
// --- Provide Mirror and Inverse to PieceSquare.
// These arrays are initialized by calling init() or init_mir_inv_tables();.
// If you want to use only this table from the evaluation function,
// Call init_mir_inv_tables().
// These arrays are referenced from the KK/KKP/KPP classes below.
// Returns the value when a certain PieceSquare is seen from the other side
extern PieceSquare inv_piece(PieceSquare p);
// Returns the one at the position where a PieceSquare on the board is mirrored.
extern PieceSquare mir_piece(PieceSquare p);
// callback called when initializing mir_piece/inv_piece
// Used when extending fe_end on the user side.
// mir_piece_ and inv_piece_ are exposed because they are necessary for this initialization.
// At the timing when mir_piece_init_function is called, until fe_old_end
// It is guaranteed that these tables have been initialized.
extern std::function<void()> mir_piece_init_function;
extern int16_t mir_piece_[PieceSquare::PS_END];
extern int16_t inv_piece_[PieceSquare::PS_END];
// The table above will be initialized when you call this function explicitly or call init().
extern void init_mir_inv_tables();
}
#endif // defined(EVAL_NNUE) || defined(EVAL_LEARN)
#endif
+38 -127
View File
@@ -61,10 +61,11 @@ namespace Eval {
UCI::OptionsMap defaults; UCI::OptionsMap defaults;
UCI::init(defaults); UCI::init(defaults);
std::cerr << "NNUE evaluation used, but the network file " << eval_file << " was not loaded successfully. " sync_cout << "info string ERROR: NNUE evaluation used, but the network file " << eval_file << " was not loaded successfully." << sync_endl;
<< "These network evaluation parameters must be available, and compatible with this version of the code. " sync_cout << "info string ERROR: The UCI option EvalFile might need to specify the full path, including the directory/folder name, to the file." << sync_endl;
<< "The UCI option EvalFile might need to specify the full path, including the directory/folder name, to the file. " sync_cout << "info string ERROR: The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/"+std::string(defaults["EvalFile"]) << sync_endl;
<< "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/"+std::string(defaults["EvalFile"]) << std::endl; sync_cout << "info string ERROR: If the UCI option Use NNUE is set to true, network evaluation parameters compatible with the program must be available." << sync_endl;
sync_cout << "info string ERROR: The engine will be terminated now." << sync_endl;
std::exit(EXIT_FAILURE); std::exit(EXIT_FAILURE);
} }
@@ -122,7 +123,8 @@ namespace {
constexpr Value LazyThreshold1 = Value(1400); constexpr Value LazyThreshold1 = Value(1400);
constexpr Value LazyThreshold2 = Value(1300); constexpr Value LazyThreshold2 = Value(1300);
constexpr Value SpaceThreshold = Value(12222); constexpr Value SpaceThreshold = Value(12222);
constexpr Value NNUEThreshold = Value(460); constexpr Value NNUEThreshold1 = Value(550);
constexpr Value NNUEThreshold2 = Value(150);
// KingAttackWeights[PieceType] contains king attack weights by piece type // KingAttackWeights[PieceType] contains king attack weights by piece type
constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 }; constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 };
@@ -294,8 +296,8 @@ namespace {
attackedBy2[Us] = dblAttackByPawn | (attackedBy[Us][KING] & attackedBy[Us][PAWN]); attackedBy2[Us] = dblAttackByPawn | (attackedBy[Us][KING] & attackedBy[Us][PAWN]);
// Init our king safety tables // Init our king safety tables
Square s = make_square(Utility::clamp(file_of(ksq), FILE_B, FILE_G), Square s = make_square(std::clamp(file_of(ksq), FILE_B, FILE_G),
Utility::clamp(rank_of(ksq), RANK_2, RANK_7)); std::clamp(rank_of(ksq), RANK_2, RANK_7));
kingRing[Us] = attacks_bb<KING>(s) | s; kingRing[Us] = attacks_bb<KING>(s) | s;
kingAttackersCount[Them] = popcount(kingRing[Us] & pe->pawn_attacks(Them)); kingAttackersCount[Them] = popcount(kingRing[Us] & pe->pawn_attacks(Them));
@@ -692,7 +694,7 @@ namespace {
Square blockSq = s + Up; Square blockSq = s + Up;
// Adjust bonus based on the king's proximity // Adjust bonus based on the king's proximity
bonus += make_score(0, ( (king_proximity(Them, blockSq) * 19) / 4 bonus += make_score(0, ( king_proximity(Them, blockSq) * 19 / 4
- king_proximity(Us, blockSq) * 2) * w); - king_proximity(Us, blockSq) * 2) * w);
// If blockSq is not the queening square then consider also a second push // If blockSq is not the queening square then consider also a second push
@@ -737,7 +739,7 @@ namespace {
// Evaluation::space() computes a space evaluation for a given side, aiming to improve game // Evaluation::space() computes a space evaluation for a given side, aiming to improve game
// play in the opening. It is based on the number of safe squares on the 4 central files // play in the opening. It is based on the number of safe squares on the four central files
// on ranks 2 to 4. Completely safe squares behind a friendly pawn are counted twice. // on ranks 2 to 4. Completely safe squares behind a friendly pawn are counted twice.
// Finally, the space bonus is multiplied by a weight which decreases according to occupancy. // Finally, the space bonus is multiplied by a weight which decreases according to occupancy.
@@ -810,7 +812,7 @@ namespace {
// Now apply the bonus: note that we find the attacking side by extracting the // Now apply the bonus: note that we find the attacking side by extracting the
// sign of the midgame or endgame values, and that we carefully cap the bonus // sign of the midgame or endgame values, and that we carefully cap the bonus
// so that the midgame and endgame scores do not change sign after the bonus. // so that the midgame and endgame scores do not change sign after the bonus.
int u = ((mg > 0) - (mg < 0)) * Utility::clamp(complexity + 50, -abs(mg), 0); int u = ((mg > 0) - (mg < 0)) * std::clamp(complexity + 50, -abs(mg), 0);
int v = ((eg > 0) - (eg < 0)) * std::max(complexity, -abs(eg)); int v = ((eg > 0) - (eg < 0)) * std::max(complexity, -abs(eg));
mg += u; mg += u;
@@ -935,9 +937,6 @@ make_v:
// Side to move point of view // Side to move point of view
v = (pos.side_to_move() == WHITE ? v : -v) + Tempo; v = (pos.side_to_move() == WHITE ? v : -v) + Tempo;
// Damp down the evaluation linearly when shuffling
v = v * (100 - pos.rule50_count()) / 100;
return v; return v;
} }
@@ -954,14 +953,21 @@ Value Eval::evaluate(const Position& pos) {
} }
#endif #endif
if (Eval::useNNUE) bool classical = !Eval::useNNUE
{ || abs(eg_value(pos.psq_score())) * 16 > NNUEThreshold1 * (16 + pos.rule50_count());
Value v = eg_value(pos.psq_score()); Value v = classical ? Evaluation<NO_TRACE>(pos).value()
// Take NNUE eval only on balanced positions : NNUE::evaluate(pos) * 5 / 4 + Tempo;
if (abs(v) < NNUEThreshold + 20 * pos.count<PAWN>())
return NNUE::evaluate(pos) + Tempo; if (classical && Eval::useNNUE && abs(v) * 16 < NNUEThreshold2 * (16 + pos.rule50_count()))
} v = NNUE::evaluate(pos) * 5 / 4 + Tempo;
return Evaluation<NO_TRACE>(pos).value();
// Damp down the evaluation linearly when shuffling
v = v * (100 - pos.rule50_count()) / 100;
// Guarantee evaluation does not hit the tablebase range
v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);
return v;
} }
/// trace() is like evaluate(), but instead of returning a value, it returns /// trace() is like evaluate(), but instead of returning a value, it returns
@@ -979,12 +985,6 @@ std::string Eval::trace(const Position& pos) {
Value v; Value v;
if (Eval::useNNUE)
{
v = NNUE::evaluate(pos);
}
else
{
std::memset(scores, 0, sizeof(scores)); std::memset(scores, 0, sizeof(scores));
pos.this_thread()->contempt = SCORE_ZERO; // Reset any dynamic contempt pos.this_thread()->contempt = SCORE_ZERO; // Reset any dynamic contempt
@@ -1010,110 +1010,21 @@ std::string Eval::trace(const Position& pos) {
<< " Winnable | " << Term(WINNABLE) << " Winnable | " << Term(WINNABLE)
<< " ------------+-------------+-------------+------------\n" << " ------------+-------------+-------------+------------\n"
<< " Total | " << Term(TOTAL); << " Total | " << Term(TOTAL);
}
v = pos.side_to_move() == WHITE ? v : -v; v = pos.side_to_move() == WHITE ? v : -v;
ss << "\nClassical evaluation: " << to_cp(v) << " (white side)\n";
if (Eval::useNNUE)
{
v = NNUE::evaluate(pos);
v = pos.side_to_move() == WHITE ? v : -v;
ss << "\nNNUE evaluation: " << to_cp(v) << " (white side)\n";
}
v = evaluate(pos);
v = pos.side_to_move() == WHITE ? v : -v;
ss << "\nFinal evaluation: " << to_cp(v) << " (white side)\n"; ss << "\nFinal evaluation: " << to_cp(v) << " (white side)\n";
return ss.str(); return ss.str();
} }
// Checks that the piece lists held by this EvalList (pieceListFw[] and
// piece_id_list[]) are consistent with the pieces actually placed in pos.
// Returns true when every check passes and false on the first mismatch;
// several mismatches additionally trip an assert in debug builds.
// Note: intended for debugging only, it is slow.
bool EvalList::is_valid(const Position& pos)
{
// Pass 1: every piece id stored in piece_id_list must be unique.
// The uniqueness check is an assert only, so it has no effect when asserts are disabled.
std::set<PieceId> piece_numbers;
for (Square sq = SQ_A1; sq != SQUARE_NB; ++sq) {
auto piece_number = piece_id_list[sq];
if (piece_number == PieceId::PIECE_ID_NONE) {
continue;
}
assert(!piece_numbers.count(piece_number));
piece_numbers.insert(piece_number);
}
// Pass 2: every entry of pieceListFw below PIECE_ID_KING must describe a
// piece that really stands on the corresponding square of pos.
for (int i = 0; i < PieceId::PIECE_ID_KING; ++i)
{
PieceSquare fw = pieceListFw[i];
// Unused slots (PS_NONE) are skipped; everything else is looked up in the Position.
if (fw == PieceSquare::PS_NONE) {
continue;
}
// Out of range
if (!(0 <= fw && fw < PieceSquare::PS_END))
return false;
// Find the piece whose PieceSquare range contains fw and verify that
// this piece really is on the decoded square.
for (Piece pc = NO_PIECE; pc < PIECE_NB; ++pc)
{
auto pt = type_of(pc);
if (pt == NO_PIECE_TYPE || pt == 7) // non-existing piece
continue;
// First PieceSquare value belonging to piece pc (white point of view)
auto s = PieceSquare(kpp_board_index[pc].from[Color::WHITE]);
if (s <= fw && fw < s + SQUARE_NB)
{
// fw falls into the range of pc: the offset inside the range is the square.
Square sq = (Square)(fw - s);
Piece pc2 = pos.piece_on(sq);
if (pc2 != pc)
return false;
// Match confirmed; leave the inner loop and continue with the next list entry.
goto Found;
}
}
// No piece range contained fw, so the entry is invalid.
return false;
Found:;
}
// Pass 3: validate piece_id_list square by square against the position.
for (auto sq = SQUARE_ZERO; sq < SQUARE_NB; ++sq) {
Piece expected_piece = pos.piece_on(sq);
PieceId piece_number = piece_id_list[sq];
// A square without a piece id must be empty in the position.
if (piece_number == PieceId::PIECE_ID_NONE) {
assert(expected_piece == NO_PIECE);
if (expected_piece != NO_PIECE) {
return false;
}
continue;
}
// Decode which piece the stored PieceSquare stands for by locating the
// piece whose range contains it.
PieceSquare bona_piece_white = pieceListFw[piece_number];
Piece actual_piece;
for (actual_piece = NO_PIECE; actual_piece < PIECE_NB; ++actual_piece) {
if (kpp_board_index[actual_piece].from[Color::WHITE] == PieceSquare::PS_NONE) {
continue;
}
if (kpp_board_index[actual_piece].from[Color::WHITE] <= bona_piece_white
&& bona_piece_white < kpp_board_index[actual_piece].from[Color::WHITE] + SQUARE_NB) {
break;
}
}
// The loop ran to the end: no piece range contained the stored value.
assert(actual_piece != PIECE_NB);
if (actual_piece == PIECE_NB) {
return false;
}
// The decoded piece must be the piece the position has on this square.
assert(actual_piece == expected_piece);
if (actual_piece != expected_piece) {
return false;
}
// The decoded square must be the square currently being examined.
Square actual_square = static_cast<Square>(
bona_piece_white - kpp_board_index[actual_piece].from[Color::WHITE]);
assert(sq == actual_square);
if (sq != actual_square) {
return false;
}
}
return true;
}
-18
View File
@@ -276,13 +276,6 @@ int Position::set_from_packed_sfen(const PackedSfen& sfen , StateInfo * si, Thre
// Active color // Active color
sideToMove = (Color)stream.read_one_bit(); sideToMove = (Color)stream.read_one_bit();
// clear evalList. It is cleared when memset is cleared to zero above...
evalList.clear();
// In updating the PieceList, we have to set which piece is where,
// A counter of how much each piece has been used
PieceId next_piece_number = PieceId::PIECE_ID_ZERO;
pieceList[W_KING][0] = SQUARE_NB; pieceList[W_KING][0] = SQUARE_NB;
pieceList[B_KING][0] = SQUARE_NB; pieceList[B_KING][0] = SQUARE_NB;
@@ -327,14 +320,6 @@ int Position::set_from_packed_sfen(const PackedSfen& sfen , StateInfo * si, Thre
put_piece(Piece(pc), sq); put_piece(Piece(pc), sq);
// update evalList
PieceId piece_no =
(pc == B_KING) ?PieceId::PIECE_ID_BKING :// Move ball
(pc == W_KING) ?PieceId::PIECE_ID_WKING :// Backing ball
next_piece_number++; // otherwise
evalList.put_piece(piece_no, sq, pc); // Place the pc piece in the sq box
//cout << sq << ' ' << board[sq] << ' ' << stream.get_cursor() << endl; //cout << sq << ' ' << board[sq] << ' ' << stream.get_cursor() << endl;
if (stream.get_cursor()> 256) if (stream.get_cursor()> 256)
@@ -402,9 +387,6 @@ set_state(st);
//std::cout << *this << std::endl; //std::cout << *this << std::endl;
assert(pos_is_ok()); assert(pos_is_ok());
#if defined(EVAL_NNUE)
assert(evalList.is_valid(*this));
#endif // defined(EVAL_NNUE)
return 0; return 0;
} }
-231
View File
@@ -20,237 +20,6 @@ namespace EvalLearningTools
double Weight::eta3; double Weight::eta3;
uint64_t Weight::eta1_epoch; uint64_t Weight::eta1_epoch;
uint64_t Weight::eta2_epoch; uint64_t Weight::eta2_epoch;
std::vector<bool> min_index_flag;
// --- initialization for each individual table
// Fills min_index_flag: for every serialized index in the KK, KKP and KPP
// ranges the flag is true when that index is the smallest one among its
// lower-dimension equivalents (as produced by toLowerDimensions()).
// KPPP is not involved.
void init_min_index_flag()
{
    // Initialization of mir_piece and inv_piece must be completed.
    assert(Eval::mir_piece(PieceSquare::PS_W_PAWN) == PieceSquare::PS_B_PAWN);

    // Set up the index serializers so that their ranges follow one another:
    // KK starts at 0, KKP starts where KK ends, KPP starts where KKP ends.
    KK g_kk;
    g_kk.set(SQUARE_NB, PieceSquare::PS_END, 0);
    KKP g_kkp;
    g_kkp.set(SQUARE_NB, PieceSquare::PS_END, g_kk.max_index());
    KPP g_kpp;
    g_kpp.set(SQUARE_NB, PieceSquare::PS_END, g_kkp.max_index());

    uint64_t size = g_kpp.max_index();

    // std::vector<bool> packs its elements into shared words, so writing
    // neighbouring elements from different threads is a data race. The
    // parallel loop therefore writes into a byte-per-element buffer, which is
    // copied into min_index_flag once all threads have finished.
    std::vector<char> is_min(size, 0);

#pragma omp parallel
    {
#if defined(_OPENMP)
        // To prevent the logical 64 cores from being used when there are two CPUs under Windows
        // explicitly assign to CPU here
        int thread_index = omp_get_thread_num(); // get your thread number
        WinProcGroup::bindThisThread(thread_index);
#endif

#pragma omp for schedule(dynamic,20000)
        for (int64_t index_ = 0; index_ < (int64_t)size; ++index_)
        {
            // The loop variable is signed because of OpenMP restrictions;
            // convert it back to the unsigned index actually used.
            uint64_t index = (uint64_t)index_;

            if (g_kk.is_ok(index))
            {
                // Make sure that the original index will be restored by conversion from index and reverse conversion.
                // It is a process that is executed only once at startup, so write it in assert.
                assert(g_kk.fromIndex(index).toIndex() == index);

                KK a[KK_LOWER_COUNT];
                g_kk.fromIndex(index).toLowerDimensions(a);

                // Make sure that the first element of dimension reduction is the same as the original index.
                assert(a[0].toIndex() == index);

                uint64_t min_index = UINT64_MAX;
                for (auto& e : a)
                    min_index = std::min(min_index, e.toIndex());
                is_min[index] = (min_index == index);
            }
            else if (g_kkp.is_ok(index))
            {
                assert(g_kkp.fromIndex(index).toIndex() == index);

                KKP x = g_kkp.fromIndex(index);
                KKP a[KKP_LOWER_COUNT];
                x.toLowerDimensions(a);

                assert(a[0].toIndex() == index);

                uint64_t min_index = UINT64_MAX;
                for (auto& e : a)
                    min_index = std::min(min_index, e.toIndex());
                is_min[index] = (min_index == index);
            }
            else if (g_kpp.is_ok(index))
            {
                assert(g_kpp.fromIndex(index).toIndex() == index);

                KPP x = g_kpp.fromIndex(index);
                KPP a[KPP_LOWER_COUNT];
                x.toLowerDimensions(a);

                assert(a[0].toIndex() == index);

                uint64_t min_index = UINT64_MAX;
                for (auto& e : a)
                    min_index = std::min(min_index, e.toIndex());
                is_min[index] = (min_index == index);
            }
            else
            {
                // Every index below KPP's max_index() is expected to belong to one of the three ranges.
                assert(false);
            }
        }
    }

    // Single-threaded publication of the results into the bit-packed flag array.
    min_index_flag.assign(is_min.begin(), is_min.end());
}
// Debug-only self test for the KPP index serializer.
// For every (king square, p0, p1) combination it checks, via asserts, that
// index -> KPP -> index round-trips and that toLowerDimensions() yields the
// original entry followed by its file-mirrored counterpart. Afterwards it
// prints every index of the KPP range that was never reached.
// All checks are asserts, so nothing is verified when asserts are disabled.
void learning_tools_unit_test_kpp()
{
// test KPP triangulation for bugs
// All combinations of k-p0-p1 are properly handled by KPP, and the dimension reduction at that time is
// Determine if it is correct.
// Serializers are chained so that each range starts where the previous one ends.
KK g_kk;
g_kk.set(SQUARE_NB, PieceSquare::PS_END, 0);
KKP g_kkp;
g_kkp.set(SQUARE_NB, PieceSquare::PS_END, g_kk.max_index());
KPP g_kpp;
g_kpp.set(SQUARE_NB, PieceSquare::PS_END, g_kkp.max_index());
// f[i] records whether raw index i (relative to g_kpp.min_index()) was visited.
std::vector<bool> f;
f.resize(g_kpp.max_index() - g_kpp.min_index());
for(auto k = SQUARE_ZERO ; k < SQUARE_NB ; ++k)
for(auto p0 = PieceSquare::PS_NONE; p0 < PieceSquare::PS_END ; ++p0)
for (auto p1 = PieceSquare::PS_NONE; p1 < PieceSquare::PS_END; ++p1)
{
KPP kpp_org = g_kpp.fromKPP(k,p0,p1);
KPP kpp0;
// Expected second lower-dimension entry: the same triple mirrored along the files.
KPP kpp1 = g_kpp.fromKPP(flip_file(k), mir_piece(p0), mir_piece(p1));
KPP kpp_array[2];
auto index = kpp_org.toIndex();
assert(g_kpp.is_ok(index));
// Round trip: rebuilding the KPP from its index must give back the original.
kpp0 = g_kpp.fromIndex(index);
//if (kpp0 != kpp_org)
// std::cout << "index = " << index << "," << kpp_org << "," << kpp0 << std::endl;
kpp0.toLowerDimensions(kpp_array);
assert(kpp_array[0] == kpp0);
assert(kpp0 == kpp_org);
assert(kpp_array[1] == kpp1);
// Mark both the entry and its mirror image as visited.
auto index2 = kpp1.toIndex();
f[index - g_kpp.min_index()] = f[index2-g_kpp.min_index()] = true;
}
// Check if there is no missing index.
for(size_t index = 0 ; index < f.size(); index++)
if (!f[index])
{
std::cout << index << g_kpp.fromIndex(index + g_kpp.min_index()) << std::endl;
}
}
void learning_tools_unit_test_kppp()
{
// Test for missing KPPP calculations
KPPP g_kppp;
g_kppp.set(15, PieceSquare::PS_END,0);
uint64_t min_index = g_kppp.min_index();
uint64_t max_index = g_kppp.max_index();
// Confirm last element.
//KPPP x = KPPP::fromIndex(max_index-1);
//std::cout << x << std::endl;
for (uint64_t index = min_index; index < max_index; ++index)
{
KPPP x = g_kppp.fromIndex(index);
//std::cout << x << std::endl;
#if 0
if ((index % 10000000) == 0)
std::cout << "index = " << index << std::endl;
// index = 9360000000
// done.
if (x.toIndex() != index)
{
std::cout << "assertion failed , index = " << index << std::endl;
}
#endif
assert(x.toIndex() == index);
// ASSERT((&kppp_ksq_pcpcpc(x.king(), x.piece0(), x.piece1(), x.piece2()) - &kppp[0][0]) == (index - min_index));
}
}
// Debug-only self test for the KKPP index serializer.
// Enumerates (k, i, j) with j < i in nested-loop order and asserts that
//  - the raw index of each triple equals its position in that enumeration, and
//  - decoding the index gives back the same king / piece0 / piece1 values.
// All checks are asserts, so nothing is verified when asserts are disabled.
void learning_tools_unit_test_kkpp()
{
    KKPP g_kkpp;
    g_kkpp.set(SQUARE_NB, 10000, 0);

    // Raw index expected for the next triple in the enumeration.
    uint64_t n = 0;
    for (int k = 0; k<SQUARE_NB; ++k)
        for (int i = 0; i<10000; ++i) // As a test, assume a large fe_end of 10000.
            for (int j = 0; j < i; ++j)
            {
                auto kkpp = g_kkpp.fromKKPP(k, (PieceSquare)i, (PieceSquare)j);
                auto r = kkpp.toRawIndex();

                // The counter is advanced outside the assert so that the
                // bookkeeping does not depend on asserts being compiled in.
                assert(n == r);
                ++n;

                auto kkpp2 = g_kkpp.fromIndex(r + g_kkpp.min_index());
                assert(kkpp2.king() == k && kkpp2.piece0() == i && kkpp2.piece1() == j);
            }
}
// One-time setup of everything in EvalLearningTools.
// The first call builds the mirror/inverse tables and then the min_index_flag
// array, printing progress to std::cout; later calls do nothing.
void init()
{
    static bool initialized = false;
    if (initialized)
        return;

    std::cout << "EvalLearningTools init..";

    // mir_piece() / inv_piece() must be usable before min_index_flag is
    // computed, so the tables are built first.
    init_mir_inv_tables();

    // The unit tests (learning_tools_unit_test_kpp / _kppp / _kkpp) can be
    // called here while debugging; they are not run by default.

    // This step takes a long time.
    init_min_index_flag();

    std::cout << "done." << std::endl;

    initialized = true;
}
} }
#endif #endif
-834
View File
@@ -7,8 +7,6 @@
#if defined (EVAL_LEARN) #if defined (EVAL_LEARN)
#include <array> #include <array>
#include "../eval/evaluate_mir_inv_tools.h"
#if defined(SGD_UPDATE) || defined(USE_KPPP_MIRROR_WRITE) #if defined(SGD_UPDATE) || defined(USE_KPPP_MIRROR_WRITE)
#include "../misc.h" // PRNG , my_insertion_sort #include "../misc.h" // PRNG , my_insertion_sort
#endif #endif
@@ -17,27 +15,6 @@
namespace EvalLearningTools namespace EvalLearningTools
{ {
// -------------------------------------------------
// Initialization
// -------------------------------------------------
// Initialize the tables in this EvalLearningTools namespace.
// Be sure to call once before learning starts.
// In this function, we also call init_mir_inv_tables().
// (It is not necessary to call init_mir_inv_tables() when calling this function.)
void init();
// -------------------------------------------------
// flags
// -------------------------------------------------
// When the dimension is lowered, it may become the smallest index among them
// A flag array that is true for the known index.
// This array is also initialized by init().
// KPPP is not involved.
// Therefore, the valid index range of this array is from KK::min_index() to KPP::max_index().
extern std::vector<bool> min_index_flag;
// ------------------------------------------------- // -------------------------------------------------
// Array for learning that stores gradients etc. // Array for learning that stores gradients etc.
// ------------------------------------------------- // -------------------------------------------------
@@ -217,817 +194,6 @@ namespace EvalLearningTools
std::array<LearnFloatType, 2> get_grad() const { return std::array<LearnFloatType, 2>{w[0].get_grad(), w[1].get_grad()}; } std::array<LearnFloatType, 2> get_grad() const { return std::array<LearnFloatType, 2>{w[0].get_grad(), w[1].get_grad()}; }
}; };
// -------------------------------------------------
// Helpers that compute the index of a weight once the weight arrays are serialized.
// -------------------------------------------------
// Base class for KK, KKP, KPP, KPPP and KKPP.
//
// How to use these classes:
//
// 1. Initialize with set() first. Example: KK g_kk; g_kk.set(SQUARE_NB, fe_end, 0);
// 2. Create an instance with fromIndex(), fromKK(), etc.
// 3. Read it back through accessors such as king(), piece0(), piece1().
//
// The learner's init_grad(), add_grad() and update_weights() are the intended
// users and show why this indirection is needed.
//
// Note: derived classes may indirectly reference inv_piece/mir_piece for
// dimension reduction, so call EvalLearningTools::init() or
// init_mir_inv_tables() beforehand.
//
// Remarks: /*final*/ marks functions that derived classes should not override.
// Functions that derived classes must override are pure virtual ("= 0").
// Plain virtual functions may or may not be overridden.
//
struct SerializerBase
{
    // First serialized index covered by this object, and one past the last.
    /*final*/ uint64_t min_index() const { return min_index_; }
    /*final*/ uint64_t max_index() const { return min_index() + max_raw_index_; }

    // Number of entries, i.e. max_index() - min_index().
    // Derived classes compute it from max_king_sq_, fe_end_, etc.
    virtual uint64_t size() const = 0;

    // True when min_index() <= index < max_index().
    // Declared const so it can be used on const objects and references.
    /*final*/ bool is_ok(uint64_t index) const { return min_index() <= index && index < max_index(); }

    // Must be called before use, unless the instance is produced by a derived
    // class builder such as fromKK()/fromIndex(), which call it internally.
    // size() is evaluated last because derived classes compute it from the
    // values stored here.
    virtual void set(int max_king_sq, uint64_t fe_end, uint64_t min_index)
    {
        max_king_sq_ = max_king_sq;
        fe_end_ = fe_end;
        min_index_ = min_index;
        max_raw_index_ = size();
    }

    // Serialized index of the current member values.
    /*final*/ uint64_t toIndex() const {
        return min_index() + toRawIndex();
    }

    // Index relative to min_index() (i.e. before min_index() is added).
    virtual uint64_t toRawIndex() const = 0;

protected:
    // All members are zero-initialized so that an object on which set() has
    // not been called yet reports an empty range instead of garbage.

    // The value returned by min_index().
    uint64_t min_index_ = 0;

    // max_index() = min_index() + max_raw_index_.
    // Filled in from size() of the derived class.
    uint64_t max_raw_index_ = 0;

    // Number of king squares supported (normally SQUARE_NB).
    int max_king_sq_ = 0;

    // One past the largest PieceSquare value supported.
    uint64_t fe_end_ = 0;
};
struct KK : public SerializerBase
{
protected:
KK(Square king0, Square king1,bool inverse) : king0_(king0), king1_(king1) , inverse_sign(inverse) {}
public:
KK() {}
virtual uint64_t size() const { return max_king_sq_ * max_king_sq_; }
// builder that creates KK object from index (serial number)
KK fromIndex(uint64_t index) const { assert(index >= min_index()); return fromRawIndex(index - min_index()); }
// builder that creates KK object from raw_index (number starting from 0, not serial number)
KK fromRawIndex(uint64_t raw_index) const
{
int king1 = (int)(raw_index % SQUARE_NB);
raw_index /= SQUARE_NB;
int king0 = (int)(raw_index /* % SQUARE_NB */);
assert(king0 < SQUARE_NB);
return fromKK((Square)king0, (Square)king1 , false);
}
KK fromKK(Square king0, Square king1 , bool inverse) const
{
// The variable name kk is used in the Eval::kk array etc., so it needs to be different. (The same applies to KKP, KPP classes, etc.)
KK my_kk(king0, king1, inverse);
my_kk.set(max_king_sq_, fe_end_, min_index());
return my_kk;
}
KK fromKK(Square king0, Square king1) const { return fromKK(king0, king1, false); }
// When you construct this object using fromIndex(), you can get information with the following accessors.
Square king0() const { return king0_; }
Square king1() const { return king1_; }
// number of dimension reductions
#if defined(USE_KK_INVERSE_WRITE)
#define KK_LOWER_COUNT 4
#elif defined(USE_KK_MIRROR_WRITE)
#define KK_LOWER_COUNT 2
#else
#define KK_LOWER_COUNT 1
#endif
#if defined(USE_KK_INVERSE_WRITE) && !defined(USE_KK_MIRROR_WRITE)
// USE_KK_INVERSE_WRITE If you use it, please also define USE_KK_MIRROR_WRITE.
static_assert(false, "define also USE_KK_MIRROR_WRITE!");
#endif
// Get the index of the low-dimensional array.
// When USE_KK_INVERSE_WRITE is enabled, the inverse of them will be in [2] and [3].
// Note that the sign of grad must be reversed for this dimension reduction.
// You can use is_inverse() because it can be determined.
void toLowerDimensions(/*out*/KK kk_[KK_LOWER_COUNT]) const {
kk_[0] = fromKK(king0_, king1_,false);
#if defined(USE_KK_MIRROR_WRITE)
kk_[1] = fromKK(flip_file(king0_),flip_file(king1_),false);
#if defined(USE_KK_INVERSE_WRITE)
kk_[2] = fromKK(rotate180(king1_), rotate180(king0_),true);
kk_[3] = fromKK(rotate180(flip_file(king1_)) , rotate180(flip_file(king0_)),true);
#endif
#endif
}
// Get the index when counting the value of min_index() of this class as 0.
virtual uint64_t toRawIndex() const {
return (uint64_t)king0_ * (uint64_t)max_king_sq_ + (uint64_t)king1_;
}
// Returns whether or not the dimension lowered with toLowerDimensions is inverse.
bool is_inverse() const {
return inverse_sign;
}
// When is_inverse() == true, reverse the sign that is not grad's turn and return it.
template <typename T>
std::array<T, 2> apply_inverse_sign(const std::array<T, 2>& rhs)
{
return !is_inverse() ? rhs : std::array<T, 2>{-rhs[0], rhs[1]};
}
// comparison operator
bool operator==(const KK& rhs) { return king0() == rhs.king0() && king1() == rhs.king1(); }
bool operator!=(const KK& rhs) { return !(*this == rhs); }
private:
Square king0_, king1_ ;
bool inverse_sign;
};
// Debug printer: writes the entry as "KK(king0,king1)".
static std::ostream& operator<<(std::ostream& os, KK rhs)
{
    return os << "KK(" << rhs.king0() << "," << rhs.king1() << ")";
}
// Same as KK. For KKP.
struct KKP : public SerializerBase
{
protected:
KKP(Square king0, Square king1, PieceSquare p) : king0_(king0), king1_(king1), piece_(p), inverse_sign(false) {}
KKP(Square king0, Square king1, PieceSquare p, bool inverse) : king0_(king0), king1_(king1), piece_(p),inverse_sign(inverse) {}
public:
KKP() {}
virtual uint64_t size() const { return (uint64_t)max_king_sq_*(uint64_t)max_king_sq_*(uint64_t)fe_end_; }
// builder that creates KKP object from index (serial number)
KKP fromIndex(uint64_t index) const { assert(index >= min_index()); return fromRawIndex(index - min_index()); }
// A builder that creates a KKP object from raw_index (a number that starts from 0, not a serial number)
KKP fromRawIndex(uint64_t raw_index) const
{
int piece = (int)(raw_index % PieceSquare::PS_END);
raw_index /= PieceSquare::PS_END;
int king1 = (int)(raw_index % SQUARE_NB);
raw_index /= SQUARE_NB;
int king0 = (int)(raw_index /* % SQUARE_NB */);
assert(king0 < SQUARE_NB);
return fromKKP((Square)king0, (Square)king1, (PieceSquare)piece,false);
}
KKP fromKKP(Square king0, Square king1, PieceSquare p, bool inverse) const
{
KKP my_kkp(king0, king1, p, inverse);
my_kkp.set(max_king_sq_,fe_end_,min_index());
return my_kkp;
}
KKP fromKKP(Square king0, Square king1, PieceSquare p) const { return fromKKP(king0, king1, p, false); }
// When you construct this object using fromIndex(), you can get information with the following accessors.
Square king0() const { return king0_; }
Square king1() const { return king1_; }
PieceSquare piece() const { return piece_; }
// Number of KKP dimension reductions
#if defined(USE_KKP_INVERSE_WRITE)
#define KKP_LOWER_COUNT 4
#elif defined(USE_KKP_MIRROR_WRITE)
#define KKP_LOWER_COUNT 2
#else
#define KKP_LOWER_COUNT 1
#endif
#if defined(USE_KKP_INVERSE_WRITE) && !defined(USE_KKP_MIRROR_WRITE)
// USE_KKP_INVERSE_WRITE If you use it, please also define USE_KKP_MIRROR_WRITE.
static_assert(false, "define also USE_KKP_MIRROR_WRITE!");
#endif
// Get the index of the low-dimensional array. The mirrored one is returned to kkp_[1].
// When USE_KKP_INVERSE_WRITE is enabled, the inverse of them will be in [2] and [3].
// Note that the sign of grad must be reversed for this dimension reduction.
// You can use is_inverse() because it can be determined.
void toLowerDimensions(/*out*/ KKP kkp_[KKP_LOWER_COUNT]) const {
kkp_[0] = fromKKP(king0_, king1_, piece_,false);
#if defined(USE_KKP_MIRROR_WRITE)
kkp_[1] = fromKKP(flip_file(king0_), flip_file(king1_), Eval::mir_piece(piece_),false);
#if defined(USE_KKP_INVERSE_WRITE)
kkp_[2] = fromKKP( rotate180(king1_), rotate180(king0_), Eval::inv_piece(piece_),true);
kkp_[3] = fromKKP( rotate180(flip_file(king1_)), rotate180(flip_file(king0_)) , Eval::inv_piece(Eval::mir_piece(piece_)),true);
#endif
#endif
}
// Get the index when counting the value of min_index() of this class as 0.
virtual uint64_t toRawIndex() const {
return ((uint64_t)king0_ * (uint64_t)max_king_sq_ + (uint64_t)king1_) * (uint64_t)fe_end_ + (uint64_t)piece_;
}
// Returns whether or not the dimension lowered with toLowerDimensions is inverse.
bool is_inverse() const {
return inverse_sign;
}
// When is_inverse() == true, reverse the sign that is not grad's turn and return it.
template <typename T>
std::array<T, 2> apply_inverse_sign(const std::array<T, 2>& rhs)
{
return !is_inverse() ? rhs : std::array<T, 2>{-rhs[0], rhs[1]};
}
// comparison operator
bool operator==(const KKP& rhs) { return king0() == rhs.king0() && king1() == rhs.king1() && piece() == rhs.piece(); }
bool operator!=(const KKP& rhs) { return !(*this == rhs); }
private:
Square king0_, king1_;
PieceSquare piece_;
bool inverse_sign;
};
// Debug printer: writes the entry as "KKP(king0,king1,piece)".
static std::ostream& operator<<(std::ostream& os, KKP rhs)
{
    return os << "KKP(" << rhs.king0() << "," << rhs.king1() << "," << rhs.piece() << ")";
}
// Same as KK and KKP. For KPP
struct KPP : public SerializerBase
{
protected:
KPP(Square king, PieceSquare p0, PieceSquare p1) : king_(king), piece0_(p0), piece1_(p1) {}
public:
KPP() {}
// The minimum and maximum KPP values of serial numbers when serializing KK, KKP, KPP arrays.
#if !defined(USE_TRIANGLE_WEIGHT_ARRAY)
virtual uint64_t size() const { return (uint64_t)max_king_sq_*(uint64_t)fe_end_*(uint64_t)fe_end_; }
#else
// Triangularize the square array part of [fe_end][fe_end] of kpp[SQUARE_NB][fe_end][fe_end].
// If kpp[SQUARE_NB][triangle_fe_end], the first row of this triangular array has one element, the second row has two elements, and so on.
// hence triangle_fe_end = 1 + 2 + .. + fe_end = fe_end * (fe_end + 1) / 2
virtual uint64_t size() const { return (uint64_t)max_king_sq_*(uint64_t)triangle_fe_end; }
#endif
virtual void set(int max_king_sq, uint64_t fe_end, uint64_t min_index)
{
// This value is used in size(), and size() is used in SerializerBase::set(), so calculate first.
triangle_fe_end = (uint64_t)fe_end*((uint64_t)fe_end + 1) / 2;
SerializerBase::set(max_king_sq, fe_end, min_index);
}
// builder that creates KPP object from index (serial number)
KPP fromIndex(uint64_t index) const { assert(index >= min_index()); return fromRawIndex(index - min_index()); }
// A builder that creates KPP objects from raw_index (a number that starts from 0, not a serial number)
KPP fromRawIndex(uint64_t raw_index) const
{
const uint64_t triangle_fe_end = (uint64_t)fe_end_*((uint64_t)fe_end_ + 1) / 2;
#if !defined(USE_TRIANGLE_WEIGHT_ARRAY)
int piece1 = (int)(raw_index % fe_end_);
raw_index /= fe_end_;
int piece0 = (int)(raw_index % fe_end_);
raw_index /= fe_end_;
#else
uint64_t index2 = raw_index % triangle_fe_end;
// Write the expression to find piece0, piece1 from index2 here.
// This is the inverse function of index2 = i * (i+1) / 2 + j.
// If j = 0, i^2 + i-2 * index2 == 0
// From the solution formula of the quadratic equation i = (sqrt(8*index2+1)-1) / 2.
// After i is converted into an integer, j can be calculated as j = index2-i * (i + 1) / 2.
// PieceSquare assumes 32bit (may not fit in 16bit), so this multiplication must be 64bit.
int piece1 = int(sqrt(8 * index2 + 1) - 1) / 2;
int piece0 = int(index2 - (uint64_t)piece1*((uint64_t)piece1 + 1) / 2);
assert(piece1 < (int)fe_end_);
assert(piece0 < (int)fe_end_);
assert(piece0 > piece1);
raw_index /= triangle_fe_end;
#endif
int king = (int)(raw_index /* % SQUARE_NB */);
assert(king < max_king_sq_);
return fromKPP((Square)king, (PieceSquare)piece0, (PieceSquare)piece1);
}
KPP fromKPP(Square king, PieceSquare p0, PieceSquare p1) const
{
KPP my_kpp(king, p0, p1);
my_kpp.set(max_king_sq_,fe_end_,min_index());
return my_kpp;
}
// When you construct this object using fromIndex(), you can get information with the following accessors.
Square king() const { return king_; }
PieceSquare piece0() const { return piece0_; }
PieceSquare piece1() const { return piece1_; }
// number of dimension reductions
#if defined(USE_KPP_MIRROR_WRITE)
#if !defined(USE_TRIANGLE_WEIGHT_ARRAY)
#define KPP_LOWER_COUNT 4
#else
#define KPP_LOWER_COUNT 2
#endif
#else
#if !defined(USE_TRIANGLE_WEIGHT_ARRAY)
#define KPP_LOWER_COUNT 2
#else
#define KPP_LOWER_COUNT 1
#endif
#endif
// Get the index of the low-dimensional array. The ones with p1 and p2 swapped, the ones mirrored, etc. are returned.
void toLowerDimensions(/*out*/ KPP kpp_[KPP_LOWER_COUNT]) const {
#if defined(USE_TRIANGLE_WEIGHT_ARRAY)
// Note that if you use a triangular array, the swapped piece0 and piece1 will not be returned.
kpp_[0] = fromKPP(king_, piece0_, piece1_);
#if defined(USE_KPP_MIRROR_WRITE)
kpp_[1] = fromKPP(flip_file(king_), Eval::mir_piece(piece0_), Eval::mir_piece(piece1_));
#endif
#else
// When not using triangular array
kpp_[0] = fromKPP(king_, piece0_, piece1_);
kpp_[1] = fromKPP(king_, piece1_, piece0_);
#if defined(USE_KPP_MIRROR_WRITE)
kpp_[2] = fromKPP(flip_file(king_), mir_piece(piece0_), mir_piece(piece1_));
kpp_[3] = fromKPP(flip_file(king_), mir_piece(piece1_), mir_piece(piece0_));
#endif
#endif
}
// Get the index when counting the value of min_index() of this class as 0.
virtual uint64_t toRawIndex() const {
#if !defined(USE_TRIANGLE_WEIGHT_ARRAY)
return ((uint64_t)king_ * (uint64_t)fe_end_ + (uint64_t)piece0_) * (uint64_t)fe_end_ + (uint64_t)piece1_;
#else
// Macro similar to that used in Bonanza 6.0
auto PcPcOnSq = [&](Square k, PieceSquare i, PieceSquare j)
{
// (i,j) in this triangular array is the element in the i-th row and the j-th column.
// 1st row + 2 + ... + i = i * (i+1) / 2 because the i-th row and 0th column is the total of the elements up to that point
// The i-th row and the j-th column is j plus this. i*(i+1)/2+j
// PieceSquare type is assumed to be 32 bits, so if you do not pay attention to multiplication, it will overflow.
return (uint64_t)k * triangle_fe_end + (uint64_t)(uint64_t(i)*(uint64_t(i)+1) / 2 + uint64_t(j));
};
auto k = king_;
auto i = piece0_;
auto j = piece1_;
return (i >= j) ? PcPcOnSq(k, i, j) : PcPcOnSq(k, j, i);
#endif
}
// Returns whether or not the dimension lowered with toLowerDimensions is inverse.
// Prepared to match KK, KKP and interface. This method always returns false for this KPP class.
bool is_inverse() const {
return false;
}
// comparison operator
bool operator==(const KPP& rhs) {
return king() == rhs.king() &&
((piece0() == rhs.piece0() && piece1() == rhs.piece1())
#if defined(USE_TRIANGLE_WEIGHT_ARRAY)
// When using a triangular array, allow swapping of piece0 and piece1.
|| (piece0() == rhs.piece1() && piece1() == rhs.piece0())
#endif
); }
bool operator!=(const KPP& rhs) { return !(*this == rhs); }
private:
Square king_;
PieceSquare piece0_, piece1_;
uint64_t triangle_fe_end; // = (uint64_t)fe_end_*((uint64_t)fe_end_ + 1) / 2;
};
// Debug printer: writes the entry as "KPP(king,piece0,piece1)".
static std::ostream& operator<<(std::ostream& os, KPP rhs)
{
    return os << "KPP(" << rhs.king() << "," << rhs.piece0() << "," << rhs.piece1() << ")";
}
// Index helper for the four-piece KPPP (king, piece0, piece1, piece2) weights.
//
// Original author's sizing notes: with side-to-move and without mirroring,
// learning would need 2 TB or more; even with a triangular array it is about
// 50G entries x 12 bytes = 600 GB, and roughly half of that when only the
// mirrored form is stored. This class therefore always uses a triangular array.
//
// king() is not necessarily a real king Square: it is simply a value in
// [0, max_king_sq). When mirroring is used for compression, the caller has to
// map it to an actual king position.
//
// The pieces always satisfy piece0() > piece1() > piece2(), and the same
// constraint must hold for the values passed to the constructor.
struct KPPP : public SerializerBase
{
protected:
    KPPP(int king, PieceSquare p0, PieceSquare p1, PieceSquare p2) :
        king_(king), piece0_(p0), piece1_(p1), piece2_(p2)
    {
        assert(piece0_ > piece1_ && piece1_ > piece2_);
        /* sort_piece(); */
    }

public:
    // Value-initialize the members so a default-constructed object holds no
    // indeterminate values.
    KPPP() : king_(0), piece0_(), piece1_(), piece2_() {}

    virtual uint64_t size() const { return (uint64_t)max_king_sq_ * triangle_fe_end; }

    // Sets fe_end and the number of king slots.
    // fe_end:      fe_end assumed by this KPPP class.
    // max_king_sq: number of king slots handled by KPPP. Examples from the
    //              original author (shogi board): 3 ranks x 5 files = 15 with
    //              mirroring, 2 ranks x 9 files = 18 without.
    // Call this before using the object.
    virtual void set(int max_king_sq, uint64_t fe_end, uint64_t min_index) {
        // size() reads triangle_fe_end and SerializerBase::set() calls size(),
        // so this must be computed first.
        triangle_fe_end = fe_end * (fe_end - 1) * (fe_end - 2) / 6;

        SerializerBase::set(max_king_sq, fe_end, min_index);
    }

    // Number of entries produced by dimension reduction.
    // Mirror reduction is not supported for the time being.
    /*
    #if defined(USE_KPPP_MIRROR_WRITE)
    #define KPPP_LOWER_COUNT 2
    #else
    #define KPPP_LOWER_COUNT 1
    #endif
    */
#define KPPP_LOWER_COUNT 1

    // Fills kppp_ with the entries this one reduces to.
    // Permutations of p0, p1, p2 are not returned. The mirrored entry is
    // produced only when KPPP_LOWER_COUNT > 1, which is currently never the case.
    void toLowerDimensions(/*out*/ KPPP kppp_[KPPP_LOWER_COUNT]) const
    {
        kppp_[0] = fromKPPP(king_, piece0_, piece1_, piece2_);
#if KPPP_LOWER_COUNT > 1
        // mir_piece() breaks the ordering, so the pieces are sorted again.
        PieceSquare p_list[3] = { mir_piece(piece2_), mir_piece(piece1_), mir_piece(piece0_) };
        my_insertion_sort(p_list, 0, 3);
        kppp_[1] = fromKPPP((int)flip_file((Square)king_), p_list[2], p_list[1], p_list[0]);
#endif
    }

    // Builder that creates a KPPP object from a serialized index.
    KPPP fromIndex(uint64_t index) const { assert(index >= min_index()); return fromRawIndex(index - min_index()); }

    // Builder that creates a KPPP object from a raw index (starting at 0).
    //
    // Inverts toRawIndex(), i.e.
    //   index2 = i(i-1)(i-2)/6 + j(j-1)/2 + k   with i > j > k >= 0.
    // Because j(j-1)/2 + k <= i(i-1)/2 - 1, i is the largest value with
    // i(i-1)(i-2)/6 <= index2, and j is the largest value with
    // j(j-1)/2 <= index2 - i(i-1)(i-2)/6. Floating point is used only for a
    // first estimate; exact integer checks then fix any rounding error, so no
    // tuned epsilon is needed.
    KPPP fromRawIndex(uint64_t raw_index) const
    {
        uint64_t index2 = raw_index % triangle_fe_end;

        // Number of entries before row n at each level (n >= 2 resp. n >= 1).
        auto tetra = [](uint64_t n) { return n * (n - 1) * (n - 2) / 6; };
        auto tri = [](uint64_t n) { return n * (n - 1) / 2; };

        // i(i-1)(i-2) is close to (i-1)^3, so cbrt(6 * index2) + 1 is within a
        // couple of steps of the answer.
        uint64_t i = (uint64_t)pow(6.0 * (double)index2, 1.0 / 3) + 1;
        if (i < 2)
            i = 2; // i = 2 is the smallest row (index2 == 0 decodes to 2, 1, 0)
        while (i > 2 && tetra(i) > index2) --i;
        while (tetra(i + 1) <= index2) ++i;

        const uint64_t a = tetra(i);
        const uint64_t rest = index2 - a;

        // Quadratic formula for j(j-1)/2 = rest, then the same exact correction.
        uint64_t j = (uint64_t)((1.0 + sqrt(8.0 * (double)rest + 1.0)) / 2.0);
        if (j < 1)
            j = 1;
        while (j > 1 && tri(j) > rest) --j;
        while (tri(j + 1) <= rest) ++j;

        int piece0 = (int)i;
        int piece1 = (int)j;
        int piece2 = (int)(rest - tri(j));

        assert(piece0 > piece1 && piece1 > piece2);

        assert(piece2 < (int)fe_end_);
        assert(piece1 < (int)fe_end_);
        assert(piece0 < (int)fe_end_);

        raw_index /= triangle_fe_end;

        int king = (int)(raw_index /* % SQUARE_NB */);
        assert(king < max_king_sq_);

        // fromKPPP() propagates max_king_sq and fe_end.
        return fromKPPP(king, (PieceSquare)piece0, (PieceSquare)piece1, (PieceSquare)piece2);
    }

    // Builds a KPPP instance from k, p0, p1, p2.
    // The max_king_sq and fe_end given to set() are inherited.
    KPPP fromKPPP(int king, PieceSquare p0, PieceSquare p1, PieceSquare p2) const
    {
        KPPP kppp(king, p0, p1, p2);
        kppp.set(max_king_sq_, fe_end_, min_index());
        return kppp;
    }

    // Index relative to min_index().
    virtual uint64_t toRawIndex() const {

        // Same idea as the macro used in Bonanza 6.0.
        // Precondition: i > j > k (i == j or j == k is not allowed).
        auto PcPcPcOnSq = [this](int king, PieceSquare i, PieceSquare j, PieceSquare k)
        {
            // Entries before row i:       0 + 0 + 1 + 3 + 6 + ... = i(i-1)(i-2)/6
            // Entries before column j:    j(j-1)/2
            // Offset inside that column:  k
            assert(i > j && j > k);

            // PieceSquare is assumed to be 32 bits wide, so widen before
            // multiplying to avoid overflow.
            return (uint64_t)king * triangle_fe_end + (uint64_t)(
                uint64_t(i) * (uint64_t(i) - 1) * (uint64_t(i) - 2) / 6
                + uint64_t(j) * (uint64_t(j) - 1) / 2
                + uint64_t(k)
                );
        };

        return PcPcPcOnSq(king_, piece0_, piece1_, piece2_);
    }

    // Accessors for an object built with fromIndex()/fromKPPP().
    int king() const { return king_; }
    PieceSquare piece0() const { return piece0_; }
    PieceSquare piece1() const { return piece1_; }
    PieceSquare piece2() const { return piece2_; }

    // Present only to match the KK/KKP interface; always false for KPPP.
    bool is_inverse() const {
        return false;
    }

    // Number of elements in one triangle, assuming the array is laid out as
    // kppp[king_sq][triangle_fe_end].
    uint64_t get_triangle_fe_end() const { return triangle_fe_end; }

    // Comparison operators.
    bool operator==(const KPPP& rhs) const {
        // piece0 > piece1 > piece2 is assumed, so permutations need no handling.
        return king() == rhs.king() && piece0() == rhs.piece0() && piece1() == rhs.piece1() && piece2() == rhs.piece2();
    }
    bool operator!=(const KPPP& rhs) const { return !(*this == rhs); }

private:

    int king_;
    PieceSquare piece0_, piece1_, piece2_;

    // The [fe_end][fe_end][fe_end] cube of kppp[king_sq][fe_end][fe_end][fe_end]
    // is stored as a triangle. Row n holds n(n-1)/2 elements (0, 0, 1, 3, ...),
    // therefore
    //   triangle_fe_end = sum of n(n-1)/2 for n = 0..fe_end-1
    //                   = fe_end * (fe_end - 1) * (fe_end - 2) / 6
    // Filled in by set().
    uint64_t triangle_fe_end = 0;
};
// Debug printer: writes the entry as "KPPP(king,piece0,piece1,piece2)".
static std::ostream& operator<<(std::ostream& os, KPPP rhs)
{
    return os << "KPPP(" << rhs.king() << "," << rhs.piece0() << "," << rhs.piece1() << "," << rhs.piece2() << ")";
}
// Index helper for learning the four-piece KKPP weights.
//
// Same design as KPPP with one piece fewer. The positions of the two kings
// are encoded together as a single value in [0, max_king_sq).
//
// The pieces always satisfy piece0() > piece1(), and the same constraint must
// hold for the values passed to the constructor.
//
// Because of this constraint, the same PieceSquare value (e.g. the zero piece)
// cannot be passed for both piece0 and piece1. Per the original author,
// supporting handicap games would need extra handling in evaluate().
struct KKPP : SerializerBase
{
protected:
    KKPP(int king, PieceSquare p0, PieceSquare p1) :
        king_(king), piece0_(p0), piece1_(p1)
    {
        assert(piece0_ > piece1_);
        /* sort_piece(); */
    }

public:
    // Value-initialize the members so a default-constructed object holds no
    // indeterminate values.
    KKPP() : king_(0), piece0_(), piece1_() {}

    virtual uint64_t size() const { return (uint64_t)max_king_sq_ * triangle_fe_end; }

    // Sets fe_end and the number of king slots.
    // fe_end:      fe_end assumed by this KKPP class.
    // max_king_sq: number of king-pair slots handled by KKPP. Example from the
    //              original author (shogi board): 9 ranks x 5 files with
    //              mirroring = 45 per king, 45 * 45 = 2025 for the pair.
    // Call this before using the object.
    // Marked virtual explicitly, like the sibling classes; it overrides
    // SerializerBase::set() either way.
    virtual void set(int max_king_sq, uint64_t fe_end, uint64_t min_index) {
        // size() reads triangle_fe_end and SerializerBase::set() calls size(),
        // so this must be computed first.
        triangle_fe_end = fe_end * (fe_end - 1) / 2;

        SerializerBase::set(max_king_sq, fe_end, min_index);
    }

    // Number of entries produced by dimension reduction.
    // Mirror reduction is not supported for the time being (the author notes
    // the learning memory would be wasted).
#define KKPP_LOWER_COUNT 1

    // Fills kkpp_ with the entries this one reduces to.
    // The entry with p0 and p1 swapped is not returned, and no mirrored entry
    // is produced at present.
    void toLowerDimensions(/*out*/ KKPP kkpp_[KKPP_LOWER_COUNT]) const
    {
        kkpp_[0] = fromKKPP(king_, piece0_, piece1_);

        // If mirroring is added: mir_piece() breaks the ordering, so the
        // pieces must be sorted again, and a mirror for king_ must be defined.
    }

    // Builder that creates a KKPP object from a serialized index.
    KKPP fromIndex(uint64_t index) const { assert(index >= min_index()); return fromRawIndex(index - min_index()); }

    // Builder that creates a KKPP object from a raw index (starting at 0).
    //
    // Inverts index2 = i(i-1)/2 + j with i > j >= 0: i is the largest value
    // with i(i-1)/2 <= index2. The quadratic formula gives a first estimate,
    // and exact 64-bit integer checks then fix any rounding error.
    KKPP fromRawIndex(uint64_t raw_index) const
    {
        uint64_t index2 = raw_index % triangle_fe_end;

        // Entries before row n (n >= 1), computed in 64 bits.
        auto tri = [](uint64_t n) { return n * (n - 1) / 2; };

        uint64_t i = (uint64_t)((sqrt(8.0 * (double)index2 + 1.0) + 1.0) / 2.0);
        if (i < 1)
            i = 1; // i = 1 is the smallest row (index2 == 0 decodes to 1, 0)
        while (i > 1 && tri(i) > index2) --i;
        while (tri(i + 1) <= index2) ++i;

        int piece0 = (int)i;
        int piece1 = (int)(index2 - tri(i));

        assert(piece0 > piece1);

        assert(piece1 < (int)fe_end_);
        assert(piece0 < (int)fe_end_);

        raw_index /= triangle_fe_end;

        int king = (int)(raw_index /* % SQUARE_NB */);
        assert(king < max_king_sq_);

        // fromKKPP() propagates max_king_sq and fe_end.
        return fromKKPP(king, (PieceSquare)piece0, (PieceSquare)piece1);
    }

    // Builds a KKPP instance from k, p0, p1.
    // The max_king_sq and fe_end given to set() are inherited.
    KKPP fromKKPP(int king, PieceSquare p0, PieceSquare p1) const
    {
        KKPP kkpp(king, p0, p1);
        kkpp.set(max_king_sq_, fe_end_, min_index());
        return kkpp;
    }

    // Index relative to min_index().
    virtual uint64_t toRawIndex() const {

        // Same idea as the macro used in Bonanza 6.0.
        // Precondition: i > j (i == j is not allowed).
        auto PcPcOnSq = [this](int king, PieceSquare i, PieceSquare j)
        {
            assert(i > j);

            // PieceSquare is assumed to be 32 bits wide, so widen before
            // multiplying to avoid overflow.
            return (uint64_t)king * triangle_fe_end + (uint64_t)(
                + uint64_t(i) * (uint64_t(i) - 1) / 2
                + uint64_t(j)
                );
        };

        return PcPcOnSq(king_, piece0_, piece1_);
    }

    // Accessors for an object built with fromIndex()/fromKKPP().
    int king() const { return king_; }
    PieceSquare piece0() const { return piece0_; }
    PieceSquare piece1() const { return piece1_; }

    // Present only to match the KK/KKP interface; always false for KKPP.
    bool is_inverse() const {
        return false;
    }

    // Number of elements in one triangle, assuming the array is laid out as
    // kkpp[king_sq][triangle_fe_end].
    uint64_t get_triangle_fe_end() const { return triangle_fe_end; }

    // Comparison operators.
    bool operator==(const KKPP& rhs) const {
        // piece0 > piece1 is assumed, so the swapped pair needs no handling.
        return king() == rhs.king() && piece0() == rhs.piece0() && piece1() == rhs.piece1();
    }
    bool operator!=(const KKPP& rhs) const { return !(*this == rhs); }

private:

    int king_;
    PieceSquare piece0_, piece1_;

    // The [fe_end][fe_end] square of kkpp[king_sq][fe_end][fe_end] stored as a
    // triangle: fe_end * (fe_end - 1) / 2 elements. Filled in by set().
    uint64_t triangle_fe_end = 0;
};
// Debug printer: writes the entry as "KKPP(king,piece0,piece1)".
static std::ostream& operator<<(std::ostream& os, KKPP rhs)
{
    return os << "KKPP(" << rhs.king() << "," << rhs.piece0() << "," << rhs.piece1() << ")";
}
} }
#endif // defined (EVAL_LEARN) #endif // defined (EVAL_LEARN)
+1 -1
View File
@@ -130,7 +130,7 @@ Entry* probe(const Position& pos) {
Value npm_w = pos.non_pawn_material(WHITE); Value npm_w = pos.non_pawn_material(WHITE);
Value npm_b = pos.non_pawn_material(BLACK); Value npm_b = pos.non_pawn_material(BLACK);
Value npm = Utility::clamp(npm_w + npm_b, EndgameLimit, MidgameLimit); Value npm = std::clamp(npm_w + npm_b, EndgameLimit, MidgameLimit);
// Map total non-pawn material into [PHASE_ENDGAME, PHASE_MIDGAME] // Map total non-pawn material into [PHASE_ENDGAME, PHASE_MIDGAME]
e->gamePhase = Phase(((npm - EndgameLimit) * PHASE_MIDGAME) / (MidgameLimit - EndgameLimit)); e->gamePhase = Phase(((npm - EndgameLimit) * PHASE_MIDGAME) / (MidgameLimit - EndgameLimit));
+33 -15
View File
@@ -51,6 +51,11 @@ typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY);
#include <sys/mman.h> #include <sys/mman.h>
#endif #endif
#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32))
#define POSIXALIGNEDALLOC
#include <stdlib.h>
#endif
#include "misc.h" #include "misc.h"
#include "thread.h" #include "thread.h"
@@ -214,26 +219,33 @@ const std::string compiler_info() {
compiler += "\nCompilation settings include: "; compiler += "\nCompilation settings include: ";
compiler += (Is64Bit ? " 64bit" : " 32bit"); compiler += (Is64Bit ? " 64bit" : " 32bit");
#if defined(USE_VNNI)
compiler += " VNNI";
#endif
#if defined(USE_AVX512) #if defined(USE_AVX512)
compiler += " AVX512"; compiler += " AVX512";
#endif #endif
compiler += (HasPext ? " BMI2" : "");
#if defined(USE_AVX2) #if defined(USE_AVX2)
compiler += " AVX2"; compiler += " AVX2";
#endif #endif
#if defined(USE_SSE42)
compiler += " SSE42";
#endif
#if defined(USE_SSE41) #if defined(USE_SSE41)
compiler += " SSE41"; compiler += " SSE41";
#endif #endif
#if defined(USE_SSSE3) #if defined(USE_SSSE3)
compiler += " SSSE3"; compiler += " SSSE3";
#endif #endif
#if defined(USE_SSE3) #if defined(USE_SSE2)
compiler += " SSE3"; compiler += " SSE2";
#endif #endif
compiler += (HasPext ? " BMI2" : "");
compiler += (HasPopCnt ? " POPCNT" : ""); compiler += (HasPopCnt ? " POPCNT" : "");
#if defined(USE_MMX)
compiler += " MMX";
#endif
#if defined(USE_NEON)
compiler += " NEON";
#endif
#if !defined(NDEBUG) #if !defined(NDEBUG)
compiler += " DEBUG"; compiler += " DEBUG";
#endif #endif
@@ -316,14 +328,17 @@ void prefetch(void* addr) {
#endif #endif
/// Wrappers for systems where the c++17 implementation doesn't guarantee the availability of aligned_alloc.
/// Memory allocated with std_aligned_alloc must be freed with std_aligned_free. /// std_aligned_alloc() is our wrapper for systems where the c++17 implementation
/// /// does not guarantee the availability of aligned_alloc(). Memory allocated with
/// std_aligned_alloc() must be freed with std_aligned_free().
void* std_aligned_alloc(size_t alignment, size_t size) { void* std_aligned_alloc(size_t alignment, size_t size) {
#if (defined(__APPLE__) && defined(_LIBCPP_HAS_C11_FEATURES)) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32))
return aligned_alloc(alignment, size); #if defined(POSIXALIGNEDALLOC)
#elif (defined(_WIN32) || (defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES))) void *mem;
return posix_memalign(&mem, alignment, size) ? nullptr : mem;
#elif defined(_WIN32)
return _mm_malloc(size, alignment); return _mm_malloc(size, alignment);
#else #else
return std::aligned_alloc(alignment, size); return std::aligned_alloc(alignment, size);
@@ -331,16 +346,17 @@ void* std_aligned_alloc(size_t alignment, size_t size) {
} }
void std_aligned_free(void* ptr) { void std_aligned_free(void* ptr) {
#if (defined(__APPLE__) && defined(_LIBCPP_HAS_C11_FEATURES)) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32))
#if defined(POSIXALIGNEDALLOC)
free(ptr); free(ptr);
#elif (defined(_WIN32) || (defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES))) #elif defined(_WIN32)
_mm_free(ptr); _mm_free(ptr);
#else #else
free(ptr); free(ptr);
#endif #endif
} }
/// aligned_ttmem_alloc() will return suitably aligned memory, and if possible use large pages. /// aligned_ttmem_alloc() will return suitably aligned memory, if possible using large pages.
/// The returned pointer is the aligned one, while the mem argument is the one that needs /// The returned pointer is the aligned one, while the mem argument is the one that needs
/// to be passed to free. With c++17 some of this functionality could be simplified. /// to be passed to free. With c++17 some of this functionality could be simplified.
@@ -352,7 +368,9 @@ void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
size_t size = ((allocSize + alignment - 1) / alignment) * alignment; // multiple of alignment size_t size = ((allocSize + alignment - 1) / alignment) * alignment; // multiple of alignment
if (posix_memalign(&mem, alignment, size)) if (posix_memalign(&mem, alignment, size))
mem = nullptr; mem = nullptr;
#if defined(MADV_HUGEPAGE)
madvise(mem, allocSize, MADV_HUGEPAGE); madvise(mem, allocSize, MADV_HUGEPAGE);
#endif
return mem; return mem;
} }
-8
View File
@@ -67,14 +67,6 @@ std::ostream& operator<<(std::ostream&, SyncCout);
#define sync_cout std::cout << IO_LOCK #define sync_cout std::cout << IO_LOCK
#define sync_endl std::endl << IO_UNLOCK #define sync_endl std::endl << IO_UNLOCK
namespace Utility {
/// Clamp a value between lo and hi. Available in c++17.
template<class T> constexpr const T& clamp(const T& v, const T& lo, const T& hi) {
return v < lo ? lo : v > hi ? hi : v;
}
}
/// xorshift64star Pseudo-Random Number Generator /// xorshift64star Pseudo-Random Number Generator
/// This class is based on original code written and dedicated /// This class is based on original code written and dedicated
+7 -7
View File
@@ -182,7 +182,7 @@ top:
--endMoves; --endMoves;
++stage; ++stage;
/* fallthrough */ [[fallthrough]];
case REFUTATION: case REFUTATION:
if (select<Next>([&](){ return *cur != MOVE_NONE if (select<Next>([&](){ return *cur != MOVE_NONE
@@ -190,7 +190,7 @@ top:
&& pos.pseudo_legal(*cur); })) && pos.pseudo_legal(*cur); }))
return *(cur - 1); return *(cur - 1);
++stage; ++stage;
/* fallthrough */ [[fallthrough]];
case QUIET_INIT: case QUIET_INIT:
if (!skipQuiets) if (!skipQuiets)
@@ -203,7 +203,7 @@ top:
} }
++stage; ++stage;
/* fallthrough */ [[fallthrough]];
case QUIET: case QUIET:
if ( !skipQuiets if ( !skipQuiets
@@ -217,7 +217,7 @@ top:
endMoves = endBadCaptures; endMoves = endBadCaptures;
++stage; ++stage;
/* fallthrough */ [[fallthrough]];
case BAD_CAPTURE: case BAD_CAPTURE:
return select<Next>([](){ return true; }); return select<Next>([](){ return true; });
@@ -228,7 +228,7 @@ top:
score<EVASIONS>(); score<EVASIONS>();
++stage; ++stage;
/* fallthrough */ [[fallthrough]];
case EVASION: case EVASION:
return select<Best>([](){ return true; }); return select<Best>([](){ return true; });
@@ -246,14 +246,14 @@ top:
return MOVE_NONE; return MOVE_NONE;
++stage; ++stage;
/* fallthrough */ [[fallthrough]];
case QCHECK_INIT: case QCHECK_INIT:
cur = moves; cur = moves;
endMoves = generate<QUIET_CHECKS>(pos, cur); endMoves = generate<QUIET_CHECKS>(pos, cur);
++stage; ++stage;
/* fallthrough */ [[fallthrough]];
case QCHECK: case QCHECK:
return select<Next>([](){ return true; }); return select<Next>([](){ return true; });
+3 -3
View File
@@ -86,9 +86,9 @@ enum StatsType { NoCaptures, Captures };
/// the move's from and to squares, see www.chessprogramming.org/Butterfly_Boards /// the move's from and to squares, see www.chessprogramming.org/Butterfly_Boards
typedef Stats<int16_t, 10692, COLOR_NB, int(SQUARE_NB) * int(SQUARE_NB)> ButterflyHistory; typedef Stats<int16_t, 10692, COLOR_NB, int(SQUARE_NB) * int(SQUARE_NB)> ButterflyHistory;
/// At higher depths LowPlyHistory records successful quiet moves near the root and quiet /// At higher depths LowPlyHistory records successful quiet moves near the root
/// moves which are/were in the PV (ttPv) /// and quiet moves which are/were in the PV (ttPv). It is cleared with each new
/// It is cleared with each new search and filled during iterative deepening /// search and filled during iterative deepening.
constexpr int MAX_LPH = 4; constexpr int MAX_LPH = 4;
typedef Stats<int16_t, 10692, MAX_LPH, int(SQUARE_NB) * int(SQUARE_NB)> LowPlyHistory; typedef Stats<int16_t, 10692, MAX_LPH, int(SQUARE_NB) * int(SQUARE_NB)> LowPlyHistory;
+11 -15
View File
@@ -29,7 +29,9 @@
#include "evaluate_nnue.h" #include "evaluate_nnue.h"
ExtPieceSquare kpp_board_index[PIECE_NB] = { namespace Eval::NNUE {
uint32_t kpp_board_index[PIECE_NB][COLOR_NB] = {
// convention: W - us, B - them // convention: W - us, B - them
// viewed from other side, W and B are reversed // viewed from other side, W and B are reversed
{ PS_NONE, PS_NONE }, { PS_NONE, PS_NONE },
@@ -50,9 +52,6 @@ ExtPieceSquare kpp_board_index[PIECE_NB] = {
{ PS_NONE, PS_NONE } { PS_NONE, PS_NONE }
}; };
namespace Eval::NNUE {
// Input feature converter // Input feature converter
AlignedPtr<FeatureTransformer> feature_transformer; AlignedPtr<FeatureTransformer> feature_transformer;
@@ -86,7 +85,7 @@ namespace Eval::NNUE {
bool ReadParameters(std::istream& stream, const AlignedPtr<T>& pointer) { bool ReadParameters(std::istream& stream, const AlignedPtr<T>& pointer) {
std::uint32_t header; std::uint32_t header;
stream.read(reinterpret_cast<char*>(&header), sizeof(header)); header = read_little_endian<std::uint32_t>(stream);
if (!stream || header != T::GetHashValue()) return false; if (!stream || header != T::GetHashValue()) return false;
return pointer->ReadParameters(stream); return pointer->ReadParameters(stream);
} }
@@ -109,13 +108,13 @@ namespace Eval::NNUE {
} }
// Read network header // Read network header
bool ReadHeader(std::istream& stream, bool ReadHeader(std::istream& stream, std::uint32_t* hash_value, std::string* architecture)
std::uint32_t* hash_value, std::string* architecture) { {
std::uint32_t version, size; std::uint32_t version, size;
stream.read(reinterpret_cast<char*>(&version), sizeof(version));
stream.read(reinterpret_cast<char*>(hash_value), sizeof(*hash_value)); version = read_little_endian<std::uint32_t>(stream);
stream.read(reinterpret_cast<char*>(&size), sizeof(size)); *hash_value = read_little_endian<std::uint32_t>(stream);
size = read_little_endian<std::uint32_t>(stream);
if (!stream || version != kVersion) return false; if (!stream || version != kVersion) return false;
architecture->resize(size); architecture->resize(size);
stream.read(&(*architecture)[0], size); stream.read(&(*architecture)[0], size);
@@ -202,10 +201,7 @@ namespace Eval::NNUE {
// Evaluation function. Perform differential calculation. // Evaluation function. Perform differential calculation.
Value evaluate(const Position& pos) { Value evaluate(const Position& pos) {
Value v = ComputeScore(pos, false); return ComputeScore(pos, false);
v = Utility::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);
return v;
} }
// Evaluation function. Perform full calculation. // Evaluation function. Perform full calculation.
+1 -2
View File
@@ -106,8 +106,7 @@ namespace Eval::NNUE::Features {
reset[perspective] = false; reset[perspective] = false;
switch (trigger) { switch (trigger) {
case TriggerEvent::kFriendKingMoved: case TriggerEvent::kFriendKingMoved:
reset[perspective] = reset[perspective] = dp.piece[0] == make_piece(perspective, KING);
dp.pieceId[0] == PIECE_ID_KING + perspective;
break; break;
default: default:
assert(false); assert(false);
+19 -39
View File
@@ -23,25 +23,17 @@
namespace Eval::NNUE::Features { namespace Eval::NNUE::Features {
// Find the index of the feature quantity from the king position and PieceSquare // Orient a square according to perspective (rotates by 180 for black)
template <Side AssociatedKing> inline Square orient(Color perspective, Square s) {
inline IndexType HalfKP<AssociatedKing>::MakeIndex(Square sq_k, PieceSquare p) { return Square(int(s) ^ (bool(perspective) * 63));
return static_cast<IndexType>(PS_END) * static_cast<IndexType>(sq_k) + p;
} }
// Get pieces information // Find the index of the feature quantity from the king position and PieceSquare
template <Side AssociatedKing> template <Side AssociatedKing>
inline void HalfKP<AssociatedKing>::GetPieces( inline IndexType HalfKP<AssociatedKing>::MakeIndex(
const Position& pos, Color perspective, Color perspective, Square s, Piece pc, Square ksq) {
PieceSquare** pieces, Square* sq_target_k) {
*pieces = (perspective == BLACK) ? return IndexType(orient(perspective, s) + kpp_board_index[pc][perspective] + PS_END * ksq);
pos.eval_list()->piece_list_fb() :
pos.eval_list()->piece_list_fw();
const PieceId target = (AssociatedKing == Side::kFriend) ?
static_cast<PieceId>(PIECE_ID_KING + perspective) :
static_cast<PieceId>(PIECE_ID_KING + ~perspective);
*sq_target_k = static_cast<Square>(((*pieces)[target] - PS_W_KING) % SQUARE_NB);
} }
// Get a list of indices for active features // Get a list of indices for active features
@@ -49,16 +41,11 @@ namespace Eval::NNUE::Features {
void HalfKP<AssociatedKing>::AppendActiveIndices( void HalfKP<AssociatedKing>::AppendActiveIndices(
const Position& pos, Color perspective, IndexList* active) { const Position& pos, Color perspective, IndexList* active) {
// Do nothing if array size is small to avoid compiler warning Square ksq = orient(perspective, pos.square<KING>(perspective));
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return; Bitboard bb = pos.pieces() & ~pos.pieces(KING);
while (bb) {
PieceSquare* pieces; Square s = pop_lsb(&bb);
Square sq_target_k; active->push_back(MakeIndex(perspective, s, pos.piece_on(s), ksq));
GetPieces(pos, perspective, &pieces, &sq_target_k);
for (PieceId i = PIECE_ID_ZERO; i < PIECE_ID_KING; ++i) {
if (pieces[i] != PS_NONE) {
active->push_back(MakeIndex(sq_target_k, pieces[i]));
}
} }
} }
@@ -68,22 +55,15 @@ namespace Eval::NNUE::Features {
const Position& pos, Color perspective, const Position& pos, Color perspective,
IndexList* removed, IndexList* added) { IndexList* removed, IndexList* added) {
PieceSquare* pieces; Square ksq = orient(perspective, pos.square<KING>(perspective));
Square sq_target_k;
GetPieces(pos, perspective, &pieces, &sq_target_k);
const auto& dp = pos.state()->dirtyPiece; const auto& dp = pos.state()->dirtyPiece;
for (int i = 0; i < dp.dirty_num; ++i) { for (int i = 0; i < dp.dirty_num; ++i) {
if (dp.pieceId[i] >= PIECE_ID_KING) continue; Piece pc = dp.piece[i];
const auto old_p = static_cast<PieceSquare>( if (type_of(pc) == KING) continue;
dp.old_piece[i].from[perspective]); if (dp.from[i] != SQ_NONE)
if (old_p != PS_NONE) { removed->push_back(MakeIndex(perspective, dp.from[i], pc, ksq));
removed->push_back(MakeIndex(sq_target_k, old_p)); if (dp.to[i] != SQ_NONE)
} added->push_back(MakeIndex(perspective, dp.to[i], pc, ksq));
const auto new_p = static_cast<PieceSquare>(
dp.new_piece[i].from[perspective]);
if (new_p != PS_NONE) {
added->push_back(MakeIndex(sq_target_k, new_p));
}
} }
} }
+3 -7
View File
@@ -41,7 +41,7 @@ namespace Eval::NNUE::Features {
static constexpr IndexType kDimensions = static constexpr IndexType kDimensions =
static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_END); static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_END);
// Maximum number of simultaneously active features // Maximum number of simultaneously active features
static constexpr IndexType kMaxActiveDimensions = PIECE_ID_KING; static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count
// Trigger for full calculation instead of difference calculation // Trigger for full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kFriendKingMoved; static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kFriendKingMoved;
@@ -53,13 +53,9 @@ namespace Eval::NNUE::Features {
static void AppendChangedIndices(const Position& pos, Color perspective, static void AppendChangedIndices(const Position& pos, Color perspective,
IndexList* removed, IndexList* added); IndexList* removed, IndexList* added);
// Index of a feature for a given king position and another piece on some square
static IndexType MakeIndex(Square sq_k, PieceSquare p);
private: private:
// Get pieces information // Index of a feature for a given king position and another piece on some square
static void GetPieces(const Position& pos, Color perspective, static IndexType MakeIndex(Color perspective, Square s, Piece pc, Square sq_k);
PieceSquare** pieces, Square* sq_target_k);
}; };
} // namespace Eval::NNUE::Features } // namespace Eval::NNUE::Features
+28 -47
View File
@@ -11,49 +11,41 @@ namespace NNUE {
namespace Features { namespace Features {
// Orient a square according to perspective (rotates by 180 for black).
// The square is returned unchanged when bool(perspective) is false and is
// XOR-ed with 63 otherwise.
// NOTE(review): assumes squares are numbered 0..63 and that only BLACK
// converts to true - confirm against the Color/Square definitions.
inline Square orient(Color perspective, Square s) {
return Square(int(s) ^ (bool(perspective) * 63));
}
// Find the index of the feature quantity from the king position and PieceSquare // Find the index of the feature quantity from the king position and PieceSquare
template <Side AssociatedKing> template <Side AssociatedKing>
inline IndexType HalfRelativeKP<AssociatedKing>::MakeIndex( inline IndexType HalfRelativeKP<AssociatedKing>::MakeIndex(
Square sq_k, PieceSquare p) { Color perspective, Square s, Piece pc, Square sq_k) {
const IndexType p = IndexType(orient(perspective, s) + kpp_board_index[pc][perspective]);
return MakeIndex(sq_k, p);
}
// Find the index of the feature quantity from the king position and PieceSquare
template <Side AssociatedKing>
inline IndexType HalfRelativeKP<AssociatedKing>::MakeIndex(
Square sq_k, IndexType p) {
constexpr IndexType W = kBoardWidth; constexpr IndexType W = kBoardWidth;
constexpr IndexType H = kBoardHeight; constexpr IndexType H = kBoardHeight;
const IndexType piece_index = (p - PieceSquare::PS_W_PAWN) / SQUARE_NB; const IndexType piece_index = (p - PS_W_PAWN) / SQUARE_NB;
const Square sq_p = static_cast<Square>((p - PieceSquare::PS_W_PAWN) % SQUARE_NB); const Square sq_p = static_cast<Square>((p - PS_W_PAWN) % SQUARE_NB);
const IndexType relative_file = file_of(sq_p) - file_of(sq_k) + (W / 2); const IndexType relative_file = file_of(sq_p) - file_of(sq_k) + (W / 2);
const IndexType relative_rank = rank_of(sq_p) - rank_of(sq_k) + (H / 2); const IndexType relative_rank = rank_of(sq_p) - rank_of(sq_k) + (H / 2);
return H * W * piece_index + H * relative_file + relative_rank; return H * W * piece_index + H * relative_file + relative_rank;
} }
// Get the piece information
template <Side AssociatedKing>
inline void HalfRelativeKP<AssociatedKing>::GetPieces(
const Position& pos, Color perspective,
PieceSquare** pieces, Square* sq_target_k) {
*pieces = (perspective == BLACK) ?
pos.eval_list()->piece_list_fb() :
pos.eval_list()->piece_list_fw();
const PieceId target = (AssociatedKing == Side::kFriend) ?
static_cast<PieceId>(PieceId::PIECE_ID_KING + perspective) :
static_cast<PieceId>(PieceId::PIECE_ID_KING + ~perspective);
*sq_target_k = static_cast<Square>(((*pieces)[target] - PieceSquare::PS_W_KING) % SQUARE_NB);
}
// Get a list of indices with a value of 1 among the features // Get a list of indices with a value of 1 among the features
template <Side AssociatedKing> template <Side AssociatedKing>
void HalfRelativeKP<AssociatedKing>::AppendActiveIndices( void HalfRelativeKP<AssociatedKing>::AppendActiveIndices(
const Position& pos, Color perspective, IndexList* active) { const Position& pos, Color perspective, IndexList* active) {
// do nothing if array size is small to avoid compiler warning Square ksq = orient(perspective, pos.square<KING>(perspective));
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return; Bitboard bb = pos.pieces() & ~pos.pieces(KING);
while (bb) {
PieceSquare* pieces; Square s = pop_lsb(&bb);
Square sq_target_k; active->push_back(MakeIndex(perspective, s, pos.piece_on(s), ksq));
GetPieces(pos, perspective, &pieces, &sq_target_k);
for (PieceId i = PieceId::PIECE_ID_ZERO; i < PieceId::PIECE_ID_KING; ++i) {
if (pieces[i] >= PieceSquare::PS_W_PAWN) {
if (pieces[i] != PieceSquare::PS_NONE) {
active->push_back(MakeIndex(sq_target_k, pieces[i]));
}
}
} }
} }
@@ -62,26 +54,15 @@ template <Side AssociatedKing>
void HalfRelativeKP<AssociatedKing>::AppendChangedIndices( void HalfRelativeKP<AssociatedKing>::AppendChangedIndices(
const Position& pos, Color perspective, const Position& pos, Color perspective,
IndexList* removed, IndexList* added) { IndexList* removed, IndexList* added) {
PieceSquare* pieces; Square ksq = orient(perspective, pos.square<KING>(perspective));
Square sq_target_k;
GetPieces(pos, perspective, &pieces, &sq_target_k);
const auto& dp = pos.state()->dirtyPiece; const auto& dp = pos.state()->dirtyPiece;
for (int i = 0; i < dp.dirty_num; ++i) { for (int i = 0; i < dp.dirty_num; ++i) {
if (dp.pieceId[i] >= PieceId::PIECE_ID_KING) continue; Piece pc = dp.piece[i];
const auto old_p = static_cast<PieceSquare>( if (type_of(pc) == KING) continue;
dp.old_piece[i].from[perspective]); if (dp.from[i] != SQ_NONE)
if (old_p >= PieceSquare::PS_W_PAWN) { removed->push_back(MakeIndex(perspective, dp.from[i], pc, ksq));
if (old_p != PieceSquare::PS_NONE) { if (dp.to[i] != SQ_NONE)
removed->push_back(MakeIndex(sq_target_k, old_p)); added->push_back(MakeIndex(perspective, dp.to[i], pc, ksq));
}
}
const auto new_p = static_cast<PieceSquare>(
dp.new_piece[i].from[perspective]);
if (new_p >= PieceSquare::PS_W_PAWN) {
if (new_p != PieceSquare::PS_NONE) {
added->push_back(MakeIndex(sq_target_k, new_p));
}
}
} }
} }
+5 -8
View File
@@ -25,7 +25,7 @@ class HalfRelativeKP {
static constexpr std::uint32_t kHashValue = static constexpr std::uint32_t kHashValue =
0xF9180919u ^ (AssociatedKing == Side::kFriend); 0xF9180919u ^ (AssociatedKing == Side::kFriend);
// Number of piece kinds, excluding kings // Number of piece kinds, excluding kings
static constexpr IndexType kNumPieceKinds = (PieceSquare::PS_END - PieceSquare::PS_W_PAWN) / SQUARE_NB; static constexpr IndexType kNumPieceKinds = 5 * 2;
// Width of the virtual board with the king in the center // Width of the virtual board with the king in the center
static constexpr IndexType kBoardWidth = FILE_NB * 2 - 1; static constexpr IndexType kBoardWidth = FILE_NB * 2 - 1;
// Height of the virtual board with the king in the center // Height of the virtual board with the king in the center
@@ -34,7 +34,7 @@ class HalfRelativeKP {
static constexpr IndexType kDimensions = static constexpr IndexType kDimensions =
kNumPieceKinds * kBoardHeight * kBoardWidth; kNumPieceKinds * kBoardHeight * kBoardWidth;
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions = PieceId::PIECE_ID_KING; static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count
// Timing of full calculation instead of difference calculation // Timing of full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger = static constexpr TriggerEvent kRefreshTrigger =
(AssociatedKing == Side::kFriend) ? (AssociatedKing == Side::kFriend) ?
@@ -49,12 +49,9 @@ class HalfRelativeKP {
IndexList* removed, IndexList* added); IndexList* removed, IndexList* added);
// Find the index of the feature quantity from the king position and PieceSquare // Find the index of the feature quantity from the king position and PieceSquare
static IndexType MakeIndex(Square sq_k, PieceSquare p); static IndexType MakeIndex(Square s, IndexType p);
// Find the index of the feature quantity from the king position and PieceSquare
private: static IndexType MakeIndex(Color perspective, Square s, Piece pc, Square sq_k);
// Get the piece information
static void GetPieces(const Position& pos, Color perspective,
PieceSquare** pieces, Square* sq_target_k);
}; };
} // namespace Features } // namespace Features
+24 -15
View File
@@ -11,19 +11,21 @@ namespace NNUE {
namespace Features { namespace Features {
// Orient a square according to perspective (rotates by 180 for black).
// The square is returned unchanged when bool(perspective) is false and is
// XOR-ed with 63 otherwise.
// NOTE(review): assumes squares are numbered 0..63 and that only BLACK
// converts to true - confirm against the Color/Square definitions.
inline Square orient(Color perspective, Square s) {
return Square(int(s) ^ (bool(perspective) * 63));
}
// Feature index of a king standing on square `s`, seen from `perspective`.
// The oriented square is used as the base index; 64 is added when
// `perspective ^ king_color` is non-zero.
IndexType K::MakeIndex(Color perspective, Square s, Color king_color) {
  const bool colors_differ = bool(perspective ^ king_color);
  const auto oriented_sq = orient(perspective, s);
  return IndexType(oriented_sq + colors_differ * 64);
}
// Get a list of indices with a value of 1 among the features // Get a list of indices with a value of 1 among the features
void K::AppendActiveIndices( void K::AppendActiveIndices(
const Position& pos, Color perspective, IndexList* active) { const Position& pos, Color perspective, IndexList* active) {
// do nothing if array size is small to avoid compiler warning for (auto color : Colors) {
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return; active->push_back(MakeIndex(perspective, pos.square<KING>(color), color));
const PieceSquare* pieces = (perspective == BLACK) ?
pos.eval_list()->piece_list_fb() :
pos.eval_list()->piece_list_fw();
assert(pieces[PieceId::PIECE_ID_BKING] != PieceSquare::PS_NONE);
assert(pieces[PieceId::PIECE_ID_WKING] != PieceSquare::PS_NONE);
for (PieceId i = PieceId::PIECE_ID_KING; i < PieceId::PIECE_ID_NONE; ++i) {
active->push_back(pieces[i] - PieceSquare::PS_END);
} }
} }
@@ -32,12 +34,19 @@ void K::AppendChangedIndices(
const Position& pos, Color perspective, const Position& pos, Color perspective,
IndexList* removed, IndexList* added) { IndexList* removed, IndexList* added) {
const auto& dp = pos.state()->dirtyPiece; const auto& dp = pos.state()->dirtyPiece;
if (dp.pieceId[0] >= PieceId::PIECE_ID_KING) { Color king_color;
removed->push_back( if (dp.piece[0] == Piece::W_KING) {
dp.old_piece[0].from[perspective] - PieceSquare::PS_END); king_color = WHITE;
added->push_back(
dp.new_piece[0].from[perspective] - PieceSquare::PS_END);
} }
else if (dp.piece[0] == Piece::B_KING) {
king_color = BLACK;
}
else {
return;
}
removed->push_back(MakeIndex(perspective, dp.from[0], king_color));
added->push_back(MakeIndex(perspective, dp.to[0], king_color));
} }
} // namespace Features } // namespace Features
+4
View File
@@ -35,6 +35,10 @@ class K {
// Get a list of indices whose values have changed from the previous one in the feature quantity // Get a list of indices whose values have changed from the previous one in the feature quantity
static void AppendChangedIndices(const Position& pos, Color perspective, static void AppendChangedIndices(const Position& pos, Color perspective,
IndexList* removed, IndexList* added); IndexList* removed, IndexList* added);
private:
// Index of a feature for a given king position.
static IndexType MakeIndex(Color perspective, Square s, Color king_color);
}; };
} // namespace Features } // namespace Features
+21 -17
View File
@@ -11,19 +11,24 @@ namespace NNUE {
namespace Features { namespace Features {
// Orient a square according to perspective (rotates by 180 for black).
// The square is returned unchanged when bool(perspective) is false and is
// XOR-ed with 63 otherwise.
// NOTE(review): assumes squares are numbered 0..63 and that only BLACK
// converts to true - confirm against the Color/Square definitions.
inline Square orient(Color perspective, Square s) {
return Square(int(s) ^ (bool(perspective) * 63));
}
// Feature index of piece `pc` standing on square `s`, seen from
// `perspective`: the oriented square plus the kpp_board_index entry for
// that piece and perspective.
inline IndexType P::MakeIndex(
Color perspective, Square s, Piece pc) {
  const auto oriented_sq = orient(perspective, s);
  const auto piece_base = kpp_board_index[pc][perspective];
  return IndexType(oriented_sq + piece_base);
}
// Get a list of indices with a value of 1 among the features // Get a list of indices with a value of 1 among the features
void P::AppendActiveIndices( void P::AppendActiveIndices(
const Position& pos, Color perspective, IndexList* active) { const Position& pos, Color perspective, IndexList* active) {
// do nothing if array size is small to avoid compiler warning Bitboard bb = pos.pieces() & ~pos.pieces(KING);
if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return; while (bb) {
Square s = pop_lsb(&bb);
const PieceSquare* pieces = (perspective == BLACK) ? active->push_back(MakeIndex(perspective, s, pos.piece_on(s)));
pos.eval_list()->piece_list_fb() :
pos.eval_list()->piece_list_fw();
for (PieceId i = PieceId::PIECE_ID_ZERO; i < PieceId::PIECE_ID_KING; ++i) {
if (pieces[i] != PieceSquare::PS_NONE) {
active->push_back(pieces[i]);
}
} }
} }
@@ -33,13 +38,12 @@ void P::AppendChangedIndices(
IndexList* removed, IndexList* added) { IndexList* removed, IndexList* added) {
const auto& dp = pos.state()->dirtyPiece; const auto& dp = pos.state()->dirtyPiece;
for (int i = 0; i < dp.dirty_num; ++i) { for (int i = 0; i < dp.dirty_num; ++i) {
if (dp.pieceId[i] >= PieceId::PIECE_ID_KING) continue; Piece pc = dp.piece[i];
if (dp.old_piece[i].from[perspective] != PieceSquare::PS_NONE) { if (type_of(pc) == KING) continue;
removed->push_back(dp.old_piece[i].from[perspective]); if (dp.from[i] != SQ_NONE)
} removed->push_back(MakeIndex(perspective, dp.from[i], pc));
if (dp.new_piece[i].from[perspective] != PieceSquare::PS_NONE) { if (dp.to[i] != SQ_NONE)
added->push_back(dp.new_piece[i].from[perspective]); added->push_back(MakeIndex(perspective, dp.to[i], pc));
}
} }
} }
+6 -2
View File
@@ -22,9 +22,9 @@ class P {
// Hash value embedded in the evaluation function file // Hash value embedded in the evaluation function file
static constexpr std::uint32_t kHashValue = 0x764CFB4Bu; static constexpr std::uint32_t kHashValue = 0x764CFB4Bu;
// number of feature dimensions // number of feature dimensions
static constexpr IndexType kDimensions = PieceSquare::PS_END; static constexpr IndexType kDimensions = PS_END;
// The maximum value of the number of indexes whose value is 1 at the same time among the feature values // The maximum value of the number of indexes whose value is 1 at the same time among the feature values
static constexpr IndexType kMaxActiveDimensions = PieceId::PIECE_ID_KING; static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count
// Timing of full calculation instead of difference calculation // Timing of full calculation instead of difference calculation
static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone; static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone;
@@ -35,6 +35,10 @@ class P {
// Get a list of indices whose values have changed from the previous one in the feature quantity // Get a list of indices whose values have changed from the previous one in the feature quantity
static void AppendChangedIndices(const Position& pos, Color perspective, static void AppendChangedIndices(const Position& pos, Color perspective,
IndexList* removed, IndexList* added); IndexList* removed, IndexList* added);
private:
// Index of a feature for a given piece on some square
static IndexType MakeIndex(Color perspective, Square s, Piece pc);
}; };
} // namespace Features } // namespace Features
+106 -36
View File
@@ -70,11 +70,10 @@ namespace Eval::NNUE::Layers {
// Read network parameters // Read network parameters
bool ReadParameters(std::istream& stream) { bool ReadParameters(std::istream& stream) {
if (!previous_layer_.ReadParameters(stream)) return false; if (!previous_layer_.ReadParameters(stream)) return false;
stream.read(reinterpret_cast<char*>(biases_), for (std::size_t i = 0; i < kOutputDimensions; ++i)
kOutputDimensions * sizeof(BiasType)); biases_[i] = read_little_endian<BiasType>(stream);
stream.read(reinterpret_cast<char*>(weights_), for (std::size_t i = 0; i < kOutputDimensions * kPaddedInputDimensions; ++i)
kOutputDimensions * kPaddedInputDimensions * weights_[i] = read_little_endian<WeightType>(stream);
sizeof(WeightType));
return !stream.fail(); return !stream.fail();
} }
@@ -98,19 +97,32 @@ namespace Eval::NNUE::Layers {
#if defined(USE_AVX512) #if defined(USE_AVX512)
constexpr IndexType kNumChunks = kPaddedInputDimensions / (kSimdWidth * 2); constexpr IndexType kNumChunks = kPaddedInputDimensions / (kSimdWidth * 2);
const __m512i kOnes = _mm512_set1_epi16(1);
const auto input_vector = reinterpret_cast<const __m512i*>(input); const auto input_vector = reinterpret_cast<const __m512i*>(input);
#if !defined(USE_VNNI)
const __m512i kOnes = _mm512_set1_epi16(1);
#endif
#elif defined(USE_AVX2) #elif defined(USE_AVX2)
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
const __m256i kOnes = _mm256_set1_epi16(1);
const auto input_vector = reinterpret_cast<const __m256i*>(input); const auto input_vector = reinterpret_cast<const __m256i*>(input);
#if !defined(USE_VNNI)
const __m256i kOnes = _mm256_set1_epi16(1);
#endif
#elif defined(USE_SSSE3) #elif defined(USE_SSE2)
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
#ifndef USE_SSSE3
const __m128i kZeros = _mm_setzero_si128();
#else
const __m128i kOnes = _mm_set1_epi16(1); const __m128i kOnes = _mm_set1_epi16(1);
#endif
const auto input_vector = reinterpret_cast<const __m128i*>(input); const auto input_vector = reinterpret_cast<const __m128i*>(input);
#elif defined(USE_MMX)
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
const __m64 kZeros = _mm_setzero_si64();
const auto input_vector = reinterpret_cast<const __m64*>(input);
#elif defined(USE_NEON) #elif defined(USE_NEON)
constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
const auto input_vector = reinterpret_cast<const int8x8_t*>(input); const auto input_vector = reinterpret_cast<const int8x8_t*>(input);
@@ -123,60 +135,115 @@ namespace Eval::NNUE::Layers {
__m512i sum = _mm512_setzero_si512(); __m512i sum = _mm512_setzero_si512();
const auto row = reinterpret_cast<const __m512i*>(&weights_[offset]); const auto row = reinterpret_cast<const __m512i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) { for (IndexType j = 0; j < kNumChunks; ++j) {
__m512i product = _mm512_maddubs_epi16( #if defined(USE_VNNI)
_mm512_load_si512(&input_vector[j]), _mm512_load_si512(&row[j])); sum = _mm512_dpbusd_epi32(sum, _mm512_loadA_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
#else
__m512i product = _mm512_maddubs_epi16(_mm512_loadA_si512(&input_vector[j]), _mm512_load_si512(&row[j]));
product = _mm512_madd_epi16(product, kOnes); product = _mm512_madd_epi16(product, kOnes);
sum = _mm512_add_epi32(sum, product); sum = _mm512_add_epi32(sum, product);
#endif
} }
output[i] = _mm512_reduce_add_epi32(sum) + biases_[i];
// Note: Changing kMaxSimdWidth from 32 to 64 breaks loading existing networks. // Note: Changing kMaxSimdWidth from 32 to 64 breaks loading existing networks.
// As a result kPaddedInputDimensions may not be an even multiple of 64(512bit) // As a result kPaddedInputDimensions may not be an even multiple of 64(512bit)
// and we have to do one more 256bit chunk. // and we have to do one more 256bit chunk.
if (kPaddedInputDimensions != kNumChunks * kSimdWidth * 2) if (kPaddedInputDimensions != kNumChunks * kSimdWidth * 2)
{ {
const auto iv_256 = reinterpret_cast<const __m256i*>(input); const auto iv256 = reinterpret_cast<const __m256i*>(&input_vector[kNumChunks]);
const auto row_256 = reinterpret_cast<const __m256i*>(&weights_[offset]); const auto row256 = reinterpret_cast<const __m256i*>(&row[kNumChunks]);
int j = kNumChunks * 2; #if defined(USE_VNNI)
__m256i product256 = _mm256_dpbusd_epi32(
__m256i sum256 = _mm256_maddubs_epi16( _mm512_castsi512_si256(sum), _mm256_loadA_si256(&iv256[0]), _mm256_load_si256(&row256[0]));
_mm256_load_si256(&iv_256[j]), _mm256_load_si256(&row_256[j])); sum = _mm512_inserti32x8(sum, product256, 0);
sum256 = _mm256_madd_epi16(sum256, _mm256_set1_epi16(1)); #else
sum256 = _mm256_hadd_epi32(sum256, sum256); __m256i product256 = _mm256_maddubs_epi16(_mm256_loadA_si256(&iv256[0]), _mm256_load_si256(&row256[0]));
sum256 = _mm256_hadd_epi32(sum256, sum256); sum = _mm512_add_epi32(sum, _mm512_cvtepi16_epi32(product256));
const __m128i lo = _mm256_extracti128_si256(sum256, 0); #endif
const __m128i hi = _mm256_extracti128_si256(sum256, 1);
output[i] += _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi);
} }
output[i] = _mm512_reduce_add_epi32(sum) + biases_[i];
#elif defined(USE_AVX2) #elif defined(USE_AVX2)
__m256i sum = _mm256_setzero_si256(); __m256i sum = _mm256_setzero_si256();
const auto row = reinterpret_cast<const __m256i*>(&weights_[offset]); const auto row = reinterpret_cast<const __m256i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) { for (IndexType j = 0; j < kNumChunks; ++j) {
__m256i product = _mm256_maddubs_epi16( #if defined(USE_VNNI)
_mm256_load_si256(&input_vector[j]), _mm256_load_si256(&row[j])); sum = _mm256_dpbusd_epi32(sum, _mm256_loadA_si256(&input_vector[j]), _mm256_load_si256(&row[j]));
#else
__m256i product = _mm256_maddubs_epi16(_mm256_loadA_si256(&input_vector[j]), _mm256_load_si256(&row[j]));
product = _mm256_madd_epi16(product, kOnes); product = _mm256_madd_epi16(product, kOnes);
sum = _mm256_add_epi32(sum, product); sum = _mm256_add_epi32(sum, product);
#endif
} }
sum = _mm256_hadd_epi32(sum, sum); __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1));
sum = _mm256_hadd_epi32(sum, sum); sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC));
const __m128i lo = _mm256_extracti128_si256(sum, 0); sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB));
const __m128i hi = _mm256_extracti128_si256(sum, 1); output[i] = _mm_cvtsi128_si32(sum128) + biases_[i];
output[i] = _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi) + biases_[i];
#elif defined(USE_SSSE3) #elif defined(USE_SSSE3)
__m128i sum = _mm_cvtsi32_si128(biases_[i]); __m128i sum = _mm_setzero_si128();
const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]); const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) { for (int j = 0; j < (int)kNumChunks - 1; j += 2) {
__m128i product = _mm_maddubs_epi16( __m128i product0 = _mm_maddubs_epi16(_mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j]));
_mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j])); product0 = _mm_madd_epi16(product0, kOnes);
sum = _mm_add_epi32(sum, product0);
__m128i product1 = _mm_maddubs_epi16(_mm_load_si128(&input_vector[j+1]), _mm_load_si128(&row[j+1]));
product1 = _mm_madd_epi16(product1, kOnes);
sum = _mm_add_epi32(sum, product1);
}
if (kNumChunks & 0x1) {
__m128i product = _mm_maddubs_epi16(_mm_load_si128(&input_vector[kNumChunks-1]), _mm_load_si128(&row[kNumChunks-1]));
product = _mm_madd_epi16(product, kOnes); product = _mm_madd_epi16(product, kOnes);
sum = _mm_add_epi32(sum, product); sum = _mm_add_epi32(sum, product);
} }
sum = _mm_hadd_epi32(sum, sum); sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC
sum = _mm_hadd_epi32(sum, sum); sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB
output[i] = _mm_cvtsi128_si32(sum) + biases_[i];
#elif defined(USE_SSE2)
__m128i sum_lo = _mm_cvtsi32_si128(biases_[i]);
__m128i sum_hi = kZeros;
const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m128i row_j = _mm_load_si128(&row[j]);
__m128i input_j = _mm_load_si128(&input_vector[j]);
__m128i row_signs = _mm_cmpgt_epi8(kZeros, row_j);
__m128i extended_row_lo = _mm_unpacklo_epi8(row_j, row_signs);
__m128i extended_row_hi = _mm_unpackhi_epi8(row_j, row_signs);
__m128i extended_input_lo = _mm_unpacklo_epi8(input_j, kZeros);
__m128i extended_input_hi = _mm_unpackhi_epi8(input_j, kZeros);
__m128i product_lo = _mm_madd_epi16(extended_row_lo, extended_input_lo);
__m128i product_hi = _mm_madd_epi16(extended_row_hi, extended_input_hi);
sum_lo = _mm_add_epi32(sum_lo, product_lo);
sum_hi = _mm_add_epi32(sum_hi, product_hi);
}
__m128i sum = _mm_add_epi32(sum_lo, sum_hi);
__m128i sum_high_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2));
sum = _mm_add_epi32(sum, sum_high_64);
__m128i sum_second_32 = _mm_shufflelo_epi16(sum, _MM_SHUFFLE(1, 0, 3, 2));
sum = _mm_add_epi32(sum, sum_second_32);
output[i] = _mm_cvtsi128_si32(sum); output[i] = _mm_cvtsi128_si32(sum);
#elif defined(USE_MMX)
__m64 sum_lo = _mm_cvtsi32_si64(biases_[i]);
__m64 sum_hi = kZeros;
const auto row = reinterpret_cast<const __m64*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m64 row_j = row[j];
__m64 input_j = input_vector[j];
__m64 row_signs = _mm_cmpgt_pi8(kZeros, row_j);
__m64 extended_row_lo = _mm_unpacklo_pi8(row_j, row_signs);
__m64 extended_row_hi = _mm_unpackhi_pi8(row_j, row_signs);
__m64 extended_input_lo = _mm_unpacklo_pi8(input_j, kZeros);
__m64 extended_input_hi = _mm_unpackhi_pi8(input_j, kZeros);
__m64 product_lo = _mm_madd_pi16(extended_row_lo, extended_input_lo);
__m64 product_hi = _mm_madd_pi16(extended_row_hi, extended_input_hi);
sum_lo = _mm_add_pi32(sum_lo, product_lo);
sum_hi = _mm_add_pi32(sum_hi, product_hi);
}
__m64 sum = _mm_add_pi32(sum_lo, sum_hi);
sum = _mm_add_pi32(sum, _mm_unpackhi_pi32(sum, sum));
output[i] = _mm_cvtsi64_si32(sum);
#elif defined(USE_NEON) #elif defined(USE_NEON)
int32x4_t sum = {biases_[i]}; int32x4_t sum = {biases_[i]};
const auto row = reinterpret_cast<const int8x8_t*>(&weights_[offset]); const auto row = reinterpret_cast<const int8x8_t*>(&weights_[offset]);
@@ -196,6 +263,9 @@ namespace Eval::NNUE::Layers {
#endif #endif
} }
#if defined(USE_MMX)
_mm_empty();
#endif
return output; return output;
} }
+24 -7
View File
@@ -86,18 +86,17 @@ namespace Eval::NNUE::Layers {
const auto out = reinterpret_cast<__m256i*>(output); const auto out = reinterpret_cast<__m256i*>(output);
for (IndexType i = 0; i < kNumChunks; ++i) { for (IndexType i = 0; i < kNumChunks; ++i) {
const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32( const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32(
_mm256_load_si256(&in[i * 4 + 0]), _mm256_loadA_si256(&in[i * 4 + 0]),
_mm256_load_si256(&in[i * 4 + 1])), kWeightScaleBits); _mm256_loadA_si256(&in[i * 4 + 1])), kWeightScaleBits);
const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32( const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32(
_mm256_load_si256(&in[i * 4 + 2]), _mm256_loadA_si256(&in[i * 4 + 2]),
_mm256_load_si256(&in[i * 4 + 3])), kWeightScaleBits); _mm256_loadA_si256(&in[i * 4 + 3])), kWeightScaleBits);
_mm256_store_si256( _mm256_storeA_si256(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
_mm256_packs_epi16(words0, words1), kZero), kOffsets)); _mm256_packs_epi16(words0, words1), kZero), kOffsets));
} }
constexpr IndexType kStart = kNumChunks * kSimdWidth; constexpr IndexType kStart = kNumChunks * kSimdWidth;
#elif defined(USE_SSSE3) #elif defined(USE_SSE2)
constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth; constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
#ifdef USE_SSE41 #ifdef USE_SSE41
@@ -128,6 +127,24 @@ namespace Eval::NNUE::Layers {
} }
constexpr IndexType kStart = kNumChunks * kSimdWidth; constexpr IndexType kStart = kNumChunks * kSimdWidth;
#elif defined(USE_MMX)
constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
const __m64 k0x80s = _mm_set1_pi8(-128);
const auto in = reinterpret_cast<const __m64*>(input);
const auto out = reinterpret_cast<__m64*>(output);
for (IndexType i = 0; i < kNumChunks; ++i) {
const __m64 words0 = _mm_srai_pi16(
_mm_packs_pi32(in[i * 4 + 0], in[i * 4 + 1]),
kWeightScaleBits);
const __m64 words1 = _mm_srai_pi16(
_mm_packs_pi32(in[i * 4 + 2], in[i * 4 + 3]),
kWeightScaleBits);
const __m64 packedbytes = _mm_packs_pi16(words0, words1);
out[i] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
}
_mm_empty();
constexpr IndexType kStart = kNumChunks * kSimdWidth;
#elif defined(USE_NEON) #elif defined(USE_NEON)
constexpr IndexType kNumChunks = kInputDimensions / (kSimdWidth / 2); constexpr IndexType kNumChunks = kInputDimensions / (kSimdWidth / 2);
const int8x8_t kZero = {0}; const int8x8_t kZero = {0};
+1 -1
View File
@@ -26,7 +26,7 @@
namespace Eval::NNUE { namespace Eval::NNUE {
// Class that holds the result of affine transformation of input features // Class that holds the result of affine transformation of input features
struct alignas(32) Accumulator { struct alignas(kCacheLineSize) Accumulator {
std::int16_t std::int16_t
accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions]; accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
Value score; Value score;
+71
View File
@@ -21,6 +21,9 @@
#ifndef NNUE_COMMON_H_INCLUDED #ifndef NNUE_COMMON_H_INCLUDED
#define NNUE_COMMON_H_INCLUDED #define NNUE_COMMON_H_INCLUDED
#include <cstring>
#include <iostream>
#if defined(USE_AVX2) #if defined(USE_AVX2)
#include <immintrin.h> #include <immintrin.h>
@@ -33,10 +36,36 @@
#elif defined(USE_SSE2) #elif defined(USE_SSE2)
#include <emmintrin.h> #include <emmintrin.h>
#elif defined(USE_MMX)
#include <mmintrin.h>
#elif defined(USE_NEON) #elif defined(USE_NEON)
#include <arm_neon.h> #include <arm_neon.h>
#endif #endif
// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Otherwise a binary
// compiled with older g++ crashes because the output memory is not aligned
// even though alignas is specified.
//
// The "A" wrappers defined below are used by the NNUE code in place of the raw
// load/store intrinsics. They map to the unaligned (loadu/storeu) intrinsics only
// when building with GCC older than version 9 for Windows (and not clang); with
// every other toolchain they map to the aligned (load/store) intrinsics.
#if defined(USE_AVX2)
#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32) && !defined(__clang__)
// Affected toolchain: fall back to the unaligned 256-bit intrinsics.
#define _mm256_loadA_si256 _mm256_loadu_si256
#define _mm256_storeA_si256 _mm256_storeu_si256
#else
// Everything else: keep the aligned 256-bit intrinsics.
#define _mm256_loadA_si256 _mm256_load_si256
#define _mm256_storeA_si256 _mm256_store_si256
#endif
#endif
// Same workaround, applied to the 512-bit load/store intrinsics.
#if defined(USE_AVX512)
#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32) && !defined(__clang__)
// Affected toolchain: fall back to the unaligned 512-bit intrinsics.
#define _mm512_loadA_si512 _mm512_loadu_si512
#define _mm512_storeA_si512 _mm512_storeu_si512
#else
// Everything else: keep the aligned 512-bit intrinsics.
#define _mm512_loadA_si512 _mm512_load_si512
#define _mm512_storeA_si512 _mm512_store_si512
#endif
#endif
namespace Eval::NNUE { namespace Eval::NNUE {
// Version of the evaluation file // Version of the evaluation file
@@ -56,12 +85,36 @@ namespace Eval::NNUE {
#elif defined(USE_SSE2) #elif defined(USE_SSE2)
constexpr std::size_t kSimdWidth = 16; constexpr std::size_t kSimdWidth = 16;
#elif defined(USE_MMX)
constexpr std::size_t kSimdWidth = 8;
#elif defined(USE_NEON) #elif defined(USE_NEON)
constexpr std::size_t kSimdWidth = 16; constexpr std::size_t kSimdWidth = 16;
#endif #endif
constexpr std::size_t kMaxSimdWidth = 32; constexpr std::size_t kMaxSimdWidth = 32;
// unique number for each piece type on each square
// Each (color, piece type) pair gets a base value; consecutive bases are
// SQUARE_NB apart and all are offset by 1 so that 0 stays reserved for PS_NONE.
// NOTE(review): presumably a square index is added to a base to obtain the final
// per-square number -- confirm against the feature-index code.
enum {
PS_NONE = 0,
PS_W_PAWN = 1,
PS_B_PAWN = 1 * SQUARE_NB + 1,
PS_W_KNIGHT = 2 * SQUARE_NB + 1,
PS_B_KNIGHT = 3 * SQUARE_NB + 1,
PS_W_BISHOP = 4 * SQUARE_NB + 1,
PS_B_BISHOP = 5 * SQUARE_NB + 1,
PS_W_ROOK = 6 * SQUARE_NB + 1,
PS_B_ROOK = 7 * SQUARE_NB + 1,
PS_W_QUEEN = 8 * SQUARE_NB + 1,
PS_B_QUEEN = 9 * SQUARE_NB + 1,
PS_W_KING = 10 * SQUARE_NB + 1,
// PS_END has the same value as PS_W_KING: it is the end marker of the
// non-king range, i.e. one past the last black-queen value.
PS_END = PS_W_KING, // pieces without kings (pawns included)
PS_B_KING = 11 * SQUARE_NB + 1,
// PS_END2 is the end marker of the full range, kings included.
PS_END2 = 12 * SQUARE_NB + 1
};
extern uint32_t kpp_board_index[PIECE_NB][COLOR_NB];
// Type of input feature after conversion // Type of input feature after conversion
using TransformedFeatureType = std::uint8_t; using TransformedFeatureType = std::uint8_t;
using IndexType = std::uint32_t; using IndexType = std::uint32_t;
@@ -76,6 +129,24 @@ namespace Eval::NNUE {
return (n + base - 1) / base * base; return (n + base - 1) / base * base;
} }
// read_little_endian() is our utility to read an integer (signed or unsigned, any size)
// from a stream in little-endian order. The value is assembled arithmetically from the
// individual bytes, so the result is correct whatever the byte ordering of the
// compiling machine is.
//
// Template parameter:
//   IntType - integer type to read; sizeof(IntType) bytes are consumed from the stream.
// Parameter:
//   stream  - input stream positioned at the first (least significant) byte.
// Returns:
//   the decoded value. If the stream cannot supply sizeof(IntType) bytes, read() sets
//   the stream's fail state (callers such as ReadParameters() check stream.fail()
//   afterwards) and the returned value is built only from the bytes that were read,
//   with the missing ones taken as zero.
template <typename IntType>
inline IntType read_little_endian(std::istream& stream) {
    using UnsignedType = typename std::make_unsigned<IntType>::type;

    IntType result;
    // Zero-initialized: a failed or short read leaves (some of) the buffer untouched,
    // and folding uninitialized bytes into the result would be undefined behaviour.
    std::uint8_t u[sizeof(IntType)] = {};
    UnsignedType v = 0;

    stream.read(reinterpret_cast<char*>(u), sizeof(IntType));

    // Fold from the most significant byte (stored last) down to the least significant
    // byte (stored first). The cast makes the narrowing after integer promotion explicit.
    for (std::size_t i = 0; i < sizeof(IntType); ++i)
        v = static_cast<UnsignedType>((v << 8) | u[sizeof(IntType) - i - 1]);

    // Copy the bit pattern into the (possibly signed) result without a narrowing cast.
    std::memcpy(&result, &v, sizeof(IntType));
    return result;
}
} // namespace Eval::NNUE } // namespace Eval::NNUE
#endif // #ifndef NNUE_COMMON_H_INCLUDED #endif // #ifndef NNUE_COMMON_H_INCLUDED
+73 -22
View File
@@ -62,10 +62,10 @@ namespace Eval::NNUE {
// Read network parameters // Read network parameters
bool ReadParameters(std::istream& stream) { bool ReadParameters(std::istream& stream) {
stream.read(reinterpret_cast<char*>(biases_), for (std::size_t i = 0; i < kHalfDimensions; ++i)
kHalfDimensions * sizeof(BiasType)); biases_[i] = read_little_endian<BiasType>(stream);
stream.read(reinterpret_cast<char*>(weights_), for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i)
kHalfDimensions * kInputDimensions * sizeof(WeightType)); weights_[i] = read_little_endian<WeightType>(stream);
return !stream.fail(); return !stream.fail();
} }
@@ -104,7 +104,7 @@ namespace Eval::NNUE {
constexpr int kControl = 0b11011000; constexpr int kControl = 0b11011000;
const __m256i kZero = _mm256_setzero_si256(); const __m256i kZero = _mm256_setzero_si256();
#elif defined(USE_SSSE3) #elif defined(USE_SSE2)
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
#ifdef USE_SSE41 #ifdef USE_SSE41
@@ -113,6 +113,10 @@ namespace Eval::NNUE {
const __m128i k0x80s = _mm_set1_epi8(-128); const __m128i k0x80s = _mm_set1_epi8(-128);
#endif #endif
#elif defined(USE_MMX)
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
const __m64 k0x80s = _mm_set1_pi8(-128);
#elif defined(USE_NEON) #elif defined(USE_NEON)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
const int8x8_t kZero = {0}; const int8x8_t kZero = {0};
@@ -125,17 +129,15 @@ namespace Eval::NNUE {
#if defined(USE_AVX2) #if defined(USE_AVX2)
auto out = reinterpret_cast<__m256i*>(&output[offset]); auto out = reinterpret_cast<__m256i*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) { for (IndexType j = 0; j < kNumChunks; ++j) {
__m256i sum0 = __m256i sum0 = _mm256_loadA_si256(
_mm256_load_si256(&reinterpret_cast<const __m256i*>( &reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 0]);
accumulation[perspectives[p]][0])[j * 2 + 0]); __m256i sum1 = _mm256_loadA_si256(
__m256i sum1 = &reinterpret_cast<const __m256i*>(accumulation[perspectives[p]][0])[j * 2 + 1]);
_mm256_load_si256(&reinterpret_cast<const __m256i*>( _mm256_storeA_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
accumulation[perspectives[p]][0])[j * 2 + 1]);
_mm256_store_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8(
_mm256_packs_epi16(sum0, sum1), kZero), kControl)); _mm256_packs_epi16(sum0, sum1), kZero), kControl));
} }
#elif defined(USE_SSSE3) #elif defined(USE_SSE2)
auto out = reinterpret_cast<__m128i*>(&output[offset]); auto out = reinterpret_cast<__m128i*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) { for (IndexType j = 0; j < kNumChunks; ++j) {
__m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>( __m128i sum0 = _mm_load_si128(&reinterpret_cast<const __m128i*>(
@@ -155,6 +157,17 @@ namespace Eval::NNUE {
); );
} }
#elif defined(USE_MMX)
auto out = reinterpret_cast<__m64*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
__m64 sum0 = *(&reinterpret_cast<const __m64*>(
accumulation[perspectives[p]][0])[j * 2 + 0]);
__m64 sum1 = *(&reinterpret_cast<const __m64*>(
accumulation[perspectives[p]][0])[j * 2 + 1]);
const __m64 packedbytes = _mm_packs_pi16(sum0, sum1);
out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
}
#elif defined(USE_NEON) #elif defined(USE_NEON)
const auto out = reinterpret_cast<int8x8_t*>(&output[offset]); const auto out = reinterpret_cast<int8x8_t*>(&output[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) { for (IndexType j = 0; j < kNumChunks; ++j) {
@@ -172,6 +185,9 @@ namespace Eval::NNUE {
#endif #endif
} }
#if defined(USE_MMX)
_mm_empty();
#endif
} }
private: private:
@@ -187,23 +203,37 @@ namespace Eval::NNUE {
kHalfDimensions * sizeof(BiasType)); kHalfDimensions * sizeof(BiasType));
for (const auto index : active_indices[perspective]) { for (const auto index : active_indices[perspective]) {
const IndexType offset = kHalfDimensions * index; const IndexType offset = kHalfDimensions * index;
#if defined(USE_AVX512)
auto accumulation = reinterpret_cast<__m512i*>(
&accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const __m512i*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth;
for (IndexType j = 0; j < kNumChunks; ++j)
_mm512_storeA_si512(&accumulation[j], _mm512_add_epi16(_mm512_loadA_si512(&accumulation[j]), column[j]));
#if defined(USE_AVX2) #elif defined(USE_AVX2)
auto accumulation = reinterpret_cast<__m256i*>( auto accumulation = reinterpret_cast<__m256i*>(
&accumulator.accumulation[perspective][i][0]); &accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const __m256i*>(&weights_[offset]); auto column = reinterpret_cast<const __m256i*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
for (IndexType j = 0; j < kNumChunks; ++j) { for (IndexType j = 0; j < kNumChunks; ++j)
accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]); _mm256_storeA_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadA_si256(&accumulation[j]), column[j]));
}
#elif defined(USE_SSE2) #elif defined(USE_SSE2)
auto accumulation = reinterpret_cast<__m128i*>( auto accumulation = reinterpret_cast<__m128i*>(
&accumulator.accumulation[perspective][i][0]); &accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const __m128i*>(&weights_[offset]); auto column = reinterpret_cast<const __m128i*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
for (IndexType j = 0; j < kNumChunks; ++j) { for (IndexType j = 0; j < kNumChunks; ++j)
accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
#elif defined(USE_MMX)
auto accumulation = reinterpret_cast<__m64*>(
&accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = _mm_add_pi16(accumulation[j], column[j]);
} }
#elif defined(USE_NEON) #elif defined(USE_NEON)
@@ -211,18 +241,19 @@ namespace Eval::NNUE {
&accumulator.accumulation[perspective][i][0]); &accumulator.accumulation[perspective][i][0]);
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]); auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
for (IndexType j = 0; j < kNumChunks; ++j) { for (IndexType j = 0; j < kNumChunks; ++j)
accumulation[j] = vaddq_s16(accumulation[j], column[j]); accumulation[j] = vaddq_s16(accumulation[j], column[j]);
}
#else #else
for (IndexType j = 0; j < kHalfDimensions; ++j) { for (IndexType j = 0; j < kHalfDimensions; ++j)
accumulator.accumulation[perspective][i][j] += weights_[offset + j]; accumulator.accumulation[perspective][i][j] += weights_[offset + j];
}
#endif #endif
} }
} }
#if defined(USE_MMX)
_mm_empty();
#endif
accumulator.computed_accumulation = true; accumulator.computed_accumulation = true;
accumulator.computed_score = false; accumulator.computed_score = false;
@@ -249,6 +280,11 @@ namespace Eval::NNUE {
auto accumulation = reinterpret_cast<__m128i*>( auto accumulation = reinterpret_cast<__m128i*>(
&accumulator.accumulation[perspective][i][0]); &accumulator.accumulation[perspective][i][0]);
#elif defined(USE_MMX)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
auto accumulation = reinterpret_cast<__m64*>(
&accumulator.accumulation[perspective][i][0]);
#elif defined(USE_NEON) #elif defined(USE_NEON)
constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2);
auto accumulation = reinterpret_cast<int16x8_t*>( auto accumulation = reinterpret_cast<int16x8_t*>(
@@ -278,6 +314,12 @@ namespace Eval::NNUE {
accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]); accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]);
} }
#elif defined(USE_MMX)
auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = _mm_sub_pi16(accumulation[j], column[j]);
}
#elif defined(USE_NEON) #elif defined(USE_NEON)
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]); auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) { for (IndexType j = 0; j < kNumChunks; ++j) {
@@ -309,6 +351,12 @@ namespace Eval::NNUE {
accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); accumulation[j] = _mm_add_epi16(accumulation[j], column[j]);
} }
#elif defined(USE_MMX)
auto column = reinterpret_cast<const __m64*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) {
accumulation[j] = _mm_add_pi16(accumulation[j], column[j]);
}
#elif defined(USE_NEON) #elif defined(USE_NEON)
auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]); auto column = reinterpret_cast<const int16x8_t*>(&weights_[offset]);
for (IndexType j = 0; j < kNumChunks; ++j) { for (IndexType j = 0; j < kNumChunks; ++j) {
@@ -325,6 +373,9 @@ namespace Eval::NNUE {
} }
} }
} }
#if defined(USE_MMX)
_mm_empty();
#endif
accumulator.computed_accumulation = true; accumulator.computed_accumulation = true;
accumulator.computed_score = false; accumulator.computed_score = false;
@@ -62,8 +62,8 @@ class Factorizer<HalfKP<AssociatedKing>> {
IndexType index_offset = AppendBaseFeature<FeatureType>( IndexType index_offset = AppendBaseFeature<FeatureType>(
kProperties[kFeaturesHalfKP], base_index, training_features); kProperties[kFeaturesHalfKP], base_index, training_features);
const auto sq_k = static_cast<Square>(base_index / PieceSquare::PS_END); const auto sq_k = static_cast<Square>(base_index / PS_END);
const auto p = static_cast<PieceSquare>(base_index % PieceSquare::PS_END); const auto p = static_cast<IndexType>(base_index % PS_END);
// kFeaturesHalfK // kFeaturesHalfK
{ {
const auto& properties = kProperties[kFeaturesHalfK]; const auto& properties = kProperties[kFeaturesHalfK];
@@ -76,7 +76,7 @@ class Factorizer<HalfKP<AssociatedKing>> {
index_offset += InheritFeaturesIfRequired<P>( index_offset += InheritFeaturesIfRequired<P>(
index_offset, kProperties[kFeaturesP], p, training_features); index_offset, kProperties[kFeaturesP], p, training_features);
// kFeaturesHalfRelativeKP // kFeaturesHalfRelativeKP
if (p >= PieceSquare::PS_W_PAWN) { if (p >= PS_W_PAWN) {
index_offset += InheritFeaturesIfRequired<HalfRelativeKP<AssociatedKing>>( index_offset += InheritFeaturesIfRequired<HalfRelativeKP<AssociatedKing>>(
index_offset, kProperties[kFeaturesHalfRelativeKP], index_offset, kProperties[kFeaturesHalfRelativeKP],
HalfRelativeKP<AssociatedKing>::MakeIndex(sq_k, p), HalfRelativeKP<AssociatedKing>::MakeIndex(sq_k, p),
+1 -1
View File
@@ -219,7 +219,7 @@ Score Entry::evaluate_shelter(const Position& pos, Square ksq) const {
Score bonus = make_score(5, 5); Score bonus = make_score(5, 5);
File center = Utility::clamp(file_of(ksq), FILE_B, FILE_G); File center = std::clamp(file_of(ksq), FILE_B, FILE_G);
for (File f = File(center - 1); f <= File(center + 1); ++f) for (File f = File(center - 1); f <= File(center + 1); ++f)
{ {
b = ourPawns & file_bb(f); b = ourPawns & file_bb(f);
+25 -74
View File
@@ -198,9 +198,6 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th
std::fill_n(&pieceList[0][0], sizeof(pieceList) / sizeof(Square), SQ_NONE); std::fill_n(&pieceList[0][0], sizeof(pieceList) / sizeof(Square), SQ_NONE);
st = si; st = si;
// Each piece on board gets a unique ID used to track the piece later
PieceId piece_id, next_piece_id = PIECE_ID_ZERO;
ss >> std::noskipws; ss >> std::noskipws;
// 1. Piece placement // 1. Piece placement
@@ -212,21 +209,8 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th
else if (token == '/') else if (token == '/')
sq += 2 * SOUTH; sq += 2 * SOUTH;
else if ((idx = PieceToChar.find(token)) != string::npos) else if ((idx = PieceToChar.find(token)) != string::npos) {
{ put_piece(Piece(idx), sq);
auto pc = Piece(idx);
put_piece(pc, sq);
if (Eval::useNNUE)
{
// Kings get a fixed ID, other pieces get ID in order of placement
piece_id =
(idx == W_KING) ? PIECE_ID_WKING :
(idx == B_KING) ? PIECE_ID_BKING :
next_piece_id++;
evalList.put_piece(piece_id, sq, pc);
}
++sq; ++sq;
} }
} }
@@ -721,8 +705,6 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
// Used by NNUE // Used by NNUE
st->accumulator.computed_accumulation = false; st->accumulator.computed_accumulation = false;
st->accumulator.computed_score = false; st->accumulator.computed_score = false;
PieceId dp0 = PIECE_ID_NONE;
PieceId dp1 = PIECE_ID_NONE;
auto& dp = st->dirtyPiece; auto& dp = st->dirtyPiece;
dp.dirty_num = 1; dp.dirty_num = 1;
@@ -775,12 +757,10 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
if (Eval::useNNUE) if (Eval::useNNUE)
{ {
dp.dirty_num = 2; // 2 pieces moved dp.dirty_num = 2; // 1 piece moved, 1 piece captured
dp1 = piece_id_on(capsq); dp.piece[1] = captured;
dp.pieceId[1] = dp1; dp.from[1] = capsq;
dp.old_piece[1] = evalList.piece_with_id(dp1); dp.to[1] = SQ_NONE;
evalList.put_piece(dp1, capsq, NO_PIECE);
dp.new_piece[1] = evalList.piece_with_id(dp1);
} }
// Update board and piece lists // Update board and piece lists
@@ -821,11 +801,9 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
{ {
if (Eval::useNNUE) if (Eval::useNNUE)
{ {
dp0 = piece_id_on(from); dp.piece[0] = pc;
dp.pieceId[0] = dp0; dp.from[0] = from;
dp.old_piece[0] = evalList.piece_with_id(dp0); dp.to[0] = to;
evalList.put_piece(dp0, to, pc);
dp.new_piece[0] = evalList.piece_with_id(dp0);
} }
move_piece(from, to); move_piece(from, to);
@@ -854,9 +832,12 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) {
if (Eval::useNNUE) if (Eval::useNNUE)
{ {
dp0 = piece_id_on(to); // Promoting pawn to SQ_NONE, promoted piece from SQ_NONE
evalList.put_piece(dp0, to, promotion); dp.to[0] = SQ_NONE;
dp.new_piece[0] = evalList.piece_with_id(dp0); dp.piece[dp.dirty_num] = promotion;
dp.from[dp.dirty_num] = SQ_NONE;
dp.to[dp.dirty_num] = to;
dp.dirty_num++;
} }
// Update hash keys // Update hash keys
@@ -950,12 +931,6 @@ void Position::undo_move(Move m) {
{ {
move_piece(to, from); // Put the piece back at the source square move_piece(to, from); // Put the piece back at the source square
if (Eval::useNNUE)
{
PieceId dp0 = st->dirtyPiece.pieceId[0];
evalList.put_piece(dp0, from, pc);
}
if (st->capturedPiece) if (st->capturedPiece)
{ {
Square capsq = to; Square capsq = to;
@@ -972,14 +947,6 @@ void Position::undo_move(Move m) {
} }
put_piece(st->capturedPiece, capsq); // Restore the captured piece put_piece(st->capturedPiece, capsq); // Restore the captured piece
if (Eval::useNNUE)
{
PieceId dp1 = st->dirtyPiece.pieceId[1];
assert(evalList.piece_with_id(dp1).from[WHITE] == PS_NONE);
assert(evalList.piece_with_id(dp1).from[BLACK] == PS_NONE);
evalList.put_piece(dp1, capsq, st->capturedPiece);
}
} }
} }
@@ -1001,32 +968,16 @@ void Position::do_castling(Color us, Square from, Square& to, Square& rfrom, Squ
rto = relative_square(us, kingSide ? SQ_F1 : SQ_D1); rto = relative_square(us, kingSide ? SQ_F1 : SQ_D1);
to = relative_square(us, kingSide ? SQ_G1 : SQ_C1); to = relative_square(us, kingSide ? SQ_G1 : SQ_C1);
if (Eval::useNNUE) if (Do && Eval::useNNUE)
{ {
PieceId dp0, dp1;
auto& dp = st->dirtyPiece; auto& dp = st->dirtyPiece;
dp.dirty_num = 2; // 2 pieces moved dp.piece[0] = make_piece(us, KING);
dp.from[0] = from;
if (Do) dp.to[0] = to;
{ dp.piece[1] = make_piece(us, ROOK);
dp0 = piece_id_on(from); dp.from[1] = rfrom;
dp1 = piece_id_on(rfrom); dp.to[1] = rto;
dp.pieceId[0] = dp0; dp.dirty_num = 2;
dp.old_piece[0] = evalList.piece_with_id(dp0);
evalList.put_piece(dp0, to, make_piece(us, KING));
dp.new_piece[0] = evalList.piece_with_id(dp0);
dp.pieceId[1] = dp1;
dp.old_piece[1] = evalList.piece_with_id(dp1);
evalList.put_piece(dp1, rto, make_piece(us, ROOK));
dp.new_piece[1] = evalList.piece_with_id(dp1);
}
else
{
dp0 = piece_id_on(to);
dp1 = piece_id_on(rto);
evalList.put_piece(dp0, from, make_piece(us, KING));
evalList.put_piece(dp1, rfrom, make_piece(us, ROOK));
}
} }
// Remove both pieces first since squares could overlap in Chess960 // Remove both pieces first since squares could overlap in Chess960
@@ -1145,8 +1096,8 @@ bool Position::see_ge(Move m, Value threshold) const {
// Don't allow pinned pieces to attack (except the king) as long as // Don't allow pinned pieces to attack (except the king) as long as
// there are pinners on their original square. // there are pinners on their original square.
if (st->pinners[~stm] & occupied) if (pinners(~stm) & occupied)
stmAttackers &= ~st->blockersForKing[stm]; stmAttackers &= ~blockers_for_king(stm);
if (!stmAttackers) if (!stmAttackers)
break; break;
+5 -23
View File
@@ -116,6 +116,7 @@ public:
Bitboard checkers() const; Bitboard checkers() const;
Bitboard blockers_for_king(Color c) const; Bitboard blockers_for_king(Color c) const;
Bitboard check_squares(PieceType pt) const; Bitboard check_squares(PieceType pt) const;
Bitboard pinners(Color c) const;
bool is_discovery_check_on_king(Color c, Move m) const; bool is_discovery_check_on_king(Color c, Move m) const;
// Attacks to/from a given square // Attacks to/from a given square
@@ -173,7 +174,6 @@ public:
// Used by NNUE // Used by NNUE
StateInfo* state() const; StateInfo* state() const;
const EvalList* eval_list() const;
#if defined(EVAL_LEARN) #if defined(EVAL_LEARN)
// --sfenization helper // --sfenization helper
@@ -208,9 +208,6 @@ private:
template<bool Do> template<bool Do>
void do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto); void do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto);
// ID of a piece on a given square
PieceId piece_id_on(Square sq) const;
// Data members // Data members
Piece board[SQUARE_NB]; Piece board[SQUARE_NB];
Bitboard byTypeBB[PIECE_TYPE_NB]; Bitboard byTypeBB[PIECE_TYPE_NB];
@@ -227,9 +224,6 @@ private:
Thread* thisThread; Thread* thisThread;
StateInfo* st; StateInfo* st;
bool chess960; bool chess960;
// List of pieces used in NNUE evaluation function
EvalList evalList;
}; };
namespace PSQT { namespace PSQT {
@@ -332,6 +326,10 @@ inline Bitboard Position::blockers_for_king(Color c) const {
return st->blockersForKing[c]; return st->blockersForKing[c];
} }
inline Bitboard Position::pinners(Color c) const {
return st->pinners[c];
}
inline Bitboard Position::check_squares(PieceType pt) const { inline Bitboard Position::check_squares(PieceType pt) const {
return st->checkSquares[pt]; return st->checkSquares[pt];
} }
@@ -469,20 +467,4 @@ inline StateInfo* Position::state() const {
return st; return st;
} }
inline const EvalList* Position::eval_list() const {
return &evalList;
}
inline PieceId Position::piece_id_on(Square sq) const
{
assert(piece_on(sq) != NO_PIECE);
PieceId pid = evalList.piece_id_list[sq];
assert(is_ok(pid));
return pid;
}
#endif // #ifndef POSITION_H_INCLUDED #endif // #ifndef POSITION_H_INCLUDED
+76 -77
View File
@@ -63,9 +63,9 @@ namespace {
constexpr uint64_t TtHitAverageResolution = 1024; constexpr uint64_t TtHitAverageResolution = 1024;
// Razor and futility margins // Razor and futility margins
constexpr int RazorMargin = 527; constexpr int RazorMargin = 510;
Value futility_margin(Depth d, bool improving) { Value futility_margin(Depth d, bool improving) {
return Value(227 * (d - improving)); return Value(223 * (d - improving));
} }
bool training; bool training;
@@ -75,7 +75,7 @@ namespace {
Depth reduction(bool i, Depth d, int mn) { Depth reduction(bool i, Depth d, int mn) {
int r = Reductions[d] * Reductions[mn]; int r = Reductions[d] * Reductions[mn];
return (r + 570) / 1024 + (!i && r > 1018); return (r + 509) / 1024 + (!i && r > 894);
} }
constexpr int futility_move_count(bool improving, Depth depth) { constexpr int futility_move_count(bool improving, Depth depth) {
@@ -84,7 +84,7 @@ namespace {
// History and stats update bonus, based on depth // History and stats update bonus, based on depth
int stat_bonus(Depth d) { int stat_bonus(Depth d) {
return d > 15 ? 27 : 17 * d * d + 133 * d - 134; return d > 13 ? 29 : 17 * d * d + 134 * d - 134;
} }
// Add a small random component to draw evaluations to avoid 3fold-blindness // Add a small random component to draw evaluations to avoid 3fold-blindness
@@ -194,7 +194,7 @@ namespace {
void Search::init() { void Search::init() {
for (int i = 1; i < MAX_MOVES; ++i) for (int i = 1; i < MAX_MOVES; ++i)
Reductions[i] = int((24.8 + std::log(Threads.size())) * std::log(i)); Reductions[i] = int((22.0 + std::log(Threads.size())) * std::log(i));
training = Options["Training"]; training = Options["Training"];
} }
@@ -339,7 +339,7 @@ void Thread::search() {
// for match (TC 60+0.6) results spanning a wide range of k values. // for match (TC 60+0.6) results spanning a wide range of k values.
PRNG rng(now()); PRNG rng(now());
double floatLevel = Options["UCI_LimitStrength"] ? double floatLevel = Options["UCI_LimitStrength"] ?
Utility::clamp(std::pow((Options["UCI_Elo"] - 1346.6) / 143.4, 1 / 0.806), 0.0, 20.0) : std::clamp(std::pow((Options["UCI_Elo"] - 1346.6) / 143.4, 1 / 0.806), 0.0, 20.0) :
double(Options["Skill Level"]); double(Options["Skill Level"]);
int intLevel = int(floatLevel) + int intLevel = int(floatLevel) +
((floatLevel - int(floatLevel)) * 1024 > rng.rand<unsigned>() % 1024 ? 1 : 0); ((floatLevel - int(floatLevel)) * 1024 > rng.rand<unsigned>() % 1024 ? 1 : 0);
@@ -407,12 +407,12 @@ void Thread::search() {
if (rootDepth >= 4) if (rootDepth >= 4)
{ {
Value prev = rootMoves[pvIdx].previousScore; Value prev = rootMoves[pvIdx].previousScore;
delta = Value(19); delta = Value(17);
alpha = std::max(prev - delta,-VALUE_INFINITE); alpha = std::max(prev - delta,-VALUE_INFINITE);
beta = std::min(prev + delta, VALUE_INFINITE); beta = std::min(prev + delta, VALUE_INFINITE);
// Adjust contempt based on root move's previousScore (dynamic contempt) // Adjust contempt based on root move's previousScore (dynamic contempt)
int dct = ct + (110 - ct / 2) * prev / (abs(prev) + 140); int dct = ct + (105 - ct / 2) * prev / (abs(prev) + 149);
contempt = (us == WHITE ? make_score(dct, dct / 2) contempt = (us == WHITE ? make_score(dct, dct / 2)
: -make_score(dct, dct / 2)); : -make_score(dct, dct / 2));
@@ -510,13 +510,13 @@ void Thread::search() {
&& !Threads.stop && !Threads.stop
&& !mainThread->stopOnPonderhit) && !mainThread->stopOnPonderhit)
{ {
double fallingEval = (296 + 6 * (mainThread->bestPreviousScore - bestValue) double fallingEval = (318 + 6 * (mainThread->bestPreviousScore - bestValue)
+ 6 * (mainThread->iterValue[iterIdx] - bestValue)) / 725.0; + 6 * (mainThread->iterValue[iterIdx] - bestValue)) / 825.0;
fallingEval = Utility::clamp(fallingEval, 0.5, 1.5); fallingEval = std::clamp(fallingEval, 0.5, 1.5);
// If the bestMove is stable over several iterations, reduce time accordingly // If the bestMove is stable over several iterations, reduce time accordingly
timeReduction = lastBestMoveDepth + 10 < completedDepth ? 1.92 : 0.95; timeReduction = lastBestMoveDepth + 9 < completedDepth ? 1.92 : 0.95;
double reduction = (1.47 + mainThread->previousTimeReduction) / (2.22 * timeReduction); double reduction = (1.47 + mainThread->previousTimeReduction) / (2.32 * timeReduction);
// Use part of the gained time from a previous stable move for the current move // Use part of the gained time from a previous stable move for the current move
for (Thread* th : Threads) for (Thread* th : Threads)
@@ -541,7 +541,7 @@ void Thread::search() {
} }
else if ( Threads.increaseDepth else if ( Threads.increaseDepth
&& !mainThread->ponder && !mainThread->ponder
&& Time.elapsed() > totalTime * 0.56) && Time.elapsed() > totalTime * 0.58)
Threads.increaseDepth = false; Threads.increaseDepth = false;
else else
Threads.increaseDepth = true; Threads.increaseDepth = true;
@@ -600,7 +600,7 @@ namespace {
Key posKey; Key posKey;
Move ttMove, move, excludedMove, bestMove; Move ttMove, move, excludedMove, bestMove;
Depth extension, newDepth; Depth extension, newDepth;
Value bestValue, value, ttValue, eval, maxValue, probcutBeta; Value bestValue, value, ttValue, eval, maxValue, probCutBeta;
bool ttHit, ttPv, formerPv, givesCheck, improving, didLMR, priorCapture; bool ttHit, ttPv, formerPv, givesCheck, improving, didLMR, priorCapture;
bool captureOrPromotion, doFullDepthSearch, moveCountPruning, bool captureOrPromotion, doFullDepthSearch, moveCountPruning,
ttCapture, singularQuietLMR; ttCapture, singularQuietLMR;
@@ -798,11 +798,7 @@ namespace {
else else
{ {
if ((ss-1)->currentMove != MOVE_NULL) if ((ss-1)->currentMove != MOVE_NULL)
{ ss->staticEval = eval = evaluate(pos);
int bonus = -(ss-1)->statScore / 512;
ss->staticEval = eval = evaluate(pos) + bonus;
}
else else
ss->staticEval = eval = -(ss-1)->staticEval + 2 * Tempo; ss->staticEval = eval = -(ss-1)->staticEval + 2 * Tempo;
@@ -815,8 +811,9 @@ namespace {
&& eval <= alpha - RazorMargin) && eval <= alpha - RazorMargin)
return qsearch<NT>(pos, ss, alpha, beta); return qsearch<NT>(pos, ss, alpha, beta);
improving = (ss-2)->staticEval == VALUE_NONE ? (ss->staticEval > (ss-4)->staticEval improving = (ss-2)->staticEval == VALUE_NONE
|| (ss-4)->staticEval == VALUE_NONE) : ss->staticEval > (ss-2)->staticEval; ? ss->staticEval > (ss-4)->staticEval || (ss-4)->staticEval == VALUE_NONE
: ss->staticEval > (ss-2)->staticEval;
// Step 8. Futility pruning: child node (~50 Elo) // Step 8. Futility pruning: child node (~50 Elo)
if ( !PvNode if ( !PvNode
@@ -828,10 +825,10 @@ namespace {
// Step 9. Null move search with verification search (~40 Elo) // Step 9. Null move search with verification search (~40 Elo)
if ( !PvNode if ( !PvNode
&& (ss-1)->currentMove != MOVE_NULL && (ss-1)->currentMove != MOVE_NULL
&& (ss-1)->statScore < 23824 && (ss-1)->statScore < 22977
&& eval >= beta && eval >= beta
&& eval >= ss->staticEval && eval >= ss->staticEval
&& ss->staticEval >= beta - 28 * depth - 28 * improving + 94 * ttPv + 200 && ss->staticEval >= beta - 30 * depth - 28 * improving + 84 * ttPv + 182
&& !excludedMove && !excludedMove
&& pos.non_pawn_material(us) && pos.non_pawn_material(us)
&& (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor)) && (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor))
@@ -839,7 +836,7 @@ namespace {
assert(eval - beta >= 0); assert(eval - beta >= 0);
// Null move dynamic reduction based on depth and value // Null move dynamic reduction based on depth and value
Depth R = (737 + 77 * depth) / 246 + std::min(int(eval - beta) / 192, 3); Depth R = (817 + 71 * depth) / 213 + std::min(int(eval - beta) / 192, 3);
ss->currentMove = MOVE_NULL; ss->currentMove = MOVE_NULL;
ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0]; ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0];
@@ -875,7 +872,7 @@ namespace {
} }
} }
probcutBeta = beta + 176 - 49 * improving; probCutBeta = beta + 176 - 49 * improving;
// Step 10. ProbCut (~10 Elo) // Step 10. ProbCut (~10 Elo)
// If we have a good enough capture and a reduced search returns a value // If we have a good enough capture and a reduced search returns a value
@@ -883,21 +880,27 @@ namespace {
if ( !PvNode if ( !PvNode
&& depth > 4 && depth > 4
&& abs(beta) < VALUE_TB_WIN_IN_MAX_PLY && abs(beta) < VALUE_TB_WIN_IN_MAX_PLY
// if value from transposition table is lower than probCutBeta, don't attempt probCut
// there and in further interactions with transposition table cutoff depth is set to depth - 3
// because probCut search has depth set to depth - 4 but we also do a move before it
// so effective depth is equal to depth - 3
&& !( ttHit && !( ttHit
&& tte->depth() >= depth - 3 && tte->depth() >= depth - 3
&& ttValue != VALUE_NONE && ttValue != VALUE_NONE
&& ttValue < probcutBeta)) && ttValue < probCutBeta))
{ {
// if ttMove is a capture and value from transposition table is good enough produce probCut
// cutoff without digging into actual probCut search
if ( ttHit if ( ttHit
&& tte->depth() >= depth - 3 && tte->depth() >= depth - 3
&& ttValue != VALUE_NONE && ttValue != VALUE_NONE
&& ttValue >= probcutBeta && ttValue >= probCutBeta
&& ttMove && ttMove
&& pos.capture_or_promotion(ttMove)) && pos.capture_or_promotion(ttMove))
return probcutBeta; return probCutBeta;
assert(probcutBeta < VALUE_INFINITE); assert(probCutBeta < VALUE_INFINITE);
MovePicker mp(pos, ttMove, probcutBeta - ss->staticEval, &captureHistory); MovePicker mp(pos, ttMove, probCutBeta - ss->staticEval, &captureHistory);
int probCutCount = 0; int probCutCount = 0;
while ( (move = mp.next_move()) != MOVE_NONE while ( (move = mp.next_move()) != MOVE_NONE
@@ -919,16 +922,17 @@ namespace {
pos.do_move(move, st); pos.do_move(move, st);
// Perform a preliminary qsearch to verify that the move holds // Perform a preliminary qsearch to verify that the move holds
value = -qsearch<NonPV>(pos, ss+1, -probcutBeta, -probcutBeta+1); value = -qsearch<NonPV>(pos, ss+1, -probCutBeta, -probCutBeta+1);
// If the qsearch held, perform the regular search // If the qsearch held, perform the regular search
if (value >= probcutBeta) if (value >= probCutBeta)
value = -search<NonPV>(pos, ss+1, -probcutBeta, -probcutBeta+1, depth - 4, !cutNode); value = -search<NonPV>(pos, ss+1, -probCutBeta, -probCutBeta+1, depth - 4, !cutNode);
pos.undo_move(move); pos.undo_move(move);
if (value >= probcutBeta) if (value >= probCutBeta)
{ {
// if transposition table doesn't have equal or more deep info write probCut data into it
if ( !(ttHit if ( !(ttHit
&& tte->depth() >= depth - 3 && tte->depth() >= depth - 3
&& ttValue != VALUE_NONE)) && ttValue != VALUE_NONE))
@@ -940,16 +944,6 @@ namespace {
} }
} }
// Step 11. Internal iterative deepening (~1 Elo)
if (depth >= 7 && !ttMove)
{
search<NT>(pos, ss, alpha, beta, depth - 7, cutNode);
tte = TT.probe(posKey, ttHit);
ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE;
ttMove = ttHit ? tte->move() : MOVE_NONE;
}
moves_loop: // When in check, search starts from here moves_loop: // When in check, search starts from here
const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory, const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory,
@@ -973,7 +967,7 @@ moves_loop: // When in check, search starts from here
// Mark this node as being searched // Mark this node as being searched
ThreadHolding th(thisThread, posKey, ss->ply); ThreadHolding th(thisThread, posKey, ss->ply);
// Step 12. Loop through all pseudo-legal moves until no moves remain // Step 11. Loop through all pseudo-legal moves until no moves remain
// or a beta cutoff occurs. // or a beta cutoff occurs.
while ((move = mp.next_move(moveCountPruning)) != MOVE_NONE) while ((move = mp.next_move(moveCountPruning)) != MOVE_NONE)
{ {
@@ -1015,7 +1009,7 @@ moves_loop: // When in check, search starts from here
// Calculate new depth for this move // Calculate new depth for this move
newDepth = depth - 1; newDepth = depth - 1;
// Step 13. Pruning at shallow depth (~200 Elo) // Step 12. Pruning at shallow depth (~200 Elo)
if ( !rootNode if ( !rootNode
&& !(training && PvNode) && !(training && PvNode)
&& pos.non_pawn_material(us) && pos.non_pawn_material(us)
@@ -1037,17 +1031,17 @@ moves_loop: // When in check, search starts from here
continue; continue;
// Futility pruning: parent node (~5 Elo) // Futility pruning: parent node (~5 Elo)
if ( lmrDepth < 8 if ( lmrDepth < 7
&& !ss->inCheck && !ss->inCheck
&& ss->staticEval + 284 + 188 * lmrDepth <= alpha && ss->staticEval + 283 + 170 * lmrDepth <= alpha
&& (*contHist[0])[movedPiece][to_sq(move)] && (*contHist[0])[movedPiece][to_sq(move)]
+ (*contHist[1])[movedPiece][to_sq(move)] + (*contHist[1])[movedPiece][to_sq(move)]
+ (*contHist[3])[movedPiece][to_sq(move)] + (*contHist[3])[movedPiece][to_sq(move)]
+ (*contHist[5])[movedPiece][to_sq(move)] / 2 < 28388) + (*contHist[5])[movedPiece][to_sq(move)] / 2 < 27376)
continue; continue;
// Prune moves with negative SEE (~20 Elo) // Prune moves with negative SEE (~20 Elo)
if (!pos.see_ge(move, Value(-(29 - std::min(lmrDepth, 17)) * lmrDepth * lmrDepth))) if (!pos.see_ge(move, Value(-(29 - std::min(lmrDepth, 18)) * lmrDepth * lmrDepth)))
continue; continue;
} }
else else
@@ -1064,17 +1058,17 @@ moves_loop: // When in check, search starts from here
&& !(PvNode && abs(bestValue) < 2) && !(PvNode && abs(bestValue) < 2)
&& PieceValue[MG][type_of(movedPiece)] >= PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] && PieceValue[MG][type_of(movedPiece)] >= PieceValue[MG][type_of(pos.piece_on(to_sq(move)))]
&& !ss->inCheck && !ss->inCheck
&& ss->staticEval + 178 + 261 * lmrDepth && ss->staticEval + 169 + 244 * lmrDepth
+ PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] <= alpha) + PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] <= alpha)
continue; continue;
// See based pruning // See based pruning
if (!pos.see_ge(move, Value(-202) * depth)) // (~25 Elo) if (!pos.see_ge(move, Value(-221) * depth)) // (~25 Elo)
continue; continue;
} }
} }
// Step 14. Extensions (~75 Elo) // Step 13. Extensions (~75 Elo)
// Singular extension search (~70 Elo). If all moves but one fail low on a // Singular extension search (~70 Elo). If all moves but one fail low on a
// search of (alpha-s, beta-s), and just one fails high on (alpha, beta), // search of (alpha-s, beta-s), and just one fails high on (alpha, beta),
@@ -1128,19 +1122,14 @@ moves_loop: // When in check, search starts from here
&& (pos.is_discovery_check_on_king(~us, move) || pos.see_ge(move))) && (pos.is_discovery_check_on_king(~us, move) || pos.see_ge(move)))
extension = 1; extension = 1;
// Passed pawn extension
else if ( move == ss->killers[0]
&& pos.advanced_pawn_push(move)
&& pos.pawn_passed(us, to_sq(move)))
extension = 1;
// Last captures extension // Last captures extension
else if ( PieceValue[EG][pos.captured_piece()] > PawnValueEg else if ( PieceValue[EG][pos.captured_piece()] > PawnValueEg
&& pos.non_pawn_material() <= 2 * RookValueMg) && pos.non_pawn_material() <= 2 * RookValueMg)
extension = 1; extension = 1;
// Castling extension // Castling extension
if (type_of(move) == CASTLING) if ( type_of(move) == CASTLING
&& popcount(pos.pieces(us) & ~pos.pieces(PAWN) & (to_sq(move) & KingSide ? KingSide : QueenSide)) <= 2)
extension = 1; extension = 1;
// Late irreversible move extension // Late irreversible move extension
@@ -1162,10 +1151,10 @@ moves_loop: // When in check, search starts from here
[movedPiece] [movedPiece]
[to_sq(move)]; [to_sq(move)];
// Step 15. Make the move // Step 14. Make the move
pos.do_move(move, st, givesCheck); pos.do_move(move, st, givesCheck);
// Step 16. Reduced depth search (LMR, ~200 Elo). If the move fails high it will be // Step 15. Reduced depth search (LMR, ~200 Elo). If the move fails high it will be
// re-searched at full depth. // re-searched at full depth.
if ( depth >= 3 if ( depth >= 3
&& moveCount > 1 + 2 * rootNode + 2 * (PvNode && abs(bestValue) < 2) && moveCount > 1 + 2 * rootNode + 2 * (PvNode && abs(bestValue) < 2)
@@ -1174,7 +1163,7 @@ moves_loop: // When in check, search starts from here
|| moveCountPruning || moveCountPruning
|| ss->staticEval + PieceValue[EG][pos.captured_piece()] <= alpha || ss->staticEval + PieceValue[EG][pos.captured_piece()] <= alpha
|| cutNode || cutNode
|| thisThread->ttHitAverage < 415 * TtHitAverageResolution * TtHitAverageWindow / 1024)) || thisThread->ttHitAverage < 427 * TtHitAverageResolution * TtHitAverageWindow / 1024))
{ {
Depth r = reduction(improving, depth, moveCount); Depth r = reduction(improving, depth, moveCount);
@@ -1186,7 +1175,7 @@ moves_loop: // When in check, search starts from here
r--; r--;
// Decrease reduction if the ttHit running average is large // Decrease reduction if the ttHit running average is large
if (thisThread->ttHitAverage > 473 * TtHitAverageResolution * TtHitAverageWindow / 1024) if (thisThread->ttHitAverage > 509 * TtHitAverageResolution * TtHitAverageWindow / 1024)
r--; r--;
// Reduction if other threads are searching this position // Reduction if other threads are searching this position
@@ -1229,17 +1218,17 @@ moves_loop: // When in check, search starts from here
+ (*contHist[0])[movedPiece][to_sq(move)] + (*contHist[0])[movedPiece][to_sq(move)]
+ (*contHist[1])[movedPiece][to_sq(move)] + (*contHist[1])[movedPiece][to_sq(move)]
+ (*contHist[3])[movedPiece][to_sq(move)] + (*contHist[3])[movedPiece][to_sq(move)]
- 4826; - 5287;
// Decrease/increase reduction by comparing opponent's stat score (~10 Elo) // Decrease/increase reduction by comparing opponent's stat score (~10 Elo)
if (ss->statScore >= -100 && (ss-1)->statScore < -112) if (ss->statScore >= -106 && (ss-1)->statScore < -104)
r--; r--;
else if ((ss-1)->statScore >= -125 && ss->statScore < -138) else if ((ss-1)->statScore >= -119 && ss->statScore < -140)
r++; r++;
// Decrease/increase reduction for moves with a good/bad history (~30 Elo) // Decrease/increase reduction for moves with a good/bad history (~30 Elo)
r -= ss->statScore / 14615; r -= ss->statScore / 14884;
} }
else else
{ {
@@ -1249,11 +1238,11 @@ moves_loop: // When in check, search starts from here
// Unless giving check, this capture is likely bad // Unless giving check, this capture is likely bad
if ( !givesCheck if ( !givesCheck
&& ss->staticEval + PieceValue[EG][pos.captured_piece()] + 211 * depth <= alpha) && ss->staticEval + PieceValue[EG][pos.captured_piece()] + 213 * depth <= alpha)
r++; r++;
} }
Depth d = Utility::clamp(newDepth - r, 1, newDepth); Depth d = std::clamp(newDepth - r, 1, newDepth);
value = -search<NonPV>(pos, ss+1, -(alpha+1), -alpha, d, true); value = -search<NonPV>(pos, ss+1, -(alpha+1), -alpha, d, true);
@@ -1268,7 +1257,7 @@ moves_loop: // When in check, search starts from here
didLMR = false; didLMR = false;
} }
// Step 17. Full depth search when LMR is skipped or fails high // Step 16. Full depth search when LMR is skipped or fails high
if (doFullDepthSearch) if (doFullDepthSearch)
{ {
value = -search<NonPV>(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode); value = -search<NonPV>(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode);
@@ -1296,12 +1285,12 @@ moves_loop: // When in check, search starts from here
value = -search<PV>(pos, ss+1, -beta, -alpha, newDepth, false); value = -search<PV>(pos, ss+1, -beta, -alpha, newDepth, false);
} }
// Step 18. Undo move // Step 17. Undo move
pos.undo_move(move); pos.undo_move(move);
assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); assert(value > -VALUE_INFINITE && value < VALUE_INFINITE);
// Step 19. Check for a new best move // Step 18. Check for a new best move
// Finished searching the move. If a stop occurred, the return value of // Finished searching the move. If a stop occurred, the return value of
// the search cannot be trusted, and we return immediately without // the search cannot be trusted, and we return immediately without
// updating best move, PV and TT. // updating best move, PV and TT.
@@ -1378,7 +1367,7 @@ moves_loop: // When in check, search starts from here
return VALUE_DRAW; return VALUE_DRAW;
*/ */
// Step 20. Check for mate and stalemate // Step 19. Check for mate and stalemate
// All legal moves have been searched and if there are no legal moves, it // All legal moves have been searched and if there are no legal moves, it
// must be a mate or a stalemate. If we are in a singular extension search then // must be a mate or a stalemate. If we are in a singular extension search then
// return a fail low score. // return a fail low score.
@@ -1511,7 +1500,7 @@ moves_loop: // When in check, search starts from here
if (PvNode && bestValue > alpha) if (PvNode && bestValue > alpha)
alpha = bestValue; alpha = bestValue;
futilityBase = bestValue + 141; futilityBase = bestValue + 145;
} }
const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory, const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory,
@@ -1545,6 +1534,10 @@ moves_loop: // When in check, search starts from here
{ {
assert(type_of(move) != ENPASSANT); // Due to !pos.advanced_pawn_push assert(type_of(move) != ENPASSANT); // Due to !pos.advanced_pawn_push
// moveCount pruning
if (moveCount > 2)
continue;
futilityValue = futilityBase + PieceValue[EG][pos.piece_on(to_sq(move))]; futilityValue = futilityBase + PieceValue[EG][pos.piece_on(to_sq(move))];
if (futilityValue <= alpha) if (futilityValue <= alpha)
@@ -1586,6 +1579,12 @@ moves_loop: // When in check, search starts from here
[pos.moved_piece(move)] [pos.moved_piece(move)]
[to_sq(move)]; [to_sq(move)];
if ( !captureOrPromotion
&& moveCount >= abs(depth) + 1
&& (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold
&& (*contHist[1])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold)
continue;
// Make and search the move // Make and search the move
pos.do_move(move, st, givesCheck); pos.do_move(move, st, givesCheck);
value = -qsearch<NT>(pos, ss+1, -beta, -alpha, depth - 1); value = -qsearch<NT>(pos, ss+1, -beta, -alpha, depth - 1);
@@ -1768,7 +1767,7 @@ moves_loop: // When in check, search starts from here
} }
if (depth > 11 && ss->ply < MAX_LPH) if (depth > 11 && ss->ply < MAX_LPH)
thisThread->lowPlyHistory[ss->ply][from_to(move)] << stat_bonus(depth - 6); thisThread->lowPlyHistory[ss->ply][from_to(move)] << stat_bonus(depth - 7);
} }
// When playing with strength handicap, choose best move among a set of RootMoves // When playing with strength handicap, choose best move among a set of RootMoves
+1 -1
View File
@@ -27,7 +27,7 @@
/// The implementation calls pthread_create() with the stack size parameter /// The implementation calls pthread_create() with the stack size parameter
/// equal to the linux 8MB default, on platforms that support it. /// equal to the linux 8MB default, on platforms that support it.
#if defined(__APPLE__) || defined(__MINGW32__) || defined(__MINGW64__) #if defined(__APPLE__) || defined(__MINGW32__) || defined(__MINGW64__) || defined(USE_PTHREADS)
#include <pthread.h> #include <pthread.h>
+9 -9
View File
@@ -38,9 +38,9 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) {
TimePoint slowMover = TimePoint(Options["Slow Mover"]); TimePoint slowMover = TimePoint(Options["Slow Mover"]);
TimePoint npmsec = TimePoint(Options["nodestime"]); TimePoint npmsec = TimePoint(Options["nodestime"]);
// opt_scale is a percentage of available time to use for the current move. // optScale is a percentage of available time to use for the current move.
// max_scale is a multiplier applied to optimumTime. // maxScale is a multiplier applied to optimumTime.
double opt_scale, max_scale; double optScale, maxScale;
// If we have to play in 'nodes as time' mode, then convert from time // If we have to play in 'nodes as time' mode, then convert from time
// to nodes, and use resulting values in time management formulas. // to nodes, and use resulting values in time management formulas.
@@ -75,22 +75,22 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) {
// game time for the current move, so also cap to 20% of available game time. // game time for the current move, so also cap to 20% of available game time.
if (limits.movestogo == 0) if (limits.movestogo == 0)
{ {
opt_scale = std::min(0.008 + std::pow(ply + 3.0, 0.5) / 250.0, optScale = std::min(0.008 + std::pow(ply + 3.0, 0.5) / 250.0,
0.2 * limits.time[us] / double(timeLeft)); 0.2 * limits.time[us] / double(timeLeft));
max_scale = std::min(7.0, 4.0 + ply / 12.0); maxScale = std::min(7.0, 4.0 + ply / 12.0);
} }
// x moves in y seconds (+ z increment) // x moves in y seconds (+ z increment)
else else
{ {
opt_scale = std::min((0.8 + ply / 128.0) / mtg, optScale = std::min((0.8 + ply / 128.0) / mtg,
0.8 * limits.time[us] / double(timeLeft)); 0.8 * limits.time[us] / double(timeLeft));
max_scale = std::min(6.3, 1.5 + 0.11 * mtg); maxScale = std::min(6.3, 1.5 + 0.11 * mtg);
} }
// Never use more than 80% of the available time for this move // Never use more than 80% of the available time for this move
optimumTime = TimePoint(opt_scale * timeLeft); optimumTime = TimePoint(optScale * timeLeft);
maximumTime = TimePoint(std::min(0.8 * limits.time[us] - moveOverhead, max_scale * optimumTime)); maximumTime = TimePoint(std::min(0.8 * limits.time[us] - moveOverhead, maxScale * optimumTime));
if (Options["Ponder"]) if (Options["Ponder"])
optimumTime += optimumTime / 4; optimumTime += optimumTime / 4;
+11 -10
View File
@@ -37,18 +37,19 @@ void TTEntry::save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev)
if (m || (uint16_t)k != key16) if (m || (uint16_t)k != key16)
move16 = (uint16_t)m; move16 = (uint16_t)m;
// Overwrite less valuable entries // Overwrite less valuable entries (cheapest checks first)
if ((uint16_t)k != key16 if (b == BOUND_EXACT
|| d - DEPTH_OFFSET > depth8 - 4 || (uint16_t)k != key16
|| b == BOUND_EXACT) || d - DEPTH_OFFSET > depth8 - 4)
{ {
assert(d >= DEPTH_OFFSET); assert(d > DEPTH_OFFSET);
assert(d < 256 + DEPTH_OFFSET);
key16 = (uint16_t)k; key16 = (uint16_t)k;
depth8 = (uint8_t)(d - DEPTH_OFFSET);
genBound8 = (uint8_t)(TT.generation8 | uint8_t(pv) << 2 | b);
value16 = (int16_t)v; value16 = (int16_t)v;
eval16 = (int16_t)ev; eval16 = (int16_t)ev;
genBound8 = (uint8_t)(TT.generation8 | uint8_t(pv) << 2 | b);
depth8 = (uint8_t)(d - DEPTH_OFFSET);
} }
} }
@@ -119,11 +120,11 @@ TTEntry* TranspositionTable::probe(const Key key, bool& found) const {
const uint16_t key16 = (uint16_t)key; // Use the low 16 bits as key inside the cluster const uint16_t key16 = (uint16_t)key; // Use the low 16 bits as key inside the cluster
for (int i = 0; i < ClusterSize; ++i) for (int i = 0; i < ClusterSize; ++i)
if (!tte[i].key16 || tte[i].key16 == key16) if (tte[i].key16 == key16 || !tte[i].depth8)
{ {
tte[i].genBound8 = uint8_t(generation8 | (tte[i].genBound8 & 0x7)); // Refresh tte[i].genBound8 = uint8_t(generation8 | (tte[i].genBound8 & 0x7)); // Refresh
return found = (bool)tte[i].key16, &tte[i]; return found = (bool)tte[i].depth8, &tte[i];
} }
// Find an entry to be replaced according to the replacement strategy // Find an entry to be replaced according to the replacement strategy
@@ -149,7 +150,7 @@ int TranspositionTable::hashfull() const {
int cnt = 0; int cnt = 0;
for (int i = 0; i < 1000; ++i) for (int i = 0; i < 1000; ++i)
for (int j = 0; j < ClusterSize; ++j) for (int j = 0; j < ClusterSize; ++j)
cnt += (table[i].entry[j].genBound8 & 0xF8) == generation8; cnt += table[i].entry[j].depth8 && (table[i].entry[j].genBound8 & 0xF8) == generation8;
return cnt / ClusterSize; return cnt / ClusterSize;
} }
+6 -6
View File
@@ -25,13 +25,13 @@
/// TTEntry struct is the 10 bytes transposition table entry, defined as below: /// TTEntry struct is the 10 bytes transposition table entry, defined as below:
/// ///
/// key 16 bit /// key 16 bit
/// move 16 bit /// depth 8 bit
/// value 16 bit
/// eval value 16 bit
/// generation 5 bit /// generation 5 bit
/// pv node 1 bit /// pv node 1 bit
/// bound type 2 bit /// bound type 2 bit
/// depth 8 bit /// move 16 bit
/// value 16 bit
/// eval value 16 bit
struct TTEntry { struct TTEntry {
@@ -47,11 +47,11 @@ private:
friend class TranspositionTable; friend class TranspositionTable;
uint16_t key16; uint16_t key16;
uint8_t depth8;
uint8_t genBound8;
uint16_t move16; uint16_t move16;
int16_t value16; int16_t value16;
int16_t eval16; int16_t eval16;
uint8_t genBound8;
uint8_t depth8;
}; };
+10 -129
View File
@@ -203,22 +203,6 @@ enum Piece {
PIECE_NB = 16 PIECE_NB = 16
}; };
// An ID used to track the pieces. Max. 32 pieces on board.
enum PieceId {
PIECE_ID_ZERO = 0,
PIECE_ID_KING = 30,
PIECE_ID_WKING = 30,
PIECE_ID_BKING = 31,
PIECE_ID_NONE = 32
};
inline PieceId operator++(PieceId& d, int) {
PieceId x = d;
d = PieceId(int(d) + 1);
return x;
}
constexpr Value PieceValue[PHASE_NB][PIECE_NB] = { constexpr Value PieceValue[PHASE_NB][PIECE_NB] = {
{ VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO, { VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO,
VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO }, VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO },
@@ -234,7 +218,8 @@ enum : int {
DEPTH_QS_RECAPTURES = -5, DEPTH_QS_RECAPTURES = -5,
DEPTH_NONE = -6, DEPTH_NONE = -6,
DEPTH_OFFSET = DEPTH_NONE
DEPTH_OFFSET = -7 // value used only for TT entry occupancy check
}; };
enum Square : int { enum Square : int {
@@ -272,118 +257,20 @@ enum Rank : int {
RANK_1, RANK_2, RANK_3, RANK_4, RANK_5, RANK_6, RANK_7, RANK_8, RANK_NB RANK_1, RANK_2, RANK_3, RANK_4, RANK_5, RANK_6, RANK_7, RANK_8, RANK_NB
}; };
// unique number for each piece type on each square // Keep track of what a move changes on the board (used by NNUE)
// Base index for each (colour, piece type) pair. Every pair owns a range of
// SQUARE_NB consecutive values starting at its base; EvalList::put_piece adds
// the square to the base to obtain the final index. Value 0 is reserved for
// PS_NONE, which is why every base is offset by +1.
enum PieceSquare : uint32_t {
  PS_NONE = 0,                     // no piece / unused slot
  PS_W_PAWN = 1,
  PS_B_PAWN = 1 * SQUARE_NB + 1,
  PS_W_KNIGHT = 2 * SQUARE_NB + 1,
  PS_B_KNIGHT = 3 * SQUARE_NB + 1,
  PS_W_BISHOP = 4 * SQUARE_NB + 1,
  PS_B_BISHOP = 5 * SQUARE_NB + 1,
  PS_W_ROOK = 6 * SQUARE_NB + 1,
  PS_B_ROOK = 7 * SQUARE_NB + 1,
  PS_W_QUEEN = 8 * SQUARE_NB + 1,
  PS_B_QUEEN = 9 * SQUARE_NB + 1,
  PS_W_KING = 10 * SQUARE_NB + 1,
  PS_END = PS_W_KING, // end of the non-king ranges: pieces without kings (pawns included)
  PS_B_KING = 11 * SQUARE_NB + 1,
  PS_END2 = 12 * SQUARE_NB + 1,    // end of all ranges, kings included
  PS_NOT_INIT = PS_END2 + 1,       // one past PS_END2; NOTE(review): presumably marks an uninitialized entry - confirm
};
// One PieceSquare per colour perspective, indexed by Color.
// Used as the per-piece entry type of kpp_board_index and as the value
// returned by EvalList::piece_with_id.
struct ExtPieceSquare {
  PieceSquare from[COLOR_NB];
};
// Array for finding the PieceSquare corresponding to the piece on the board.
// Indexed by Piece; EvalList::put_piece adds the square (rotated by 180
// degrees for the BLACK entry) to the stored value to get the final index.
extern ExtPieceSquare kpp_board_index[PIECE_NB];

// Declared ahead of EvalList, which uses all three of them.
constexpr bool is_ok(PieceId pid);
constexpr Square rotate180(Square sq);
class Position;
// Structure holding which tracked piece (PieceId) is where (PieceSquare)
class EvalList {
public:
// Max. number of pieces without kings is 30 but must be a multiple of 4 in AVX2
static const int MAX_LENGTH = 32;
// Array that holds the piece id for the pieces on the board
PieceId piece_id_list[SQUARE_NB];
// List of pieces, separate from White and Black POV
PieceSquare* piece_list_fw() const { return const_cast<PieceSquare*>(pieceListFw); }
PieceSquare* piece_list_fb() const { return const_cast<PieceSquare*>(pieceListFb); }
// Place the piece pc with piece_id on the square sq on the board
void put_piece(PieceId piece_id, Square sq, Piece pc)
{
assert(is_ok(piece_id));
if (pc != NO_PIECE)
{
pieceListFw[piece_id] = PieceSquare(kpp_board_index[pc].from[WHITE] + sq);
pieceListFb[piece_id] = PieceSquare(kpp_board_index[pc].from[BLACK] + rotate180(sq));
piece_id_list[sq] = piece_id;
}
else
{
pieceListFw[piece_id] = PS_NONE;
pieceListFb[piece_id] = PS_NONE;
piece_id_list[sq] = piece_id;
}
}
// Convert the specified piece_id piece to ExtPieceSquare type and return it
ExtPieceSquare piece_with_id(PieceId piece_id) const
{
ExtPieceSquare eps;
eps.from[WHITE] = pieceListFw[piece_id];
eps.from[BLACK] = pieceListFb[piece_id];
return eps;
}
// Initialize the pieceList.
// Set the value of unused pieces to PieceSquare::PS_NONE in case you want to deal with dropped pieces.
// A normal evaluation function can be used as an evaluation function for missing frames.
// piece_no_list is initialized with PieceId::PIECE_ID_NONE to facilitate debugging.
void clear()
{
for (auto& p : pieceListFw)
p = PieceSquare::PS_NONE;
for (auto& p : pieceListFb)
p = PieceSquare::PS_NONE;
for (auto& v : piece_id_list)
v = PieceId::PIECE_ID_NONE;
}
// Check whether the pieceListFw[] held internally is a correct BonaPiece.
// Note: For debugging. slow.
bool is_valid(const Position& pos);
private:
PieceSquare pieceListFw[MAX_LENGTH];
PieceSquare pieceListFb[MAX_LENGTH];
};
// For differential evaluation of pieces that changed since last turn
struct DirtyPiece { struct DirtyPiece {
// Number of changed pieces // Number of changed pieces
int dirty_num; int dirty_num;
// The ids of changed pieces, max. 2 pieces can change in one move // Max 3 pieces can change in one move. A promotion with capture moves
PieceId pieceId[2]; // both the pawn and the captured piece to SQ_NONE and the piece promoted
// to from SQ_NONE to the capture square.
Piece piece[3];
// What changed from the piece with that piece number // From and to squares, which may be SQ_NONE
ExtPieceSquare old_piece[2]; Square from[3];
ExtPieceSquare new_piece[2]; Square to[3];
}; };
/// Score enum stores a middlegame and an endgame value in a single integer (enum). /// Score enum stores a middlegame and an endgame value in a single integer (enum).
@@ -433,8 +320,6 @@ ENABLE_FULL_OPERATORS_ON(Value)
ENABLE_FULL_OPERATORS_ON(Direction) ENABLE_FULL_OPERATORS_ON(Direction)
ENABLE_INCR_OPERATORS_ON(Piece) ENABLE_INCR_OPERATORS_ON(Piece)
ENABLE_INCR_OPERATORS_ON(PieceSquare)
ENABLE_INCR_OPERATORS_ON(PieceId)
ENABLE_INCR_OPERATORS_ON(PieceType) ENABLE_INCR_OPERATORS_ON(PieceType)
ENABLE_INCR_OPERATORS_ON(Square) ENABLE_INCR_OPERATORS_ON(Square)
ENABLE_INCR_OPERATORS_ON(File) ENABLE_INCR_OPERATORS_ON(File)
@@ -523,10 +408,6 @@ inline Color color_of(Piece pc) {
return Color(pc >> 3); return Color(pc >> 3);
} }
// A PieceId is usable when it lies strictly below the PIECE_ID_NONE sentinel.
constexpr bool is_ok(PieceId pid) {
  return PIECE_ID_NONE > pid;
}
constexpr bool is_ok(Square s) { constexpr bool is_ok(Square s) {
return s >= SQ_A1 && s <= SQ_H8; return s >= SQ_A1 && s <= SQ_H8;
} }
+1 -1
View File
@@ -260,7 +260,7 @@ double UCI::win_rate_model_double(double v, int ply) {
double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3]; double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3];
// Transform eval to centipawns with limited range // Transform eval to centipawns with limited range
double x = Utility::clamp(double(100 * v) / PawnValueEg, -1000.0, 1000.0); double x = std::clamp(double(100 * v) / PawnValueEg, -1000.0, 1000.0);
// Return win rate in per mille // Return win rate in per mille
return 1000.0 / (1 + std::exp((a - x) / b)); return 1000.0 / (1 + std::exp((a - x) / b));
+4 -2
View File
@@ -79,8 +79,10 @@ void init(OptionsMap& o) {
o["SyzygyProbeDepth"] << Option(1, 1, 100); o["SyzygyProbeDepth"] << Option(1, 1, 100);
o["Syzygy50MoveRule"] << Option(true); o["Syzygy50MoveRule"] << Option(true);
o["SyzygyProbeLimit"] << Option(7, 0, 7); o["SyzygyProbeLimit"] << Option(7, 0, 7);
o["Use NNUE"] << Option(false, on_use_NNUE); o["Use NNUE"] << Option(true, on_use_NNUE);
o["EvalFile"] << Option("nn-9931db908a9b.nnue", on_eval_file); // The default must follow the format nn-[SHA256 first 12 digits].nnue
// for the build process (profile-build and fishtest) to work.
o["EvalFile"] << Option("nn-82215d0fd0df.nnue", on_eval_file);
#ifdef EVAL_NNUE #ifdef EVAL_NNUE
// When the evaluation function is loaded at the ucinewgame timing, it is necessary to convert the new evaluation function. // When the evaluation function is loaded at the ucinewgame timing, it is necessary to convert the new evaluation function.
// I want to hit the test eval convert command, but there is no new evaluation function // I want to hit the test eval convert command, but there is no new evaluation function
+4 -4
View File
@@ -70,7 +70,7 @@ for args in "eval" \
"go depth 10" \ "go depth 10" \
"go movetime 1000" \ "go movetime 1000" \
"go wtime 8000 btime 8000 winc 500 binc 500" \ "go wtime 8000 btime 8000 winc 500 binc 500" \
"bench 128 $threads 10 default depth" "bench 128 $threads 8 default depth"
do do
echo "$prefix $exeprefix ./stockfish $args $postfix" echo "$prefix $exeprefix ./stockfish $args $postfix"
@@ -80,7 +80,7 @@ done
# more general testing, following an uci protocol exchange # more general testing, following an uci protocol exchange
cat << EOF > game.exp cat << EOF > game.exp
set timeout 10 set timeout 240
spawn $exeprefix ./stockfish spawn $exeprefix ./stockfish
send "uci\n" send "uci\n"
@@ -98,7 +98,7 @@ cat << EOF > game.exp
expect "bestmove" expect "bestmove"
send "position fen 5rk1/1K4p1/8/8/3B4/8/8/8 b - - 0 1\n" send "position fen 5rk1/1K4p1/8/8/3B4/8/8/8 b - - 0 1\n"
send "go depth 30\n" send "go depth 20\n"
expect "bestmove" expect "bestmove"
send "quit\n" send "quit\n"
@@ -121,7 +121,7 @@ cat << EOF > syzygy.exp
send "uci\n" send "uci\n"
send "setoption name SyzygyPath value ../tests/syzygy/\n" send "setoption name SyzygyPath value ../tests/syzygy/\n"
expect "info string Found 35 tablebases" {} timeout {exit 1} expect "info string Found 35 tablebases" {} timeout {exit 1}
send "bench 128 1 10 default depth\n" send "bench 128 1 8 default depth\n"
send "quit\n" send "quit\n"
expect eof expect eof