diff --git a/.travis.yml b/.travis.yml index d563a1e1..092c7f53 100644 --- a/.travis.yml +++ b/.travis.yml @@ -43,26 +43,47 @@ before_script: - cd src script: + # Download net + - make net + # Obtain bench reference from git log - git log HEAD | grep "\b[Bb]ench[ :]\+[0-9]\{7\}" | head -n 1 | sed "s/[^0-9]*\([0-9]*\).*/\1/g" > git_sig - export benchref=$(cat git_sig) - echo "Reference bench:" $benchref - # # Compiler version string - $COMPILER -v - # + # test help target + - make help + # Verify bench number against various builds - export CXXFLAGS="-Werror -D_GLIBCXX_DEBUG" - - make clean && make -j2 ARCH=x86-64 optimize=no debug=yes build && ../tests/signature.sh $benchref + - make clean && make -j2 ARCH=x86-64-modern optimize=no debug=yes build && ../tests/signature.sh $benchref + - export CXXFLAGS="-Werror" + - make clean && make -j2 ARCH=x86-64-modern build && ../tests/signature.sh $benchref + - make clean && make -j2 ARCH=x86-64-ssse3 build && ../tests/signature.sh $benchref + - make clean && make -j2 ARCH=x86-64-sse3-popcnt build && ../tests/signature.sh $benchref + - make clean && make -j2 ARCH=x86-64 build && ../tests/signature.sh $benchref + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=general-64 build && ../tests/signature.sh $benchref; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 optimize=no debug=yes build && ../tests/signature.sh $benchref; fi + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32-sse41-popcnt build && ../tests/signature.sh $benchref; fi + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32-sse2 build && ../tests/signature.sh $benchref; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 build && ../tests/signature.sh $benchref; fi + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=general-32 build && ../tests/signature.sh $benchref; fi + # workaround: 
exclude a custom version of llvm+clang, which doesn't find llvm-profdata on ubuntu + - if [[ "$TRAVIS_OS_NAME" != "linux" || "$COMP" == "gcc" ]]; then make clean && make -j2 ARCH=x86-64-modern profile-build && ../tests/signature.sh $benchref; fi + + # compile only for some more advanced architectures (might not run in travis) + - make clean && make -j2 ARCH=x86-64-avx2 build + - make clean && make -j2 ARCH=x86-64-bmi2 build + - make clean && make -j2 ARCH=x86-64-avx512 build + - make clean && make -j2 ARCH=x86-64-vnni512 build + - make clean && make -j2 ARCH=x86-64-vnni256 build # # Check perft and reproducible search - - export CXXFLAGS="-Werror" - - make clean && make -j2 ARCH=x86-64 build + - make clean && make -j2 ARCH=x86-64-modern build - ../tests/perft.sh - ../tests/reprosearch.sh @@ -70,11 +91,11 @@ script: # Valgrind # - export CXXFLAGS="-O1 -fno-inline" - - if [ -x "$(command -v valgrind )" ]; then make clean && make -j2 ARCH=x86-64 debug=yes optimize=no build > /dev/null && ../tests/instrumented.sh --valgrind; fi + - if [ -x "$(command -v valgrind )" ]; then make clean && make -j2 ARCH=x86-64-modern debug=yes optimize=no build > /dev/null && ../tests/instrumented.sh --valgrind; fi - if [ -x "$(command -v valgrind )" ]; then ../tests/instrumented.sh --valgrind-thread; fi # # Sanitizer # - - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64 sanitize=undefined optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-undefined; fi - - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64 sanitize=thread optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-thread; fi + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64-modern sanitize=undefined optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-undefined; fi + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64-modern 
sanitize=thread optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-thread; fi diff --git a/AUTHORS b/AUTHORS index 21ef3e50..c96f870a 100644 --- a/AUTHORS +++ b/AUTHORS @@ -53,11 +53,13 @@ Ernesto Gatti Linmiao Xu (linrock) Fabian Beuke (madnight) Fabian Fichter (ianfab) +Fanael Linithien (Fanael) fanon Fauzi Akram Dabat (FauziAkram) Felix Wittmann gamander Gary Heckman (gheckman) +George Sobala (gsobala) gguliash Gian-Carlo Pascutto (gcp) Gontran Lemaire (gonlem) @@ -126,6 +128,7 @@ Niklas Fiekas (niklasf) Nikolay Kostov (NikolayIT) Nguyen Pham (nguyenpham) Norman Schmidt (FireFather) +notruck Ondrej Mosnáček (WOnder93) Oskar Werkelin Ahlin Pablo Vazquez diff --git a/appveyor.yml b/appveyor.yml index d356ba2f..a3732a23 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -61,6 +61,20 @@ before_build: build_script: - cmake --build . --config %CONFIGURATION% -- /verbosity:minimal + - ps: | + # Download default NNUE net from fishtest + $nnuenet = Get-Content -Path src\ucioption.cpp | Select-String -CaseSensitive -Pattern "Option" | Select-String -CaseSensitive -Pattern "nn-[a-z0-9]{12}.nnue" + $dummy = $nnuenet -match "(?<nnuenet>nn-[a-z0-9]{12}.nnue)" + $nnuenet = $Matches.nnuenet + Write-Host "Default net:" $nnuenet + $nnuedownloadurl = "https://tests.stockfishchess.org/api/nn/$nnuenet" + $nnuefilepath = "src\${env:CONFIGURATION}\$nnuenet" + if (Test-Path -Path $nnuefilepath) { + Write-Host "Already available." 
+ } else { + Write-Host "Downloading $nnuedownloadurl to $nnuefilepath" + Invoke-WebRequest -Uri $nnuedownloadurl -OutFile $nnuefilepath + } before_test: - cd src/%CONFIGURATION% diff --git a/src/Makefile b/src/Makefile index a8736a15..9372b915 100644 --- a/src/Makefile +++ b/src/Makefile @@ -40,7 +40,6 @@ PGOGENSFEN = ./$(EXE) gensfen depth 3 loop 100000 SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp \ material.cpp misc.cpp movegen.cpp movepick.cpp pawns.cpp position.cpp psqt.cpp \ search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \ - eval/evaluate_mir_inv_tools.cpp \ nnue/evaluate_nnue.cpp \ nnue/evaluate_nnue_learner.cpp \ nnue/features/half_kp.cpp \ @@ -82,14 +81,16 @@ endif # bits = 64/32 --- -DIS_64BIT --- 64-/32-bit operating system # prefetch = yes/no --- -DUSE_PREFETCH --- Use prefetch asm-instruction # popcnt = yes/no --- -DUSE_POPCNT --- Use popcnt asm-instruction +# pext = yes/no --- -DUSE_PEXT --- Use pext x86_64 asm-instruction # sse = yes/no --- -msse --- Use Intel Streaming SIMD Extensions -# sse3 = yes/no --- -msse3 --- Use Intel Streaming SIMD Extensions 3 +# mmx = yes/no --- -mmmx --- Use Intel MMX instructions +# sse2 = yes/no --- -msse2 --- Use Intel Streaming SIMD Extensions 2 # ssse3 = yes/no --- -mssse3 --- Use Intel Supplemental Streaming SIMD Extensions 3 # sse41 = yes/no --- -msse4.1 --- Use Intel Streaming SIMD Extensions 4.1 -# sse42 = yes/no --- -msse4.2 --- Use Intel Streaming SIMD Extensions 4.2 # avx2 = yes/no --- -mavx2 --- Use Intel Advanced Vector Extensions 2 -# pext = yes/no --- -DUSE_PEXT --- Use pext x86_64 asm-instruction # avx512 = yes/no --- -mavx512bw --- Use Intel Advanced Vector Extensions 512 +# vnni256 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 256 +# vnni512 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 512 # neon = yes/no --- -DUSE_NEON --- Use ARM SIMD architecture # # Note that 
Makefile is space sensitive, so when adding new architectures @@ -97,152 +98,184 @@ endif # at the end of the line for flag values. ### 2.1. General and architecture defaults + +# explicitly check for the list of supported architectures (as listed with make help), +# the user can override with `make ARCH=x86-32-vnni256 SUPPORTED_ARCH=true` +ifeq ($(ARCH),$(filter $(ARCH),x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-bmi2 x86-64-avx2 \ + x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \ + x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 \ + armv7 armv7-neon armv8 apple-silicon general-64 general-32)) + SUPPORTED_ARCH=true +else + SUPPORTED_ARCH=false +endif + optimize = yes debug = no sanitize = no bits = 64 prefetch = no popcnt = no +pext = no sse = no -sse3 = no +mmx = no +sse2 = no ssse3 = no sse41 = no -sse42 = no avx2 = no -pext = no avx512 = no +vnni256 = no +vnni512 = no neon = no ARCH = x86-64-modern +STRIP = strip ### 2.2 Architecture specific + +ifeq ($(findstring x86,$(ARCH)),x86) + +# x86-32/64 + +ifeq ($(findstring x86-32,$(ARCH)),x86-32) + arch = i386 + bits = 32 + sse = yes + mmx = yes +else + arch = x86_64 + sse = yes + sse2 = yes +endif + +ifeq ($(findstring -sse,$(ARCH)),-sse) + sse = yes +endif + +ifeq ($(findstring -popcnt,$(ARCH)),-popcnt) + popcnt = yes +endif + +ifeq ($(findstring -mmx,$(ARCH)),-mmx) + mmx = yes +endif + +ifeq ($(findstring -sse2,$(ARCH)),-sse2) + sse = yes + sse2 = yes +endif + +ifeq ($(findstring -ssse3,$(ARCH)),-ssse3) + sse = yes + sse2 = yes + ssse3 = yes +endif + +ifeq ($(findstring -sse41,$(ARCH)),-sse41) + sse = yes + sse2 = yes + ssse3 = yes + sse41 = yes +endif + +ifeq ($(findstring -modern,$(ARCH)),-modern) + popcnt = yes + sse = yes + sse2 = yes + ssse3 = yes + sse41 = yes +endif + +ifeq ($(findstring -avx2,$(ARCH)),-avx2) + popcnt = yes + sse = yes + sse2 = yes + ssse3 = yes + sse41 = yes + avx2 = yes +endif + +ifeq ($(findstring -bmi2,$(ARCH)),-bmi2) + popcnt = yes + sse = yes + 
sse2 = yes + ssse3 = yes + sse41 = yes + avx2 = yes + pext = yes +endif + +ifeq ($(findstring -avx512,$(ARCH)),-avx512) + popcnt = yes + sse = yes + sse2 = yes + ssse3 = yes + sse41 = yes + avx2 = yes + pext = yes + avx512 = yes +endif + +ifeq ($(findstring -vnni256,$(ARCH)),-vnni256) + popcnt = yes + sse = yes + sse2 = yes + ssse3 = yes + sse41 = yes + avx2 = yes + pext = yes + vnni256 = yes +endif + +ifeq ($(findstring -vnni512,$(ARCH)),-vnni512) + popcnt = yes + sse = yes + sse2 = yes + ssse3 = yes + sse41 = yes + avx2 = yes + pext = yes + avx512 = yes + vnni512 = yes +endif + +ifeq ($(sse),yes) + prefetch = yes +endif + +# 64-bit pext is not available on x86-32 +ifeq ($(bits),32) + pext = no +endif + +else + +# all other architectures + ifeq ($(ARCH),general-32) arch = any bits = 32 endif -ifeq ($(ARCH),x86-32-old) - arch = i386 - bits = 32 -endif - -ifeq ($(ARCH),x86-32) - arch = i386 - bits = 32 - prefetch = yes - sse = yes -endif - ifeq ($(ARCH),general-64) arch = any endif -ifeq ($(ARCH),x86-64) - arch = x86_64 - prefetch = yes - sse = yes -endif - -ifeq ($(ARCH),x86-64-sse3) - arch = x86_64 - prefetch = yes - sse = yes - sse3 = yes -endif - -ifeq ($(ARCH),x86-64-sse3-popcnt) - arch = x86_64 - prefetch = yes - sse = yes - sse3 = yes - popcnt = yes -endif - -ifeq ($(ARCH),x86-64-ssse3) - arch = x86_64 - prefetch = yes - sse = yes - sse3 = yes - ssse3 = yes -endif - -ifeq ($(ARCH),x86-64-sse41) - arch = x86_64 - prefetch = yes - popcnt = yes - sse = yes - sse3 = yes - ssse3 = yes - sse41 = yes -endif - -ifeq ($(ARCH),x86-64-modern) - arch = x86_64 - prefetch = yes - popcnt = yes - sse = yes - sse3 = yes - ssse3 = yes - sse41 = yes -endif - -ifeq ($(ARCH),x86-64-sse42) - arch = x86_64 - prefetch = yes - popcnt = yes - sse = yes - sse3 = yes - ssse3 = yes - sse41 = yes - sse42 = yes -endif - -ifeq ($(ARCH),x86-64-avx2) - arch = x86_64 - prefetch = yes - popcnt = yes - sse = yes - sse3 = yes - ssse3 = yes - sse41 = yes - sse42 = yes - avx2 = yes -endif - -ifeq 
($(ARCH),x86-64-bmi2) - arch = x86_64 - prefetch = yes - popcnt = yes - sse = yes - sse3 = yes - ssse3 = yes - sse41 = yes - sse42 = yes - avx2 = yes - pext = yes -endif - -ifeq ($(ARCH),x86-64-avx512) - arch = x86_64 - prefetch = yes - popcnt = yes - sse = yes - sse3 = yes - ssse3 = yes - sse41 = yes - sse42 = yes - avx2 = yes - pext = yes - avx512 = yes -endif - ifeq ($(ARCH),armv7) arch = armv7 prefetch = yes bits = 32 endif +ifeq ($(ARCH),armv7-neon) + arch = armv7 + prefetch = yes + popcnt = yes + neon = yes + bits = 32 +endif + ifeq ($(ARCH),armv8) - arch = armv8-a + arch = armv8 prefetch = yes popcnt = yes neon = yes @@ -266,6 +299,8 @@ ifeq ($(ARCH),ppc-64) prefetch = yes endif +endif + ### ========================================================================== ### Section 3. Low-level Configuration ### ========================================================================== @@ -284,7 +319,7 @@ ifeq ($(COMP),gcc) CXX=g++ CXXFLAGS += -pedantic -Wextra -Wshadow - ifeq ($(ARCH),$(filter $(ARCH),armv7 armv8)) + ifeq ($(arch),$(filter $(arch),armv7 armv8)) ifeq ($(OS),Android) CXXFLAGS += -m$(bits) LDFLAGS += -m$(bits) @@ -294,12 +329,13 @@ ifeq ($(COMP),gcc) LDFLAGS += -m$(bits) endif + ifeq ($(arch),$(filter $(arch),armv7)) + LDFLAGS += -latomic + endif + ifneq ($(KERNEL),Darwin) LDFLAGS += -Wl,--no-as-needed endif - - gccversion = $(shell $(CXX) --version) - gccisclang = $(findstring clang,$(gccversion)) endif ifeq ($(COMP),mingw) @@ -344,7 +380,7 @@ ifeq ($(COMP),clang) endif endif - ifeq ($(ARCH),$(filter $(ARCH),armv7 armv8)) + ifeq ($(arch),$(filter $(arch),armv7 armv8)) ifeq ($(OS),Android) CXXFLAGS += -m$(bits) LDFLAGS += -m$(bits) @@ -371,6 +407,26 @@ endif ifeq ($(KERNEL),Darwin) CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.14 LDFLAGS += -arch $(arch) -mmacosx-version-min=10.14 + XCRUN = xcrun +endif + +# To cross-compile for Android, NDK version r21 or later is recommended. 
+# In earlier NDK versions, you'll need to pass -fno-addrsig if using GNU binutils. +# Currently we don't know how to make PGO builds with the NDK yet. +ifeq ($(COMP),ndk) + CXXFLAGS += -stdlib=libc++ -fPIE + ifeq ($(arch),armv7) + comp=armv7a-linux-androideabi16-clang + CXX=armv7a-linux-androideabi16-clang++ + CXXFLAGS += -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=neon + STRIP=arm-linux-androideabi-strip + endif + ifeq ($(arch),armv8) + comp=aarch64-linux-android21-clang + CXX=aarch64-linux-android21-clang++ + STRIP=aarch64-linux-android-strip + endif + LDFLAGS += -static-libstdc++ -pie -lm -latomic endif ### Travis CI script uses COMPILER to overwrite CXX @@ -383,16 +439,29 @@ ifdef COMPCXX CXX=$(COMPCXX) endif +### Sometimes gcc is really clang +ifeq ($(COMP),gcc) + gccversion = $(shell $(CXX) --version) + gccisclang = $(findstring clang,$(gccversion)) + ifneq ($(gccisclang),) + profile_make = clang-profile-make + profile_use = clang-profile-use + endif +endif + ### On mingw use Windows threads, otherwise POSIX ifneq ($(comp),mingw) + CXXFLAGS += -DUSE_PTHREADS # On Android Bionic's C library comes with its own pthread implementation bundled in ifneq ($(OS),Android) # Haiku has pthreads in its libroot, so only link it in on other platforms ifneq ($(KERNEL),Haiku) + ifneq ($(COMP),ndk) LDFLAGS += -lpthread endif endif endif +endif ### 3.2.1 Debugging ifeq ($(debug),no) @@ -434,7 +503,6 @@ endif ifeq ($(prefetch),yes) ifeq ($(sse),yes) CXXFLAGS += -msse - DEPENDFLAGS += -msse endif else CXXFLAGS += -DNO_PREFETCH @@ -442,7 +510,7 @@ endif ### 3.6 popcnt ifeq ($(popcnt),yes) - ifeq ($(arch),$(filter $(arch),ppc64 armv8-a arm64)) + ifeq ($(arch),$(filter $(arch),ppc64 armv7 armv8 arm64)) CXXFLAGS += -DUSE_POPCNT else ifeq ($(comp),icc) CXXFLAGS += -msse3 -DUSE_POPCNT @@ -451,6 +519,7 @@ ifeq ($(popcnt),yes) endif endif + ifeq ($(avx2),yes) CXXFLAGS += -DUSE_AVX2 ifeq ($(comp),$(filter $(comp),gcc clang mingw)) @@ -461,14 +530,21 @@ endif ifeq ($(avx512),yes) 
CXXFLAGS += -DUSE_AVX512 ifeq ($(comp),$(filter $(comp),gcc clang mingw)) - CXXFLAGS += -mavx512bw + CXXFLAGS += -mavx512f -mavx512bw endif endif -ifeq ($(sse42),yes) - CXXFLAGS += -DUSE_SSE42 +ifeq ($(vnni256),yes) + CXXFLAGS += -DUSE_VNNI ifeq ($(comp),$(filter $(comp),gcc clang mingw)) - CXXFLAGS += -msse4.2 + CXXFLAGS += -mavx512f -mavx512bw -mavx512vnni -mavx512dq -mavx512vl -mprefer-vector-width=256 + endif +endif + +ifeq ($(vnni512),yes) + CXXFLAGS += -DUSE_VNNI + ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + CXXFLAGS += -mavx512vnni -mavx512dq -mavx512vl endif endif @@ -486,19 +562,29 @@ ifeq ($(ssse3),yes) endif endif -ifeq ($(sse3),yes) - CXXFLAGS += -DUSE_SSE3 +ifeq ($(sse2),yes) + CXXFLAGS += -DUSE_SSE2 ifeq ($(comp),$(filter $(comp),gcc clang mingw)) - CXXFLAGS += -msse3 + CXXFLAGS += -msse2 + endif +endif + +ifeq ($(mmx),yes) + CXXFLAGS += -DUSE_MMX + ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + CXXFLAGS += -mmmx endif endif ifeq ($(neon),yes) CXXFLAGS += -DUSE_NEON + ifeq ($(KERNEL),Linux) + ifneq ($(COMP),ndk) + ifneq ($(arch),armv8) + CXXFLAGS += -mfpu=neon + endif + endif endif - -ifeq ($(arch),x86_64) - CXXFLAGS += -DUSE_SSE2 endif ### 3.7 pext @@ -514,7 +600,10 @@ endif ### needs access to the optimization flags. ifeq ($(optimize),yes) ifeq ($(debug), no) - ifeq ($(comp),clang) + ifeq ($(COMP),ndk) + CXXFLAGS += -flto=thin + LDFLAGS += $(CXXFLAGS) + else ifeq ($(comp),clang) CXXFLAGS += -flto=thin LDFLAGS += $(CXXFLAGS) @@ -524,13 +613,18 @@ ifeq ($(debug), no) ifeq ($(gccisclang),) CXXFLAGS += -flto LDFLAGS += $(CXXFLAGS) -flto=jobserver + ifneq ($(findstring MINGW,$(KERNEL)),) + LDFLAGS += -save-temps + else ifneq ($(findstring MSYS,$(KERNEL)),) + LDFLAGS += -save-temps + endif else CXXFLAGS += -flto=thin LDFLAGS += $(CXXFLAGS) endif # To use LTO and static linking on windows, the tool chain requires a recent gcc: -# gcc version 10.1 in msys2 or TDM-GCC version 9.2 are know to work, older might not. 
+# gcc version 10.1 in msys2 or TDM-GCC version 9.2 are known to work, older might not. # So, only enable it for a cross from Linux by default. else ifeq ($(comp),mingw) ifeq ($(KERNEL),Linux) @@ -552,6 +646,7 @@ endif ### Section 4. Public Targets ### ========================================================================== + help: @echo "" @echo "To compile stockfish, type: " @@ -560,31 +655,34 @@ help: @echo "" @echo "Supported targets:" @echo "" + @echo "help > Display architecture details" @echo "build > Standard build" - @echo "profile-build > Standard build with PGO" + @echo "net > Download the default nnue net" + @echo "profile-build > Faster build (with profile-guided optimization)" @echo "strip > Strip executable" @echo "install > Install executable" @echo "clean > Clean up" - @echo "net > Download the default nnue net" @echo "" @echo "Supported archs:" @echo "" + @echo "x86-64-vnni512 > x86 64-bit with vnni support 512bit wide" + @echo "x86-64-vnni256 > x86 64-bit with vnni support 256bit wide" @echo "x86-64-avx512 > x86 64-bit with avx512 support" @echo "x86-64-bmi2 > x86 64-bit with bmi2 support" @echo "x86-64-avx2 > x86 64-bit with avx2 support" - @echo "x86-64-sse42 > x86 64-bit with sse42 support" - @echo "x86-64-modern > x86 64-bit with sse41 support (x86-64-sse41)" - @echo "x86-64-sse41 > x86 64-bit with sse41 support" + @echo "x86-64-sse41-popcnt > x86 64-bit with sse41 and popcnt support" + @echo "x86-64-modern > common modern CPU, currently x86-64-sse41-popcnt" @echo "x86-64-ssse3 > x86 64-bit with ssse3 support" @echo "x86-64-sse3-popcnt > x86 64-bit with sse3 and popcnt support" - @echo "x86-64-sse3 > x86 64-bit with sse3 support" - @echo "x86-64 > x86 64-bit generic" - @echo "x86-32 > x86 32-bit (also enables SSE)" - @echo "x86-32-old > x86 32-bit fall back for old hardware" + @echo "x86-64 > x86 64-bit generic (with sse2 support)" + @echo "x86-32-sse41-popcnt > x86 32-bit with sse41 and popcnt support" + @echo "x86-32-sse2 > x86 32-bit 
with sse2 support" + @echo "x86-32 > x86 32-bit generic (with mmx and sse support)" @echo "ppc-64 > PPC 64-bit" @echo "ppc-32 > PPC 32-bit" @echo "armv7 > ARMv7 32-bit" - @echo "armv8 > ARMv8 64-bit" + @echo "armv7-neon > ARMv7 32-bit with popcnt and neon" + @echo "armv8 > ARMv8 64-bit with popcnt and neon" @echo "apple-silicon > Apple silicon ARM64" @echo "general-64 > unspecified 64-bit" @echo "general-32 > unspecified 32-bit" @@ -595,20 +693,26 @@ help: @echo "mingw > Gnu compiler with MinGW under Windows" @echo "clang > LLVM Clang compiler" @echo "icc > Intel compiler" + @echo "ndk > Google NDK to cross-compile for Android" @echo "" @echo "Simple examples. If you don't know what to do, you likely want to run: " @echo "" - @echo "make -j build ARCH=x86-64 (This is for 64-bit systems)" - @echo "make -j build ARCH=x86-32 (This is for 32-bit systems)" + @echo "make -j build ARCH=x86-64 (A portable, slow compile for 64-bit systems)" + @echo "make -j build ARCH=x86-32 (A portable, slow compile for 32-bit systems)" @echo "" - @echo "Advanced examples, for experienced users: " + @echo "Advanced examples, for experienced users looking for performance: " @echo "" - @echo "make -j build ARCH=x86-64-modern COMP=clang" - @echo "make -j profile-build ARCH=x86-64-bmi2 COMP=gcc COMPCXX=g++-4.8" - @echo "" - @echo "The selected architecture $(ARCH) enables the following configuration: " + @echo "make help ARCH=x86-64-bmi2" + @echo "make -j profile-build ARCH=x86-64-bmi2 COMP=gcc COMPCXX=g++-9.0" + @echo "make -j build ARCH=x86-64-ssse3 COMP=clang" @echo "" + @echo "-------------------------------" +ifeq ($(SUPPORTED_ARCH), true) + @echo "The selected architecture $(ARCH) will enable the following configuration: " @$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity +else + @echo "Specify a supported architecture with the ARCH option for more details" +endif .PHONY: help build profile-build strip install clean net objclean profileclean \ @@ -618,7 +722,7 @@ help: build: 
config-sanity $(MAKE) ARCH=$(ARCH) COMP=$(COMP) all -profile-build: config-sanity objclean profileclean +profile-build: net config-sanity objclean profileclean @echo "" @echo "Step 1/4. Building instrumented executable ..." $(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make) @@ -634,7 +738,7 @@ profile-build: config-sanity objclean profileclean $(MAKE) ARCH=$(ARCH) COMP=$(COMP) profileclean strip: - strip $(EXE) + $(STRIP) $(EXE) install: -mkdir -p -m 755 $(BINDIR) @@ -649,17 +753,34 @@ net: $(eval nnuenet := $(shell grep EvalFile ucioption.cpp | grep Option | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/')) @echo "Default net: $(nnuenet)" $(eval nnuedownloadurl := https://tests.stockfishchess.org/api/nn/$(nnuenet)) - $(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -sL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi)) - @if test -f "$(nnuenet)"; then echo "Already available."; else echo "Downloading $(nnuedownloadurl)"; $(curl_or_wget) $(nnuedownloadurl) > $(nnuenet); fi + $(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -skL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi)) + @if test -f "$(nnuenet)"; then \ + echo "Already available."; \ + else \ + if [ "x$(curl_or_wget)" = "x" ]; then \ + echo "Automatic download failed: neither curl nor wget is installed. 
Install one of these tools or download the net manually"; exit 1; \ + else \ + echo "Downloading $(nnuedownloadurl)"; $(curl_or_wget) $(nnuedownloadurl) > $(nnuenet);\ + fi; \ + fi; + $(eval shasum_command := $(shell if hash shasum 2>/dev/null; then echo "shasum -a 256 "; elif hash sha256sum 2>/dev/null; then echo "sha256sum "; fi)) + @if [ "x$(shasum_command)" != "x" ]; then \ + if [ "$(nnuenet)" != "nn-"`$(shasum_command) $(nnuenet) | cut -c1-12`".nnue" ]; then \ + echo "Failed download or $(nnuenet) corrupted, please delete!"; exit 1; \ + fi \ + else \ + echo "shasum / sha256sum not found, skipping net validation"; \ + fi + # clean binaries and objects objclean: - @rm -f $(EXE) *.o ./syzygy/*.o ./learn/*.o ./extra/*.o ./eval/*.o ./nnue/*.o ./nnue/features/*.o + @rm -f $(EXE) *.o ./syzygy/*.o ./nnue/*.o ./nnue/features/*.o ./learn/*.o ./extra/*.o ./eval/*.o # clean auxiliary profiling files profileclean: @rm -rf profdir - @rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./learn/*.gcda ./extra/*.gcda ./eval/*.gcda ./nnue/*.gcda ./nnue/features/*.gcda + @rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./nnue/*.gcda ./nnue/features/*.gcda *.s ./learn/*.gcda ./extra/*.gcda ./eval/*.gcda @rm -f stockfish.profdata *.profraw default: @@ -683,14 +804,16 @@ config-sanity: @echo "os: '$(OS)'" @echo "prefetch: '$(prefetch)'" @echo "popcnt: '$(popcnt)'" + @echo "pext: '$(pext)'" @echo "sse: '$(sse)'" - @echo "sse3: '$(sse3)'" + @echo "mmx: '$(mmx)'" + @echo "sse2: '$(sse2)'" @echo "ssse3: '$(ssse3)'" @echo "sse41: '$(sse41)'" - @echo "sse42: '$(sse42)'" @echo "avx2: '$(avx2)'" - @echo "pext: '$(pext)'" @echo "avx512: '$(avx512)'" + @echo "vnni256: '$(vnni256)'" + @echo "vnni512: '$(vnni512)'" @echo "neon: '$(neon)'" @echo "" @echo "Flags:" @@ -703,22 +826,26 @@ config-sanity: @test "$(debug)" = "yes" || test "$(debug)" = "no" @test "$(sanitize)" = "undefined" || test "$(sanitize)" = "thread" || test "$(sanitize)" = "address" || test "$(sanitize)" = "no" @test "$(optimize)" = 
"yes" || test "$(optimize)" = "no" + @test "$(SUPPORTED_ARCH)" = "true" @test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \ test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || \ - test "$(arch)" = "armv7" || test "$(arch)" = "armv8-a" || test "$(arch)" = "arm64" + test "$(arch)" = "armv7" || test "$(arch)" = "armv8" || test "$(arch)" = "arm64" @test "$(bits)" = "32" || test "$(bits)" = "64" @test "$(prefetch)" = "yes" || test "$(prefetch)" = "no" @test "$(popcnt)" = "yes" || test "$(popcnt)" = "no" + @test "$(pext)" = "yes" || test "$(pext)" = "no" @test "$(sse)" = "yes" || test "$(sse)" = "no" - @test "$(sse3)" = "yes" || test "$(sse3)" = "no" + @test "$(mmx)" = "yes" || test "$(mmx)" = "no" + @test "$(sse2)" = "yes" || test "$(sse2)" = "no" @test "$(ssse3)" = "yes" || test "$(ssse3)" = "no" @test "$(sse41)" = "yes" || test "$(sse41)" = "no" - @test "$(sse42)" = "yes" || test "$(sse42)" = "no" @test "$(avx2)" = "yes" || test "$(avx2)" = "no" - @test "$(pext)" = "yes" || test "$(pext)" = "no" @test "$(avx512)" = "yes" || test "$(avx512)" = "no" + @test "$(vnni256)" = "yes" || test "$(vnni256)" = "no" + @test "$(vnni512)" = "yes" || test "$(vnni512)" = "no" @test "$(neon)" = "yes" || test "$(neon)" = "no" - @test "$(comp)" = "gcc" || test "$(comp)" = "icc" || test "$(comp)" = "mingw" || test "$(comp)" = "clang" + @test "$(comp)" = "gcc" || test "$(comp)" = "icc" || test "$(comp)" = "mingw" || test "$(comp)" = "clang" \ + || test "$(comp)" = "armv7a-linux-androideabi16-clang" || test "$(comp)" = "aarch64-linux-android21-clang" $(EXE): $(OBJS) +$(CXX) -o $@ $(OBJS) $(LDFLAGS) @@ -730,7 +857,7 @@ clang-profile-make: all clang-profile-use: - llvm-profdata merge -output=stockfish.profdata *.profraw + $(XCRUN) llvm-profdata merge -output=stockfish.profdata *.profraw $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ EXTRACXXFLAGS='-fprofile-instr-use=stockfish.profdata' \ EXTRALDFLAGS='-fprofile-use ' \ diff --git a/src/benchmark.cpp 
b/src/benchmark.cpp index 6041d642..806e9840 100644 --- a/src/benchmark.cpp +++ b/src/benchmark.cpp @@ -95,8 +95,9 @@ const vector<string> Defaults = { /// setup_bench() builds a list of UCI commands to be run by bench. There /// are five parameters: TT size in MB, number of search threads that /// should be used, the limit value spent for each position, a file name -/// where to look for positions in FEN format and the type of the limit: -/// depth, perft, nodes and movetime (in millisecs). +/// where to look for positions in FEN format, the type of the limit: +/// depth, perft, nodes and movetime (in millisecs), and evaluation type +/// mixed (default), classical, NNUE. /// /// bench -> search default positions up to depth 13 /// bench 64 1 15 -> search default positions up to depth 15 (TT = 64MB) @@ -115,6 +116,7 @@ vector<string> setup_bench(const Position& current, istream& is) { string limit = (is >> token) ? token : "13"; string fenFile = (is >> token) ? token : "default"; string limitType = (is >> token) ? token : "depth"; + string evalType = (is >> token) ? token : "mixed"; go = limitType == "eval" ? 
"eval" : "go " + limitType + " " + limit; @@ -146,13 +148,20 @@ vector setup_bench(const Position& current, istream& is) { list.emplace_back("setoption name Hash value " + ttSize); list.emplace_back("ucinewgame"); + size_t posCounter = 0; + for (const string& fen : fens) if (fen.find("setoption") != string::npos) list.emplace_back(fen); else { + if (evalType == "classical" || (evalType == "mixed" && posCounter % 2 == 0)) + list.emplace_back("setoption name Use NNUE value false"); + else if (evalType == "NNUE" || (evalType == "mixed" && posCounter % 2 != 0)) + list.emplace_back("setoption name Use NNUE value true"); list.emplace_back("position fen " + fen); list.emplace_back(go); + ++posCounter; } return list; diff --git a/src/bitboard.cpp b/src/bitboard.cpp index f531010c..80206b58 100644 --- a/src/bitboard.cpp +++ b/src/bitboard.cpp @@ -39,6 +39,16 @@ namespace { Bitboard BishopTable[0x1480]; // To store bishop attacks void init_magics(PieceType pt, Bitboard table[], Magic magics[]); + +} + + +/// safe_destination() returns the bitboard of target square for the given step +/// from the given square. If the step is off the board, returns empty bitboard. + +inline Bitboard safe_destination(Square s, int step) { + Square to = Square(s + step); + return is_ok(to) && distance(s, to) <= 2 ? square_bb(to) : Bitboard(0); } @@ -110,7 +120,7 @@ namespace { Direction RookDirections[4] = {NORTH, SOUTH, EAST, WEST}; Direction BishopDirections[4] = {NORTH_EAST, SOUTH_EAST, SOUTH_WEST, NORTH_WEST}; - for(Direction d : (pt == ROOK ? RookDirections : BishopDirections)) + for (Direction d : (pt == ROOK ? 
RookDirections : BishopDirections)) { Square s = sq; while(safe_destination(s, d) && !(occupied & s)) diff --git a/src/bitboard.h b/src/bitboard.h index a899d879..29d8f66d 100644 --- a/src/bitboard.h +++ b/src/bitboard.h @@ -279,16 +279,6 @@ inline int edge_distance(File f) { return std::min(f, File(FILE_H - f)); } inline int edge_distance(Rank r) { return std::min(r, Rank(RANK_8 - r)); } -/// safe_destination() returns the bitboard of target square for the given step -/// from the given square. If the step is off the board, returns empty bitboard. - -inline Bitboard safe_destination(Square s, int step) -{ - Square to = Square(s + step); - return is_ok(to) && distance(s, to) <= 2 ? square_bb(to) : Bitboard(0); -} - - /// attacks_bb(Square) returns the pseudo attacks of the give piece type /// assuming an empty board. diff --git a/src/eval/evaluate_mir_inv_tools.cpp b/src/eval/evaluate_mir_inv_tools.cpp deleted file mode 100644 index 3667b9f5..00000000 --- a/src/eval/evaluate_mir_inv_tools.cpp +++ /dev/null @@ -1,190 +0,0 @@ -#if defined(EVAL_NNUE) || defined(EVAL_LEARN) - -#include "evaluate_mir_inv_tools.h" - -namespace Eval -{ - - // --- tables - - // Value when a certain PieceSquare is seen from the other side - // BONA_PIECE_INIT is -1, so it must be a signed type. - // Even if KPPT is expanded, PieceSquare will not exceed 2^15 for the time being, so int16_t is good. - int16_t inv_piece_[PieceSquare::PS_END]; - - // Returns the one at the position where a PieceSquare on the board is mirrored. - int16_t mir_piece_[PieceSquare::PS_END]; - - - // --- methods - -// Returns the value when a certain PieceSquare is seen from the other side - PieceSquare inv_piece(PieceSquare p) { return (PieceSquare)inv_piece_[p]; } - - // Returns the one at the position where a PieceSquare on the board is mirrored. 
- PieceSquare mir_piece(PieceSquare p) { return (PieceSquare)mir_piece_[p]; } - - std::function mir_piece_init_function; - - void init_mir_inv_tables() - { - // Initialize the mirror and inverse tables. - - // Initialization is limited to once. - static bool first = true; - if (!first) return; - first = false; - - // exchange f and e - int t[] = { - PieceSquare::PS_W_PAWN , PieceSquare::PS_B_PAWN , - PieceSquare::PS_W_KNIGHT , PieceSquare::PS_B_KNIGHT , - PieceSquare::PS_W_BISHOP , PieceSquare::PS_B_BISHOP , - PieceSquare::PS_W_ROOK , PieceSquare::PS_B_ROOK , - PieceSquare::PS_W_QUEEN , PieceSquare::PS_B_QUEEN , - }; - - // Insert uninitialized value. - for (PieceSquare p = PieceSquare::PS_NONE; p < PieceSquare::PS_END; ++p) - { - inv_piece_[p] = PieceSquare::PS_NOT_INIT; - - // mirror does not work for hand pieces. Just return the original value. - mir_piece_[p] = (p < PieceSquare::PS_W_PAWN) ? p : PieceSquare::PS_NOT_INIT; - } - - for (PieceSquare p = PieceSquare::PS_NONE; p < PieceSquare::PS_END; ++p) - { - for (int i = 0; i < 32 /* t.size() */; i += 2) - { - if (t[i] <= p && p < t[i + 1]) - { - Square sq = (Square)(p - t[i]); - - // found!! - PieceSquare q = (p < PieceSquare::PS_W_PAWN) ? PieceSquare(sq + t[i + 1]) : (PieceSquare)(rotate180(sq) + t[i + 1]); - inv_piece_[p] = q; - inv_piece_[q] = p; - - /* - It's a bit tricky, but regarding p - p >= PieceSquare::PS_W_PAWN - When. - - For this p, let n be an integer (i in the above code can only be an even number), - a) When t[2n + 0] <= p inv->mir->inv must be the original location. - assert(p == inv_piece(mir_piece(inv_piece(mir_piece(p))))); - - // inv->mir->inv->mir must be the original location. - assert(p == mir_piece(inv_piece(mir_piece(inv_piece(p))))); - } - -#if 0 - // Pre-verification that it is okay to mirror the evaluation function - // When writing a value, there is an assertion, so if you can't mirror it, - // Should get caught in the assert. 
- - // Apery's WCSC26 evaluation function, kpp p1==0 or p1==20 (0th step on the back) - // There is dust in it, and if you don't avoid it, it will get caught in the assert. - - std::unordered_set s; - vector a = { - f_hand_pawn - 1,e_hand_pawn - 1, - f_hand_lance - 1, e_hand_lance - 1, - f_hand_knight - 1, e_hand_knight - 1, - f_hand_silver - 1, e_hand_silver - 1, - f_hand_gold - 1, e_hand_gold - 1, - f_hand_bishop - 1, e_hand_bishop - 1, - f_hand_rook - 1, e_hand_rook - 1, - }; - for (auto b : a) - s.insert((PieceSquare)b); - - // Excludes walks, incense, and katsura on the board that do not appear further (Apery also contains garbage here) - for (Rank r = RANK_1; r <= RANK_2; ++r) - for (File f = FILE_1; f <= FILE_9; ++f) - { - if (r == RANK_1) - { - // first step - PieceSquare b1 = PieceSquare(PieceSquare::PS_W_PAWN + (f | r)); - s.insert(b1); - s.insert(inv_piece[b1]); - - // 1st stage incense - PieceSquare b2 = PieceSquare(f_lance + (f | r)); - s.insert(b2); - s.insert(inv_piece[b2]); - } - - // Katsura on the 1st and 2nd steps - PieceSquare b = PieceSquare(PieceSquare::PS_W_KNIGHT + (f | r)); - s.insert(b); - s.insert(inv_piece[b]); - } - - cout << "\nchecking kpp_write().."; - for (auto sq : SQ) - { - cout << sq << ' '; - for (PieceSquare p1 = PieceSquare::PS_NONE; p1 < PieceSquare::PS_END; ++p1) - for (PieceSquare p2 = PieceSquare::PS_NONE; p2 < PieceSquare::PS_END; ++p2) - if (!s.count(p1) && !s.count(p2)) - kpp_write(sq, p1, p2, kpp[sq][p1][p2]); - } - cout << "\nchecking kkp_write().."; - - for (auto sq1 : SQ) - { - cout << sq1 << ' '; - for (auto sq2 : SQ) - for (PieceSquare p1 = PieceSquare::PS_NONE; p1 < PieceSquare::PS_END; ++p1) - if (!s.count(p1)) - kkp_write(sq1, sq2, p1, kkp[sq1][sq2][p1]); - } - cout << "..done!" 
<< endl; -#endif - } - -} - -#endif // defined(EVAL_NNUE) || defined(EVAL_LEARN) diff --git a/src/eval/evaluate_mir_inv_tools.h b/src/eval/evaluate_mir_inv_tools.h deleted file mode 100644 index 1f193b17..00000000 --- a/src/eval/evaluate_mir_inv_tools.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef _EVALUATE_MIR_INV_TOOLS_ -#define _EVALUATE_MIR_INV_TOOLS_ - -#if defined(EVAL_NNUE) || defined(EVAL_LEARN) - -// PieceSquare's mirror (horizontal flip) and inverse (180° on the board) tools to get pieces. - -#include "../types.h" -#include "../evaluate.h" -#include - -namespace Eval -{ - // ------------------------------------------------- - // tables - // ------------------------------------------------- - - // --- Provide Mirror and Inverse to PieceSquare. - - // These arrays are initialized by calling init() or init_mir_inv_tables();. - // If you want to use only this table from the evaluation function, - // Call init_mir_inv_tables(). - // These arrays are referenced from the KK/KKP/KPP classes below. - - // Returns the value when a certain PieceSquare is seen from the other side - extern PieceSquare inv_piece(PieceSquare p); - - // Returns the one at the position where a PieceSquare on the board is mirrored. - extern PieceSquare mir_piece(PieceSquare p); - - - // callback called when initializing mir_piece/inv_piece - // Used when extending fe_end on the user side. - // Inv_piece_ and inv_piece_ are exposed because they are necessary for this initialization. - // At the timing when mir_piece_init_function is called, until fe_old_end - // It is guaranteed that these tables have been initialized. - extern std::function mir_piece_init_function; - extern int16_t mir_piece_[PieceSquare::PS_END]; - extern int16_t inv_piece_[PieceSquare::PS_END]; - - // The table above will be initialized when you call this function explicitly or call init(). 
- extern void init_mir_inv_tables(); -} - -#endif // defined(EVAL_NNUE) || defined(EVAL_LEARN) - -#endif diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 4ba89675..8edc9bb8 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -61,10 +61,11 @@ namespace Eval { UCI::OptionsMap defaults; UCI::init(defaults); - std::cerr << "NNUE evaluation used, but the network file " << eval_file << " was not loaded successfully. " - << "These network evaluation parameters must be available, and compatible with this version of the code. " - << "The UCI option EvalFile might need to specify the full path, including the directory/folder name, to the file. " - << "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/"+std::string(defaults["EvalFile"]) << std::endl; + sync_cout << "info string ERROR: NNUE evaluation used, but the network file " << eval_file << " was not loaded successfully." << sync_endl; + sync_cout << "info string ERROR: The UCI option EvalFile might need to specify the full path, including the directory/folder name, to the file." << sync_endl; + sync_cout << "info string ERROR: The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/"+std::string(defaults["EvalFile"]) << sync_endl; + sync_cout << "info string ERROR: If the UCI option Use NNUE is set to true, network evaluation parameters compatible with the program must be available." << sync_endl; + sync_cout << "info string ERROR: The engine will be terminated now." 
<< sync_endl; std::exit(EXIT_FAILURE); } @@ -122,7 +123,8 @@ namespace { constexpr Value LazyThreshold1 = Value(1400); constexpr Value LazyThreshold2 = Value(1300); constexpr Value SpaceThreshold = Value(12222); - constexpr Value NNUEThreshold = Value(460); + constexpr Value NNUEThreshold1 = Value(550); + constexpr Value NNUEThreshold2 = Value(150); // KingAttackWeights[PieceType] contains king attack weights by piece type constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 }; @@ -294,8 +296,8 @@ namespace { attackedBy2[Us] = dblAttackByPawn | (attackedBy[Us][KING] & attackedBy[Us][PAWN]); // Init our king safety tables - Square s = make_square(Utility::clamp(file_of(ksq), FILE_B, FILE_G), - Utility::clamp(rank_of(ksq), RANK_2, RANK_7)); + Square s = make_square(std::clamp(file_of(ksq), FILE_B, FILE_G), + std::clamp(rank_of(ksq), RANK_2, RANK_7)); kingRing[Us] = attacks_bb(s) | s; kingAttackersCount[Them] = popcount(kingRing[Us] & pe->pawn_attacks(Them)); @@ -692,8 +694,8 @@ namespace { Square blockSq = s + Up; // Adjust bonus based on the king's proximity - bonus += make_score(0, ( (king_proximity(Them, blockSq) * 19) / 4 - - king_proximity(Us, blockSq) * 2) * w); + bonus += make_score(0, ( king_proximity(Them, blockSq) * 19 / 4 + - king_proximity(Us, blockSq) * 2) * w); // If blockSq is not the queening square then consider also a second push if (r != RANK_7) @@ -737,7 +739,7 @@ namespace { // Evaluation::space() computes a space evaluation for a given side, aiming to improve game - // play in the opening. It is based on the number of safe squares on the 4 central files + // play in the opening. It is based on the number of safe squares on the four central files // on ranks 2 to 4. Completely safe squares behind a friendly pawn are counted twice. // Finally, the space bonus is multiplied by a weight which decreases according to occupancy. 
@@ -810,7 +812,7 @@ namespace { // Now apply the bonus: note that we find the attacking side by extracting the // sign of the midgame or endgame values, and that we carefully cap the bonus // so that the midgame and endgame scores do not change sign after the bonus. - int u = ((mg > 0) - (mg < 0)) * Utility::clamp(complexity + 50, -abs(mg), 0); + int u = ((mg > 0) - (mg < 0)) * std::clamp(complexity + 50, -abs(mg), 0); int v = ((eg > 0) - (eg < 0)) * std::max(complexity, -abs(eg)); mg += u; @@ -935,9 +937,6 @@ make_v: // Side to move point of view v = (pos.side_to_move() == WHITE ? v : -v) + Tempo; - // Damp down the evaluation linearly when shuffling - v = v * (100 - pos.rule50_count()) / 100; - return v; } @@ -954,14 +953,21 @@ Value Eval::evaluate(const Position& pos) { } #endif - if (Eval::useNNUE) - { - Value v = eg_value(pos.psq_score()); - // Take NNUE eval only on balanced positions - if (abs(v) < NNUEThreshold + 20 * pos.count()) - return NNUE::evaluate(pos) + Tempo; - } - return Evaluation(pos).value(); + bool classical = !Eval::useNNUE + || abs(eg_value(pos.psq_score())) * 16 > NNUEThreshold1 * (16 + pos.rule50_count()); + Value v = classical ? 
Evaluation(pos).value() + : NNUE::evaluate(pos) * 5 / 4 + Tempo; + + if (classical && Eval::useNNUE && abs(v) * 16 < NNUEThreshold2 * (16 + pos.rule50_count())) + v = NNUE::evaluate(pos) * 5 / 4 + Tempo; + + // Damp down the evaluation linearly when shuffling + v = v * (100 - pos.rule50_count()) / 100; + + // Guarantee evaluation does not hit the tablebase range + v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); + + return v; } /// trace() is like evaluate(), but instead of returning a value, it returns @@ -979,141 +985,46 @@ std::string Eval::trace(const Position& pos) { Value v; - if (Eval::useNNUE) - { - v = NNUE::evaluate(pos); - } - else - { - std::memset(scores, 0, sizeof(scores)); + std::memset(scores, 0, sizeof(scores)); - pos.this_thread()->contempt = SCORE_ZERO; // Reset any dynamic contempt + pos.this_thread()->contempt = SCORE_ZERO; // Reset any dynamic contempt - v = Evaluation(pos).value(); + v = Evaluation(pos).value(); - ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2) - << " Term | White | Black | Total \n" - << " | MG EG | MG EG | MG EG \n" - << " ------------+-------------+-------------+------------\n" - << " Material | " << Term(MATERIAL) - << " Imbalance | " << Term(IMBALANCE) - << " Pawns | " << Term(PAWN) - << " Knights | " << Term(KNIGHT) - << " Bishops | " << Term(BISHOP) - << " Rooks | " << Term(ROOK) - << " Queens | " << Term(QUEEN) - << " Mobility | " << Term(MOBILITY) - << " King safety | " << Term(KING) - << " Threats | " << Term(THREAT) - << " Passed | " << Term(PASSED) - << " Space | " << Term(SPACE) - << " Winnable | " << Term(WINNABLE) - << " ------------+-------------+-------------+------------\n" - << " Total | " << Term(TOTAL); - } + ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2) + << " Term | White | Black | Total \n" + << " | MG EG | MG EG | MG EG \n" + << " ------------+-------------+-------------+------------\n" + << " Material | " << 
Term(MATERIAL) + << " Imbalance | " << Term(IMBALANCE) + << " Pawns | " << Term(PAWN) + << " Knights | " << Term(KNIGHT) + << " Bishops | " << Term(BISHOP) + << " Rooks | " << Term(ROOK) + << " Queens | " << Term(QUEEN) + << " Mobility | " << Term(MOBILITY) + << " King safety | " << Term(KING) + << " Threats | " << Term(THREAT) + << " Passed | " << Term(PASSED) + << " Space | " << Term(SPACE) + << " Winnable | " << Term(WINNABLE) + << " ------------+-------------+-------------+------------\n" + << " Total | " << Term(TOTAL); v = pos.side_to_move() == WHITE ? v : -v; - ss << "\nFinal evaluation: " << to_cp(v) << " (white side)\n"; + ss << "\nClassical evaluation: " << to_cp(v) << " (white side)\n"; + + if (Eval::useNNUE) + { + v = NNUE::evaluate(pos); + v = pos.side_to_move() == WHITE ? v : -v; + ss << "\nNNUE evaluation: " << to_cp(v) << " (white side)\n"; + } + + v = evaluate(pos); + v = pos.side_to_move() == WHITE ? v : -v; + ss << "\nFinal evaluation: " << to_cp(v) << " (white side)\n"; return ss.str(); } - -// Check whether the pieceListFw[] held internally is a correct PieceSquare. -// Note: For debugging. slow. -bool EvalList::is_valid(const Position& pos) -{ - std::set piece_numbers; - for (Square sq = SQ_A1; sq != SQUARE_NB; ++sq) { - auto piece_number = piece_id_list[sq]; - if (piece_number == PieceId::PIECE_ID_NONE) { - continue; - } - assert(!piece_numbers.count(piece_number)); - piece_numbers.insert(piece_number); - } - - for (int i = 0; i < PieceId::PIECE_ID_KING; ++i) - { - PieceSquare fw = pieceListFw[i]; - // Go to the Position class to see if this fw really exists. - - if (fw == PieceSquare::PS_NONE) { - continue; - } - - // Out of range - if (!(0 <= fw && fw < PieceSquare::PS_END)) - return false; - - // Since it is a piece on the board, I will check if this piece really exists. 
- for (Piece pc = NO_PIECE; pc < PIECE_NB; ++pc) - { - auto pt = type_of(pc); - if (pt == NO_PIECE_TYPE || pt == 7) // non-existing piece - continue; - - // PieceSquare start number of piece pc - auto s = PieceSquare(kpp_board_index[pc].from[Color::WHITE]); - if (s <= fw && fw < s + SQUARE_NB) - { - // Since it was found, check if this piece is at sq. - Square sq = (Square)(fw - s); - Piece pc2 = pos.piece_on(sq); - - if (pc2 != pc) - return false; - - goto Found; - } - } - // It was a piece that did not exist for some reason.. - return false; - Found:; - } - - // Validate piece_id_list - for (auto sq = SQUARE_ZERO; sq < SQUARE_NB; ++sq) { - Piece expected_piece = pos.piece_on(sq); - PieceId piece_number = piece_id_list[sq]; - if (piece_number == PieceId::PIECE_ID_NONE) { - assert(expected_piece == NO_PIECE); - if (expected_piece != NO_PIECE) { - return false; - } - continue; - } - - PieceSquare bona_piece_white = pieceListFw[piece_number]; - Piece actual_piece; - for (actual_piece = NO_PIECE; actual_piece < PIECE_NB; ++actual_piece) { - if (kpp_board_index[actual_piece].from[Color::WHITE] == PieceSquare::PS_NONE) { - continue; - } - - if (kpp_board_index[actual_piece].from[Color::WHITE] <= bona_piece_white - && bona_piece_white < kpp_board_index[actual_piece].from[Color::WHITE] + SQUARE_NB) { - break; - } - } - - assert(actual_piece != PIECE_NB); - if (actual_piece == PIECE_NB) { - return false; - } - - assert(actual_piece == expected_piece); - if (actual_piece != expected_piece) { - return false; - } - - Square actual_square = static_cast( - bona_piece_white - kpp_board_index[actual_piece].from[Color::WHITE]); - assert(sq == actual_square); - if (sq != actual_square) { - return false; - } - } - - return true; -} diff --git a/src/extra/sfen_packer.cpp b/src/extra/sfen_packer.cpp index 68576c82..ac789ce8 100644 --- a/src/extra/sfen_packer.cpp +++ b/src/extra/sfen_packer.cpp @@ -276,13 +276,6 @@ int Position::set_from_packed_sfen(const PackedSfen& sfen , StateInfo * 
si, Thre // Active color sideToMove = (Color)stream.read_one_bit(); - // clear evalList. It is cleared when memset is cleared to zero above... - evalList.clear(); - - // In updating the PieceList, we have to set which piece is where, - // A counter of how much each piece has been used - PieceId next_piece_number = PieceId::PIECE_ID_ZERO; - pieceList[W_KING][0] = SQUARE_NB; pieceList[B_KING][0] = SQUARE_NB; @@ -327,14 +320,6 @@ int Position::set_from_packed_sfen(const PackedSfen& sfen , StateInfo * si, Thre put_piece(Piece(pc), sq); - // update evalList - PieceId piece_no = - (pc == B_KING) ?PieceId::PIECE_ID_BKING :// Move ball - (pc == W_KING) ?PieceId::PIECE_ID_WKING :// Backing ball - next_piece_number++; // otherwise - - evalList.put_piece(piece_no, sq, pc); // Place the pc piece in the sq box - //cout << sq << ' ' << board[sq] << ' ' << stream.get_cursor() << endl; if (stream.get_cursor()> 256) @@ -402,9 +387,6 @@ set_state(st); //std::cout << *this << std::endl; assert(pos_is_ok()); -#if defined(EVAL_NNUE) - assert(evalList.is_valid(*this)); -#endif // defined(EVAL_NNUE) return 0; } diff --git a/src/learn/learning_tools.cpp b/src/learn/learning_tools.cpp index c97b4910..de6da9c5 100644 --- a/src/learn/learning_tools.cpp +++ b/src/learn/learning_tools.cpp @@ -20,237 +20,6 @@ namespace EvalLearningTools double Weight::eta3; uint64_t Weight::eta1_epoch; uint64_t Weight::eta2_epoch; - - std::vector min_index_flag; - - // --- initialization for each individual table - - void init_min_index_flag() - { - // Initialization of mir_piece and inv_piece must be completed. - assert(Eval::mir_piece(PieceSquare::PS_W_PAWN) == PieceSquare::PS_B_PAWN); - - // Initialize the flag array for dimension reduction - // Not involved in KPPP. 
- - KK g_kk; - g_kk.set(SQUARE_NB, PieceSquare::PS_END, 0); - KKP g_kkp; - g_kkp.set(SQUARE_NB, PieceSquare::PS_END, g_kk.max_index()); - KPP g_kpp; - g_kpp.set(SQUARE_NB, PieceSquare::PS_END, g_kkp.max_index()); - - uint64_t size = g_kpp.max_index(); - min_index_flag.resize(size); - -#pragma omp parallel - { -#if defined(_OPENMP) - // To prevent the logical 64 cores from being used when there are two CPUs under Windows - // explicitly assign to CPU here - int thread_index = omp_get_thread_num(); // get your thread number - WinProcGroup::bindThisThread(thread_index); -#endif - -#pragma omp for schedule(dynamic,20000) - - for (int64_t index_ = 0; index_ < (int64_t)size; ++index_) - { - // It seems that the loop variable must be a sign type due to OpenMP restrictions, but - // It's really difficult to use. - uint64_t index = (uint64_t)index_; - - if (g_kk.is_ok(index)) - { - // Make sure that the original index will be restored by conversion from index and reverse conversion. - // It is a process that is executed only once at startup, so write it in assert. - assert(g_kk.fromIndex(index).toIndex() == index); - - KK a[KK_LOWER_COUNT]; - g_kk.fromIndex(index).toLowerDimensions(a); - - // Make sure that the first element of dimension reduction is the same as the original index. 
- assert(a[0].toIndex() == index); - - uint64_t min_index = UINT64_MAX; - for (auto& e : a) - min_index = std::min(min_index, e.toIndex()); - min_index_flag[index] = (min_index == index); - } - else if (g_kkp.is_ok(index)) - { - assert(g_kkp.fromIndex(index).toIndex() == index); - - KKP x = g_kkp.fromIndex(index); - KKP a[KKP_LOWER_COUNT]; - x.toLowerDimensions(a); - - assert(a[0].toIndex() == index); - - uint64_t min_index = UINT64_MAX; - for (auto& e : a) - min_index = std::min(min_index, e.toIndex()); - min_index_flag[index] = (min_index == index); - } - else if (g_kpp.is_ok(index)) - { - assert(g_kpp.fromIndex(index).toIndex() == index); - - KPP x = g_kpp.fromIndex(index); - KPP a[KPP_LOWER_COUNT]; - x.toLowerDimensions(a); - - assert(a[0].toIndex() == index); - - uint64_t min_index = UINT64_MAX; - for (auto& e : a) - min_index = std::min(min_index, e.toIndex()); - min_index_flag[index] = (min_index == index); - } - else - { - assert(false); - } - } - } - } - - void learning_tools_unit_test_kpp() - { - - // test KPP triangulation for bugs - // All combinations of k-p0-p1 are properly handled by KPP, and the dimension reduction at that time is - // Determine if it is correct. 
- - KK g_kk; - g_kk.set(SQUARE_NB, PieceSquare::PS_END, 0); - KKP g_kkp; - g_kkp.set(SQUARE_NB, PieceSquare::PS_END, g_kk.max_index()); - KPP g_kpp; - g_kpp.set(SQUARE_NB, PieceSquare::PS_END, g_kkp.max_index()); - - std::vector f; - f.resize(g_kpp.max_index() - g_kpp.min_index()); - - for(auto k = SQUARE_ZERO ; k < SQUARE_NB ; ++k) - for(auto p0 = PieceSquare::PS_NONE; p0 < PieceSquare::PS_END ; ++p0) - for (auto p1 = PieceSquare::PS_NONE; p1 < PieceSquare::PS_END; ++p1) - { - KPP kpp_org = g_kpp.fromKPP(k,p0,p1); - KPP kpp0; - KPP kpp1 = g_kpp.fromKPP(flip_file(k), mir_piece(p0), mir_piece(p1)); - KPP kpp_array[2]; - - auto index = kpp_org.toIndex(); - assert(g_kpp.is_ok(index)); - - kpp0 = g_kpp.fromIndex(index); - - //if (kpp0 != kpp_org) - // std::cout << "index = " << index << "," << kpp_org << "," << kpp0 << std::endl; - - kpp0.toLowerDimensions(kpp_array); - - assert(kpp_array[0] == kpp0); - assert(kpp0 == kpp_org); - assert(kpp_array[1] == kpp1); - - auto index2 = kpp1.toIndex(); - f[index - g_kpp.min_index()] = f[index2-g_kpp.min_index()] = true; - } - - // Check if there is no missing index. - for(size_t index = 0 ; index < f.size(); index++) - if (!f[index]) - { - std::cout << index << g_kpp.fromIndex(index + g_kpp.min_index()) << std::endl; - } - } - - void learning_tools_unit_test_kppp() - { - // Test for missing KPPP calculations - - KPPP g_kppp; - g_kppp.set(15, PieceSquare::PS_END,0); - uint64_t min_index = g_kppp.min_index(); - uint64_t max_index = g_kppp.max_index(); - - // Confirm last element. - //KPPP x = KPPP::fromIndex(max_index-1); - //std::cout << x << std::endl; - - for (uint64_t index = min_index; index < max_index; ++index) - { - KPPP x = g_kppp.fromIndex(index); - //std::cout << x << std::endl; - -#if 0 - if ((index % 10000000) == 0) - std::cout << "index = " << index << std::endl; - - // index = 9360000000 - // done. 
- - if (x.toIndex() != index) - { - std::cout << "assertion failed , index = " << index << std::endl; - } -#endif - - assert(x.toIndex() == index); - -// ASSERT((&kppp_ksq_pcpcpc(x.king(), x.piece0(), x.piece1(), x.piece2()) - &kppp[0][0]) == (index - min_index)); - } - - } - - void learning_tools_unit_test_kkpp() - { - KKPP g_kkpp; - g_kkpp.set(SQUARE_NB, 10000, 0); - uint64_t n = 0; - for (int k = 0; k -#include "../eval/evaluate_mir_inv_tools.h" - #if defined(SGD_UPDATE) || defined(USE_KPPP_MIRROR_WRITE) #include "../misc.h" // PRNG , my_insertion_sort #endif @@ -17,27 +15,6 @@ namespace EvalLearningTools { - // ------------------------------------------------- - // Initialization - // ------------------------------------------------- - - // Initialize the tables in this EvalLearningTools namespace. - // Be sure to call once before learning starts. - // In this function, we also call init_mir_inv_tables(). - // (It is not necessary to call init_mir_inv_tables() when calling this function.) - void init(); - - // ------------------------------------------------- - // flags - // ------------------------------------------------- - - // When the dimension is lowered, it may become the smallest index among them - // A flag array that is true for the known index. - // This array is also initialized by init(). - // KPPP is not involved. - // Therefore, the valid index range of this array is from KK::min_index() to KPP::max_index(). - extern std::vector min_index_flag; - // ------------------------------------------------- // Array for learning that stores gradients etc. // ------------------------------------------------- @@ -217,817 +194,6 @@ namespace EvalLearningTools std::array get_grad() const { return std::array{w[0].get_grad(), w[1].get_grad()}; } }; - - // ------------------------------------------------ - - // A helper that calculates the index when the Weight array is serialized. 
- // ------------------------------------------------ - - - // Base class for KK,KKP,KPP,KKPP - // How to use these classes - // - // 1. Initialize with set() first. Example) KK g_kk; g_kk.set(SQUARE_NB,fe_end,0); - // 2. Next create an instance with fromIndex(), fromKK(), etc. - // 3. Access using properties such as king(), piece0(), piece1(). - // - // It may be difficult to understand just by this explanation, but if you look at init_grad(), add_grad(), update_weights() etc. in the learning part - // I think you can understand it including the necessity. - // - // Note: this derived class may indirectly reference the above inv_piece/mir_piece for dimension reduction, so - // Initialize by calling EvalLearningTools::init() or init_mir_inv_tables() first. - // - // Remarks) /*final*/ is written for the function name that should not be overridden on the derived class side. - // The function that should be overridden on the derived class side is a pure virtual function with "= 0". - // Only virtual functions are added to the derived class that may or may not be overridden. - // - struct SerializerBase - { - - // Minimum value and maximum value of serial number +1 when serializing KK, KKP, KPP arrays. - /*final*/ uint64_t min_index() const { return min_index_; } - /*final*/ uint64_t max_index() const { return min_index() + max_raw_index_; } - - // max_index() - min_index() the value of. - // Calculate the value from max_king_sq_,fe_end_ etc. on the derived class side and return it. - virtual uint64_t size() const = 0; - - // Determine if the given index is more than min_index() and less than max_index(). - /*final*/ bool is_ok(uint64_t index) { return min_index() <= index && index < max_index(); } - - // Make sure to call this set(). Otherwise, construct an instance using fromKK()/fromIndex() etc. on the derived class side. 
- virtual void set(int max_king_sq, uint64_t fe_end, uint64_t min_index) - { - max_king_sq_ = max_king_sq; - fe_end_ = fe_end; - min_index_ = min_index; - max_raw_index_ = size(); - } - - // Get the index when serialized, based on the value of the current member. - /*final*/ uint64_t toIndex() const { - return min_index() + toRawIndex(); - } - - // Returns the index when serializing. (The value of min_index() is before addition) - virtual uint64_t toRawIndex() const = 0; - - protected: - // The value of min_index() returned by this class - uint64_t min_index_; - - // The value of max_index() returned by this class = min_index() + max_raw_index_ - // This variable is calculated by size() of the derived class. - uint64_t max_raw_index_; - - // The number of balls to support (normally SQUARE_NB) - int max_king_sq_; - - // Maximum PieceSquare value supported - uint64_t fe_end_; - - }; - - struct KK : public SerializerBase - { - protected: - KK(Square king0, Square king1,bool inverse) : king0_(king0), king1_(king1) , inverse_sign(inverse) {} - public: - KK() {} - - virtual uint64_t size() const { return max_king_sq_ * max_king_sq_; } - - // builder that creates KK object from index (serial number) - KK fromIndex(uint64_t index) const { assert(index >= min_index()); return fromRawIndex(index - min_index()); } - - // builder that creates KK object from raw_index (number starting from 0, not serial number) - KK fromRawIndex(uint64_t raw_index) const - { - int king1 = (int)(raw_index % SQUARE_NB); - raw_index /= SQUARE_NB; - int king0 = (int)(raw_index /* % SQUARE_NB */); - assert(king0 < SQUARE_NB); - return fromKK((Square)king0, (Square)king1 , false); - } - KK fromKK(Square king0, Square king1 , bool inverse) const - { - // The variable name kk is used in the Eval::kk array etc., so it needs to be different. (The same applies to KKP, KPP classes, etc.) 
- KK my_kk(king0, king1, inverse); - my_kk.set(max_king_sq_, fe_end_, min_index()); - return my_kk; - } - KK fromKK(Square king0, Square king1) const { return fromKK(king0, king1, false); } - - // When you construct this object using fromIndex(), you can get information with the following accessors. - Square king0() const { return king0_; } - Square king1() const { return king1_; } - -// number of dimension reductions -#if defined(USE_KK_INVERSE_WRITE) - #define KK_LOWER_COUNT 4 -#elif defined(USE_KK_MIRROR_WRITE) - #define KK_LOWER_COUNT 2 -#else - #define KK_LOWER_COUNT 1 -#endif - -#if defined(USE_KK_INVERSE_WRITE) && !defined(USE_KK_MIRROR_WRITE) - // USE_KK_INVERSE_WRITE If you use it, please also define USE_KK_MIRROR_WRITE. - static_assert(false, "define also USE_KK_MIRROR_WRITE!"); -#endif - - // Get the index of the low-dimensional array. - // When USE_KK_INVERSE_WRITE is enabled, the inverse of them will be in [2] and [3]. - // Note that the sign of grad must be reversed for this dimension reduction. - // You can use is_inverse() because it can be determined. - void toLowerDimensions(/*out*/KK kk_[KK_LOWER_COUNT]) const { - kk_[0] = fromKK(king0_, king1_,false); -#if defined(USE_KK_MIRROR_WRITE) - kk_[1] = fromKK(flip_file(king0_),flip_file(king1_),false); -#if defined(USE_KK_INVERSE_WRITE) - kk_[2] = fromKK(rotate180(king1_), rotate180(king0_),true); - kk_[3] = fromKK(rotate180(flip_file(king1_)) , rotate180(flip_file(king0_)),true); -#endif -#endif - } - - // Get the index when counting the value of min_index() of this class as 0. - virtual uint64_t toRawIndex() const { - return (uint64_t)king0_ * (uint64_t)max_king_sq_ + (uint64_t)king1_; - } - - // Returns whether or not the dimension lowered with toLowerDimensions is inverse. - bool is_inverse() const { - return inverse_sign; - } - - // When is_inverse() == true, reverse the sign that is not grad's turn and return it. 
- template - std::array apply_inverse_sign(const std::array& rhs) - { - return !is_inverse() ? rhs : std::array{-rhs[0], rhs[1]}; - } - - // comparison operator - bool operator==(const KK& rhs) { return king0() == rhs.king0() && king1() == rhs.king1(); } - bool operator!=(const KK& rhs) { return !(*this == rhs); } - - private: - Square king0_, king1_ ; - bool inverse_sign; - }; - - // Output for debugging. - static std::ostream& operator<<(std::ostream& os, KK rhs) - { - os << "KK(" << rhs.king0() << "," << rhs.king1() << ")"; - return os; - } - - // Same as KK. For KKP. - struct KKP : public SerializerBase - { - protected: - KKP(Square king0, Square king1, PieceSquare p) : king0_(king0), king1_(king1), piece_(p), inverse_sign(false) {} - KKP(Square king0, Square king1, PieceSquare p, bool inverse) : king0_(king0), king1_(king1), piece_(p),inverse_sign(inverse) {} - public: - KKP() {} - - virtual uint64_t size() const { return (uint64_t)max_king_sq_*(uint64_t)max_king_sq_*(uint64_t)fe_end_; } - - // builder that creates KKP object from index (serial number) - KKP fromIndex(uint64_t index) const { assert(index >= min_index()); return fromRawIndex(index - min_index()); } - - // A builder that creates a KKP object from raw_index (a number that starts from 0, not a serial number) - KKP fromRawIndex(uint64_t raw_index) const - { - int piece = (int)(raw_index % PieceSquare::PS_END); - raw_index /= PieceSquare::PS_END; - int king1 = (int)(raw_index % SQUARE_NB); - raw_index /= SQUARE_NB; - int king0 = (int)(raw_index /* % SQUARE_NB */); - assert(king0 < SQUARE_NB); - return fromKKP((Square)king0, (Square)king1, (PieceSquare)piece,false); - } - - KKP fromKKP(Square king0, Square king1, PieceSquare p, bool inverse) const - { - KKP my_kkp(king0, king1, p, inverse); - my_kkp.set(max_king_sq_,fe_end_,min_index()); - return my_kkp; - } - KKP fromKKP(Square king0, Square king1, PieceSquare p) const { return fromKKP(king0, king1, p, false); } - - // When you construct this object 
using fromIndex(), you can get information with the following accessors. - Square king0() const { return king0_; } - Square king1() const { return king1_; } - PieceSquare piece() const { return piece_; } - - // Number of KKP dimension reductions -#if defined(USE_KKP_INVERSE_WRITE) - #define KKP_LOWER_COUNT 4 -#elif defined(USE_KKP_MIRROR_WRITE) - #define KKP_LOWER_COUNT 2 -#else - #define KKP_LOWER_COUNT 1 -#endif - -#if defined(USE_KKP_INVERSE_WRITE) && !defined(USE_KKP_MIRROR_WRITE) - // USE_KKP_INVERSE_WRITE If you use it, please also define USE_KKP_MIRROR_WRITE. - static_assert(false, "define also USE_KKP_MIRROR_WRITE!"); -#endif - - // Get the index of the low-dimensional array. The mirrored one is returned to kkp_[1]. - // When USE_KKP_INVERSE_WRITE is enabled, the inverse of them will be in [2] and [3]. - // Note that the sign of grad must be reversed for this dimension reduction. - // You can use is_inverse() because it can be determined. - void toLowerDimensions(/*out*/ KKP kkp_[KKP_LOWER_COUNT]) const { - kkp_[0] = fromKKP(king0_, king1_, piece_,false); -#if defined(USE_KKP_MIRROR_WRITE) - kkp_[1] = fromKKP(flip_file(king0_), flip_file(king1_), Eval::mir_piece(piece_),false); -#if defined(USE_KKP_INVERSE_WRITE) - kkp_[2] = fromKKP( rotate180(king1_), rotate180(king0_), Eval::inv_piece(piece_),true); - kkp_[3] = fromKKP( rotate180(flip_file(king1_)), rotate180(flip_file(king0_)) , Eval::inv_piece(Eval::mir_piece(piece_)),true); -#endif -#endif - } - - // Get the index when counting the value of min_index() of this class as 0. - virtual uint64_t toRawIndex() const { - return ((uint64_t)king0_ * (uint64_t)max_king_sq_ + (uint64_t)king1_) * (uint64_t)fe_end_ + (uint64_t)piece_; - } - - // Returns whether or not the dimension lowered with toLowerDimensions is inverse. - bool is_inverse() const { - return inverse_sign; - } - - // When is_inverse() == true, reverse the sign that is not grad's turn and return it. 
- template - std::array apply_inverse_sign(const std::array& rhs) - { - return !is_inverse() ? rhs : std::array{-rhs[0], rhs[1]}; - } - - // comparison operator - bool operator==(const KKP& rhs) { return king0() == rhs.king0() && king1() == rhs.king1() && piece() == rhs.piece(); } - bool operator!=(const KKP& rhs) { return !(*this == rhs); } - - private: - Square king0_, king1_; - PieceSquare piece_; - bool inverse_sign; - }; - - // Output for debugging. - static std::ostream& operator<<(std::ostream& os, KKP rhs) - { - os << "KKP(" << rhs.king0() << "," << rhs.king1() << "," << rhs.piece() << ")"; - return os; - } - - - // Same as KK and KKP. For KPP - struct KPP : public SerializerBase - { - protected: - KPP(Square king, PieceSquare p0, PieceSquare p1) : king_(king), piece0_(p0), piece1_(p1) {} - - public: - KPP() {} - - // The minimum and maximum KPP values ​​of serial numbers when serializing KK, KKP, KPP arrays. -#if !defined(USE_TRIANGLE_WEIGHT_ARRAY) - virtual uint64_t size() const { return (uint64_t)max_king_sq_*(uint64_t)fe_end_*(uint64_t)fe_end_; } -#else - // Triangularize the square array part of [fe_end][fe_end] of kpp[SQUARE_NB][fe_end][fe_end]. - // If kpp[SQUARE_NB][triangle_fe_end], the first row of this triangular array has one element, the second row has two elements, and so on. - // hence triangle_fe_end = 1 + 2 + .. + fe_end = fe_end * (fe_end + 1) / 2 - virtual uint64_t size() const { return (uint64_t)max_king_sq_*(uint64_t)triangle_fe_end; } -#endif - - virtual void set(int max_king_sq, uint64_t fe_end, uint64_t min_index) - { - // This value is used in size(), and size() is used in SerializerBase::set(), so calculate first. 
- triangle_fe_end = (uint64_t)fe_end*((uint64_t)fe_end + 1) / 2; - - SerializerBase::set(max_king_sq, fe_end, min_index); - } - - // builder that creates KPP object from index (serial number) - KPP fromIndex(uint64_t index) const { assert(index >= min_index()); return fromRawIndex(index - min_index()); } - - // A builder that creates KPP objects from raw_index (a number that starts from 0, not a serial number) - KPP fromRawIndex(uint64_t raw_index) const - { - const uint64_t triangle_fe_end = (uint64_t)fe_end_*((uint64_t)fe_end_ + 1) / 2; - -#if !defined(USE_TRIANGLE_WEIGHT_ARRAY) - int piece1 = (int)(raw_index % fe_end_); - raw_index /= fe_end_; - int piece0 = (int)(raw_index % fe_end_); - raw_index /= fe_end_; -#else - uint64_t index2 = raw_index % triangle_fe_end; - - // Write the expression to find piece0, piece1 from index2 here. - // This is the inverse function of index2 = i * (i+1) / 2 + j. - // If j = 0, i^2 + i-2 * index2 == 0 - // From the solution formula of the quadratic equation i = (sqrt(8*index2+1)-1) / 2. - // After i is converted into an integer, j can be calculated as j = index2-i * (i + 1) / 2. - - // PieceSquare assumes 32bit (may not fit in 16bit), so this multiplication must be 64bit. - int piece1 = int(sqrt(8 * index2 + 1) - 1) / 2; - int piece0 = int(index2 - (uint64_t)piece1*((uint64_t)piece1 + 1) / 2); - - assert(piece1 < (int)fe_end_); - assert(piece0 < (int)fe_end_); - assert(piece0 > piece1); - - raw_index /= triangle_fe_end; -#endif - int king = (int)(raw_index /* % SQUARE_NB */); - assert(king < max_king_sq_); - return fromKPP((Square)king, (PieceSquare)piece0, (PieceSquare)piece1); - } - - KPP fromKPP(Square king, PieceSquare p0, PieceSquare p1) const - { - KPP my_kpp(king, p0, p1); - my_kpp.set(max_king_sq_,fe_end_,min_index()); - return my_kpp; - } - - // When you construct this object using fromIndex(), you can get information with the following accessors. 
- Square king() const { return king_; } - PieceSquare piece0() const { return piece0_; } - PieceSquare piece1() const { return piece1_; } - - -// number of dimension reductions -#if defined(USE_KPP_MIRROR_WRITE) - #if !defined(USE_TRIANGLE_WEIGHT_ARRAY) - #define KPP_LOWER_COUNT 4 - #else - #define KPP_LOWER_COUNT 2 - #endif -#else - #if !defined(USE_TRIANGLE_WEIGHT_ARRAY) - #define KPP_LOWER_COUNT 2 - #else - #define KPP_LOWER_COUNT 1 - #endif -#endif - - // Get the index of the low-dimensional array. The ones with p1 and p2 swapped, the ones mirrored, etc. are returned. - void toLowerDimensions(/*out*/ KPP kpp_[KPP_LOWER_COUNT]) const { - -#if defined(USE_TRIANGLE_WEIGHT_ARRAY) - // Note that if you use a triangular array, the swapped piece0 and piece1 will not be returned. - kpp_[0] = fromKPP(king_, piece0_, piece1_); -#if defined(USE_KPP_MIRROR_WRITE) - kpp_[1] = fromKPP(flip_file(king_), Eval::mir_piece(piece0_), Eval::mir_piece(piece1_)); -#endif - -#else - // When not using triangular array - kpp_[0] = fromKPP(king_, piece0_, piece1_); - kpp_[1] = fromKPP(king_, piece1_, piece0_); -#if defined(USE_KPP_MIRROR_WRITE) - kpp_[2] = fromKPP(flip_file(king_), mir_piece(piece0_), mir_piece(piece1_)); - kpp_[3] = fromKPP(flip_file(king_), mir_piece(piece1_), mir_piece(piece0_)); -#endif -#endif - } - - // Get the index when counting the value of min_index() of this class as 0. - virtual uint64_t toRawIndex() const { - -#if !defined(USE_TRIANGLE_WEIGHT_ARRAY) - - return ((uint64_t)king_ * (uint64_t)fe_end_ + (uint64_t)piece0_) * (uint64_t)fe_end_ + (uint64_t)piece1_; - -#else - // Macro similar to that used in Bonanza 6.0 - auto PcPcOnSq = [&](Square k, PieceSquare i, PieceSquare j) - { - - // (i,j) in this triangular array is the element in the i-th row and the j-th column. - // 1st row + 2 + ... + i = i * (i+1) / 2 because the i-th row and 0th column is the total of the elements up to that point - // The i-th row and the j-th column is j plus this. 
i*(i+1)/2+j - - // PieceSquare type is assumed to be 32 bits, so if you do not pay attention to multiplication, it will overflow. - return (uint64_t)k * triangle_fe_end + (uint64_t)(uint64_t(i)*(uint64_t(i)+1) / 2 + uint64_t(j)); - }; - - auto k = king_; - auto i = piece0_; - auto j = piece1_; - - return (i >= j) ? PcPcOnSq(k, i, j) : PcPcOnSq(k, j, i); -#endif - } - - // Returns whether or not the dimension lowered with toLowerDimensions is inverse. - // Prepared to match KK, KKP and interface. This method always returns false for this KPP class. - bool is_inverse() const { - return false; - } - - // comparison operator - bool operator==(const KPP& rhs) { - return king() == rhs.king() && - ((piece0() == rhs.piece0() && piece1() == rhs.piece1()) -#if defined(USE_TRIANGLE_WEIGHT_ARRAY) - // When using a triangular array, allow swapping of piece0 and piece1. - || (piece0() == rhs.piece1() && piece1() == rhs.piece0()) -#endif - ); } - bool operator!=(const KPP& rhs) { return !(*this == rhs); } - - - private: - Square king_; - PieceSquare piece0_, piece1_; - - uint64_t triangle_fe_end; // = (uint64_t)fe_end_*((uint64_t)fe_end_ + 1) / 2; - }; - - // Output for debugging. - static std::ostream& operator<<(std::ostream& os, KPP rhs) - { - os << "KPP(" << rhs.king() << "," << rhs.piece0() << "," << rhs.piece1() << ")"; - return os; - } - - // 4 pieces related to KPPP. However, if there is a turn and you do not consider mirrors etc., memory of 2 TB or more is required for learning. - // Even if you use a triangular array, you need 50GB x 12 bytes = 600GB for learning. - // It takes about half as much as storing only the mirrored one. - // Here, the triangular array is always used and the mirrored one is stored. - // - // Also, king() of this class is not limited to Square of the actual king, but a value from 0 to (king_sq-1) is simply returned. - // This needs to be converted to an appropriate ball position on the user side when performing compression using a mirror. 
- // - // Later, regarding the pieces0,1,2 returned by this class, - // piece0() >piece1() >piece2() - // It is, and it is necessary to keep this constraint when passing piece0,1,2 in the constructor. - struct KPPP : public SerializerBase - { - protected: - KPPP(int king, PieceSquare p0, PieceSquare p1, PieceSquare p2) : - king_(king), piece0_(p0), piece1_(p1), piece2_(p2) - { - assert(piece0_ > piece1_ && piece1_ > piece2_); - /* sort_piece(); */ - } - - public: - KPPP() {} - - virtual uint64_t size() const { return (uint64_t)max_king_sq_*triangle_fe_end; } - - // Set fe_end and king_sq. - // fe_end: fe_end assumed by this KPPP class - // king_sq: Number of balls to handle in KPPP. - // 3 layers x 3 mirrors = 3 layers x 5 lines = 15 - // 2 steps x 2 mirrors without mirror = 18 - // Set this first using set() on the side that uses this KPPP class. - virtual void set(int max_king_sq, uint64_t fe_end,uint64_t min_index) { - // This value is used in size(), and size() is used in SerializerBase::set(), so calculate first. - triangle_fe_end = fe_end * (fe_end - 1) * (fe_end - 2) / 6; - - SerializerBase::set(max_king_sq, fe_end, min_index); - } - - // number of dimension reductions - // For the time being, the dimension reduction of the mirror is not supported. I wonder if I'll do it here... -/* -#if defined(USE_KPPP_MIRROR_WRITE) -#define KPPP_LOWER_COUNT 2 -#else -#define KPPP_LOWER_COUNT 1 -#endif -*/ -#define KPPP_LOWER_COUNT 1 - - // Get the index of the low-dimensional array. - // Note that the one with p0,p1,p2 swapped will not be returned. - // Also, the mirrored one is returned only when USE_KPPP_MIRROR_WRITE is enabled. - void toLowerDimensions(/*out*/ KPPP kppp_[KPPP_LOWER_COUNT]) const - { - kppp_[0] = fromKPPP(king_, piece0_, piece1_,piece2_); -#if KPPP_LOWER_COUNT > 1 - // If mir_piece is done, it will be in a state not sorted. Need code to sort. 
- PieceSquare p_list[3] = { mir_piece(piece2_), mir_piece(piece1_), mir_piece(piece0_) }; - my_insertion_sort(p_list, 0, 3); - kppp_[1] = fromKPPP((int)flip_file((Square)king_), p_list[2] , p_list[1], p_list[0]); -#endif - } - - // builder that creates KPPP object from index (serial number) - KPPP fromIndex(uint64_t index) const { assert(index >= min_index()); return fromRawIndex(index - min_index()); } - - // A builder that creates KPPP objects from raw_index (a number that starts from 0, not a serial number) - KPPP fromRawIndex(uint64_t raw_index) const - { - uint64_t index2 = raw_index % triangle_fe_end; - - // Write the expression to find piece0, piece1, piece2 from index2 here. - // This is the inverse function of index2 = i(i-1)(i-2)/6-1 + j(j+1)/2 + k. - // For j = k = 0, the real root is i = ... from the solution formula of the cubic equation. (The following formula) - // However, if index2 is 0 or 1, there are multiple real solutions. You have to consider this. It is necessary to take measures against insufficient calculation accuracy. - // After i is calculated, i can be converted into an integer, then put in the first expression and then j can be calculated in the same way as in KPP. - - // This process is a relatively difficult numerical calculation. Various ideas are needed. - - int piece0; - if (index2 <= 1) - { - // There are multiple real solutions only when index2 == 0,1. - piece0 = (int)index2 + 2; - - } else { - - //double t = pow(sqrt((243 *index2 * index2-1) * 3) + 27 * index2, 1.0 / 3); - // → In this case, the content of sqrt() will overflow if index2 becomes large. - - // Since the contents of sqrt() overflow, do not multiply 3.0 in sqrt, but multiply sqrt(3.0) outside sqrt. - // Since the contents of sqrt() will overflow, use an approximate expression when index2 is large. 
- - double t; - - if (index2 < 100000000) - t = pow(sqrt((243.0 *index2 * index2 - 1)) * sqrt(3.0) + 27 * index2, 1.0 / 3); - else - // If index2 is very large, we can think of the contents of sqrt as approximately √243 * index2. - t = pow( index2 * sqrt(243 * 3.0) + 27 * index2, 1.0 / 3); - - // Add deltas to avoid a slight calculation error when rounding. - // If it is too large, it may increase by 1 so adjustment is necessary. - - const double delta = 0.000000001; - - piece0 = int(t / pow(3.0, 2.0 / 3) + 1.0 / (pow(3.0, 1.0 / 3) * t) + delta) + 1; - // Uuu. Is it really like this? ('Ω`) - } - - //Since piece2 is obtained, substitute piece2 for i of i(i-1)(i-2)/6 (=a) in the above formula. Also substitute k = 0. - // j(j+1)/2 = index2-a - // This is from the solution formula of the quadratic equation.. - - uint64_t a = (uint64_t)piece0*((uint64_t)piece0 - 1)*((uint64_t)piece0 - 2) / 6; - int piece1 = int((1 + sqrt(8.0 * (index2 - a ) + 1)) / 2); - uint64_t b = (uint64_t)piece1 * (piece1 - 1) / 2; - int piece2 = int(index2 - a - b); - -#if 0 - if (!((piece0 > piece1 && piece1 > piece2))) - { - std::cout << index << " , " << index2 << "," << a << "," << sqrt(8.0 * (index2 - a) + 1); - } -#endif - - assert(piece0 > piece1 && piece1 > piece2); - - assert(piece2 < (int)fe_end_); - assert(piece1 < (int)fe_end_); - assert(piece0 < (int)fe_end_); - - raw_index /= triangle_fe_end; - - int king = (int)(raw_index /* % SQUARE_NB */); - assert(king < max_king_sq_); - - // Propagate king_sq and fe_end. - return fromKPPP((Square)king, (PieceSquare)piece0, (PieceSquare)piece1 , (PieceSquare)piece2); - } - - // Specify k,p0,p1,p2 to build KPPP instance. - // The king_sq and fe_end passed by set() which is internally retained are inherited. 
- KPPP fromKPPP(int king, PieceSquare p0, PieceSquare p1, PieceSquare p2) const - { - KPPP kppp(king, p0, p1, p2); - kppp.set(max_king_sq_, fe_end_,min_index()); - return kppp; - } - - // Get the index when counting the value of min_index() of this class as 0. - virtual uint64_t toRawIndex() const { - - // Macro similar to the one used in Bonanza 6.0 - // Precondition) i> j> k. - // NG in case of i==j,j==k. - auto PcPcPcOnSq = [this](int king, PieceSquare i, PieceSquare j , PieceSquare k) - { - // (i,j,k) in this triangular array is the element in the i-th row and the j-th column. - // 0th row 0th column 0th is the sum of the elements up to that point, so 0 + 0 + 1 + 3 + 6 + ... + (i)*(i-1)/2 = i*( i-1)*(i-2)/6 - // i-th row, j-th column, 0-th is j with j added. + j*(j-1) / 2 - // i-th row, j-th column and k-th row is k plus it. + k - assert(i > j && j > k); - - // PieceSquare type is assumed to be 32 bits, so if you do not pay attention to multiplication, it will overflow. - return (uint64_t)king * triangle_fe_end + (uint64_t)( - uint64_t(i)*(uint64_t(i) - 1) * (uint64_t(i) - 2) / 6 - + uint64_t(j)*(uint64_t(j) - 1) / 2 - + uint64_t(k) - ); - }; - - return PcPcPcOnSq(king_, piece0_, piece1_, piece2_); - } - - // When you construct this object using fromIndex(), you can get information with the following accessors. - int king() const { return king_; } - PieceSquare piece0() const { return piece0_; } - PieceSquare piece1() const { return piece1_; } - PieceSquare piece2() const { return piece2_; } - // Returns whether or not the dimension lowered with toLowerDimensions is inverse. - // Prepared to match KK, KKP and interface. This method always returns false for this KPPP class. - bool is_inverse() const { - return false; - } - - // Returns the number of elements in a triangular array. It is assumed that the kppp array is the following two-dimensional array. 
- // kppp[king_sq][triangle_fe_end]; - uint64_t get_triangle_fe_end() const { return triangle_fe_end; } - - // comparison operator - bool operator==(const KPPP& rhs) { - // piece0> piece1> piece2 is assumed, so there is no possibility of replacement. - return king() == rhs.king() && piece0() == rhs.piece0() && piece1() == rhs.piece1() && piece2() == rhs.piece2(); - } - bool operator!=(const KPPP& rhs) { return !(*this == rhs); } - - private: - - int king_; - PieceSquare piece0_, piece1_,piece2_; - - // The part of the square array of [fe_end][fe_end][fe_end] of kppp[king_sq][fe_end][fe_end][fe_end] is made into a triangular array. - // If kppp[king_sq][triangle_fe_end], the number of elements from the 0th row of this triangular array is 0,0,1,3,..., The nth row is n(n-1)/2. - // therefore, - // triangle_fe_end = Σn(n-1)/2 , n=0..fe_end-1 - // = fe_end * (fe_end - 1) * (fe_end - 2) / 6 - uint64_t triangle_fe_end; // ((uint64_t)PieceSquare::PS_END)*((uint64_t)PieceSquare::PS_END - 1)*((uint64_t)PieceSquare::PS_END - 2) / 6; - }; - - // Output for debugging. - static std::ostream& operator<<(std::ostream& os, KPPP rhs) - { - os << "KPPP(" << rhs.king() << "," << rhs.piece0() << "," << rhs.piece1() << "," << rhs.piece2() << ")"; - return os; - } - - // For learning about 4 pieces by KKPP. - // - // Same design as KPPP class. In KPPP class, treat as one with less p. - // The positions of the two balls are encoded as values ​​from 0 to king_sq-1. - // - // Later, regarding the pieces0 and 1 returned by this class, - // piece0() >piece1() - // It is, and it is necessary to keep this constraint even when passing piece0,1 in the constructor. - // - // Due to this constraint, PieceSquareZero cannot be assigned to piece0 and piece1 at the same time and passed. - // If you want to support learning of dropped frames, you need to devise with evaluate(). 
- struct KKPP: SerializerBase - { - protected: - KKPP(int king, PieceSquare p0, PieceSquare p1) : - king_(king), piece0_(p0), piece1_(p1) - { - assert(piece0_ > piece1_); - /* sort_piece(); */ - } - - public: - KKPP() {} - - virtual uint64_t size() const { return (uint64_t)max_king_sq_*triangle_fe_end; } - - // Set fe_end and king_sq. - // fe_end: fe_end assumed by this KPPP class - // king_sq: Number of balls to handle in KPPP. - // 9 steps x mirrors 9 steps x 5 squared squares (balls before and after) = 45*45 = 2025. - // Set this first using set() on the side that uses this KKPP class. - void set(int max_king_sq, uint64_t fe_end , uint64_t min_index) { - // This value is used in size(), and size() is used in SerializerBase::set(), so calculate first. - triangle_fe_end = fe_end * (fe_end - 1) / 2; - - SerializerBase::set(max_king_sq, fe_end, min_index); - } - - // number of dimension reductions - // For the time being, the dimension reduction of the mirror is not supported. I wonder if I'll do it here... (Because the memory for learning is a waste) -#define KKPP_LOWER_COUNT 1 - - // Get the index of the low-dimensional array. - //Note that the one with p0,p1,p2 swapped will not be returned. - // Also, the mirrored one is returned only when USE_KPPP_MIRROR_WRITE is enabled. - void toLowerDimensions(/*out*/ KKPP kkpp_[KPPP_LOWER_COUNT]) const - { - kkpp_[0] = fromKKPP(king_, piece0_, piece1_); - - // When mirroring, mir_piece will not be sorted. Need code to sort. - // We also need to define a mirror for king_. - } - - // builder that creates KKPP object from index (serial number) - KKPP fromIndex(uint64_t index) const { assert(index >= min_index()); return fromRawIndex(index - min_index()); } - - // builder that creates KKPP object from raw_index (number starting from 0, not serial number) - KKPP fromRawIndex(uint64_t raw_index) const - { - uint64_t index2 = raw_index % triangle_fe_end; - - // Write the expression to find piece0, piece1, piece2 from index2 here. 
- // This is the inverse function of index2 = i(i-1)/2 + j. - // Use the formula of the solution of the quadratic equation with j=0. - // When index2=0, it is a double root, but the smaller one does not satisfy i>j and is ignored. - - int piece0 = (int(sqrt(8 * index2 + 1)) + 1)/2; - int piece1 = int(index2 - piece0 * (piece0 - 1) /2 ); - - assert(piece0 > piece1); - - assert(piece1 < (int)fe_end_); - assert(piece0 < (int)fe_end_); - - raw_index /= triangle_fe_end; - - int king = (int)(raw_index /* % SQUARE_NB */); - assert(king < max_king_sq_); - - // Propagate king_sq and fe_end. - return fromKKPP(king, (PieceSquare)piece0, (PieceSquare)piece1); - } - - // Specify k,p0,p1 to build KKPP instance. - // The king_sq and fe_end passed by set() which is internally retained are inherited. - KKPP fromKKPP(int king, PieceSquare p0, PieceSquare p1) const - { - KKPP kkpp(king, p0, p1); - kkpp.set(max_king_sq_, fe_end_,min_index()); - return kkpp; - } - - // Get the index when counting the value of min_index() of this class as 0. - virtual uint64_t toRawIndex() const { - - // Macro similar to the one used in Bonanza 6.0 - // Precondition) i> j. - // NG in case of i==j,j==k. - auto PcPcOnSq = [this](int king, PieceSquare i, PieceSquare j) - { - assert(i > j); - - // PieceSquare type is assumed to be 32 bits, so if you do not pay attention to multiplication, it will overflow. - return (uint64_t)king * triangle_fe_end + (uint64_t)( - + uint64_t(i)*(uint64_t(i) - 1) / 2 - + uint64_t(j) - ); - }; - - return PcPcOnSq(king_, piece0_, piece1_); - } - - // When you construct this object using fromIndex(), fromKKPP(), you can get information with the following accessors. - int king() const { return king_; } - PieceSquare piece0() const { return piece0_; } - PieceSquare piece1() const { return piece1_; } - - // Returns whether or not the dimension lowered with toLowerDimensions is inverse. - // Prepared to match KK, KKP and interface. 
In this KKPP class, this method always returns false. - bool is_inverse() const { - return false; - } - - //Returns the number of elements in a triangular array. It is assumed that the kkpp array is the following two-dimensional array. - // kkpp[king_sq][triangle_fe_end]; - uint64_t get_triangle_fe_end() const { return triangle_fe_end; } - - // comparison operator - bool operator==(const KKPP& rhs) { - // Since piece0> piece1 is assumed, there is no possibility of replacement. - return king() == rhs.king() && piece0() == rhs.piece0() && piece1() == rhs.piece1(); - } - bool operator!=(const KKPP& rhs) { return !(*this == rhs); } - - private: - - int king_; - PieceSquare piece0_, piece1_; - - // Triangularize the square array part of [fe_end][fe_end] of kppp[king_sq][fe_end][fe_end]. - uint64_t triangle_fe_end = 0; - - }; - - // Output for debugging. - static std::ostream& operator<<(std::ostream& os, KKPP rhs) - { - os << "KKPP(" << rhs.king() << "," << rhs.piece0() << "," << rhs.piece1() << ")"; - return os; - } - - } #endif // defined (EVAL_LEARN) diff --git a/src/material.cpp b/src/material.cpp index 0ef9926f..870a5e11 100644 --- a/src/material.cpp +++ b/src/material.cpp @@ -130,7 +130,7 @@ Entry* probe(const Position& pos) { Value npm_w = pos.non_pawn_material(WHITE); Value npm_b = pos.non_pawn_material(BLACK); - Value npm = Utility::clamp(npm_w + npm_b, EndgameLimit, MidgameLimit); + Value npm = std::clamp(npm_w + npm_b, EndgameLimit, MidgameLimit); // Map total non-pawn material into [PHASE_ENDGAME, PHASE_MIDGAME] e->gamePhase = Phase(((npm - EndgameLimit) * PHASE_MIDGAME) / (MidgameLimit - EndgameLimit)); diff --git a/src/misc.cpp b/src/misc.cpp index 725450c2..851280fe 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -51,6 +51,11 @@ typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY); #include #endif +#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && 
!defined(_WIN32)) +#define POSIXALIGNEDALLOC +#include +#endif + #include "misc.h" #include "thread.h" @@ -214,26 +219,33 @@ const std::string compiler_info() { compiler += "\nCompilation settings include: "; compiler += (Is64Bit ? " 64bit" : " 32bit"); + #if defined(USE_VNNI) + compiler += " VNNI"; + #endif #if defined(USE_AVX512) compiler += " AVX512"; #endif + compiler += (HasPext ? " BMI2" : ""); #if defined(USE_AVX2) compiler += " AVX2"; #endif - #if defined(USE_SSE42) - compiler += " SSE42"; - #endif #if defined(USE_SSE41) compiler += " SSE41"; #endif #if defined(USE_SSSE3) compiler += " SSSE3"; #endif - #if defined(USE_SSE3) - compiler += " SSE3"; + #if defined(USE_SSE2) + compiler += " SSE2"; #endif - compiler += (HasPext ? " BMI2" : ""); - compiler += (HasPopCnt ? " POPCNT" : ""); + compiler += (HasPopCnt ? " POPCNT" : ""); + #if defined(USE_MMX) + compiler += " MMX"; + #endif + #if defined(USE_NEON) + compiler += " NEON"; + #endif + #if !defined(NDEBUG) compiler += " DEBUG"; #endif @@ -316,14 +328,17 @@ void prefetch(void* addr) { #endif -/// Wrappers for systems where the c++17 implementation doesn't guarantee the availability of aligned_alloc. -/// Memory allocated with std_aligned_alloc must be freed with std_aligned_free. -/// + +/// std_aligned_alloc() is our wrapper for systems where the c++17 implementation +/// does not guarantee the availability of aligned_alloc(). Memory allocated with +/// std_aligned_alloc() must be freed with std_aligned_free(). void* std_aligned_alloc(size_t alignment, size_t size) { -#if (defined(__APPLE__) && defined(_LIBCPP_HAS_C11_FEATURES)) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) - return aligned_alloc(alignment, size); -#elif (defined(_WIN32) || (defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES))) + +#if defined(POSIXALIGNEDALLOC) + void *mem; + return posix_memalign(&mem, alignment, size) ? 
nullptr : mem; +#elif defined(_WIN32) return _mm_malloc(size, alignment); #else return std::aligned_alloc(alignment, size); @@ -331,16 +346,17 @@ void* std_aligned_alloc(size_t alignment, size_t size) { } void std_aligned_free(void* ptr) { -#if (defined(__APPLE__) && defined(_LIBCPP_HAS_C11_FEATURES)) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) + +#if defined(POSIXALIGNEDALLOC) free(ptr); -#elif (defined(_WIN32) || (defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES))) +#elif defined(_WIN32) _mm_free(ptr); #else free(ptr); #endif } -/// aligned_ttmem_alloc() will return suitably aligned memory, and if possible use large pages. +/// aligned_ttmem_alloc() will return suitably aligned memory, if possible using large pages. /// The returned pointer is the aligned one, while the mem argument is the one that needs /// to be passed to free. With c++17 some of this functionality could be simplified. @@ -352,7 +368,9 @@ void* aligned_ttmem_alloc(size_t allocSize, void*& mem) { size_t size = ((allocSize + alignment - 1) / alignment) * alignment; // multiple of alignment if (posix_memalign(&mem, alignment, size)) mem = nullptr; +#if defined(MADV_HUGEPAGE) madvise(mem, allocSize, MADV_HUGEPAGE); +#endif return mem; } diff --git a/src/misc.h b/src/misc.h index ecef028f..19bb008c 100644 --- a/src/misc.h +++ b/src/misc.h @@ -67,14 +67,6 @@ std::ostream& operator<<(std::ostream&, SyncCout); #define sync_cout std::cout << IO_LOCK #define sync_endl std::endl << IO_UNLOCK -namespace Utility { - -/// Clamp a value between lo and hi. Available in c++17. -template constexpr const T& clamp(const T& v, const T& lo, const T& hi) { - return v < lo ? lo : v > hi ? 
hi : v; -} - -} /// xorshift64star Pseudo-Random Number Generator /// This class is based on original code written and dedicated diff --git a/src/movegen.cpp b/src/movegen.cpp index d74df4c3..3340f65c 100644 --- a/src/movegen.cpp +++ b/src/movegen.cpp @@ -248,7 +248,7 @@ namespace { *moveList++ = make_move(ksq, pop_lsb(&b)); if ((Type != CAPTURES) && pos.can_castle(Us & ANY_CASTLING)) - for(CastlingRights cr : { Us & KING_SIDE, Us & QUEEN_SIDE } ) + for (CastlingRights cr : { Us & KING_SIDE, Us & QUEEN_SIDE } ) if (!pos.castling_impeded(cr) && pos.can_castle(cr)) *moveList++ = make(ksq, pos.castling_rook_square(cr)); } diff --git a/src/movepick.cpp b/src/movepick.cpp index 96a44449..153d323e 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -182,7 +182,7 @@ top: --endMoves; ++stage; - /* fallthrough */ + [[fallthrough]]; case REFUTATION: if (select([&](){ return *cur != MOVE_NONE @@ -190,7 +190,7 @@ top: && pos.pseudo_legal(*cur); })) return *(cur - 1); ++stage; - /* fallthrough */ + [[fallthrough]]; case QUIET_INIT: if (!skipQuiets) @@ -203,7 +203,7 @@ top: } ++stage; - /* fallthrough */ + [[fallthrough]]; case QUIET: if ( !skipQuiets @@ -217,7 +217,7 @@ top: endMoves = endBadCaptures; ++stage; - /* fallthrough */ + [[fallthrough]]; case BAD_CAPTURE: return select([](){ return true; }); @@ -228,7 +228,7 @@ top: score(); ++stage; - /* fallthrough */ + [[fallthrough]]; case EVASION: return select([](){ return true; }); @@ -246,14 +246,14 @@ top: return MOVE_NONE; ++stage; - /* fallthrough */ + [[fallthrough]]; case QCHECK_INIT: cur = moves; endMoves = generate(pos, cur); ++stage; - /* fallthrough */ + [[fallthrough]]; case QCHECK: return select([](){ return true; }); diff --git a/src/movepick.h b/src/movepick.h index f080935a..4c0ad551 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ -86,9 +86,9 @@ enum StatsType { NoCaptures, Captures }; /// the move's from and to squares, see www.chessprogramming.org/Butterfly_Boards typedef Stats ButterflyHistory; -/// 
At higher depths LowPlyHistory records successful quiet moves near the root and quiet -/// moves which are/were in the PV (ttPv) -/// It is cleared with each new search and filled during iterative deepening +/// At higher depths LowPlyHistory records successful quiet moves near the root +/// and quiet moves which are/were in the PV (ttPv). It is cleared with each new +/// search and filled during iterative deepening. constexpr int MAX_LPH = 4; typedef Stats LowPlyHistory; diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp index a28a4573..a2845c96 100644 --- a/src/nnue/evaluate_nnue.cpp +++ b/src/nnue/evaluate_nnue.cpp @@ -29,30 +29,29 @@ #include "evaluate_nnue.h" -ExtPieceSquare kpp_board_index[PIECE_NB] = { - // convention: W - us, B - them - // viewed from other side, W and B are reversed - { PS_NONE, PS_NONE }, - { PS_W_PAWN, PS_B_PAWN }, - { PS_W_KNIGHT, PS_B_KNIGHT }, - { PS_W_BISHOP, PS_B_BISHOP }, - { PS_W_ROOK, PS_B_ROOK }, - { PS_W_QUEEN, PS_B_QUEEN }, - { PS_W_KING, PS_B_KING }, - { PS_NONE, PS_NONE }, - { PS_NONE, PS_NONE }, - { PS_B_PAWN, PS_W_PAWN }, - { PS_B_KNIGHT, PS_W_KNIGHT }, - { PS_B_BISHOP, PS_W_BISHOP }, - { PS_B_ROOK, PS_W_ROOK }, - { PS_B_QUEEN, PS_W_QUEEN }, - { PS_B_KING, PS_W_KING }, - { PS_NONE, PS_NONE } -}; - - namespace Eval::NNUE { + uint32_t kpp_board_index[PIECE_NB][COLOR_NB] = { + // convention: W - us, B - them + // viewed from other side, W and B are reversed + { PS_NONE, PS_NONE }, + { PS_W_PAWN, PS_B_PAWN }, + { PS_W_KNIGHT, PS_B_KNIGHT }, + { PS_W_BISHOP, PS_B_BISHOP }, + { PS_W_ROOK, PS_B_ROOK }, + { PS_W_QUEEN, PS_B_QUEEN }, + { PS_W_KING, PS_B_KING }, + { PS_NONE, PS_NONE }, + { PS_NONE, PS_NONE }, + { PS_B_PAWN, PS_W_PAWN }, + { PS_B_KNIGHT, PS_W_KNIGHT }, + { PS_B_BISHOP, PS_W_BISHOP }, + { PS_B_ROOK, PS_W_ROOK }, + { PS_B_QUEEN, PS_W_QUEEN }, + { PS_B_KING, PS_W_KING }, + { PS_NONE, PS_NONE } + }; + // Input feature converter AlignedPtr feature_transformer; @@ -86,7 +85,7 @@ namespace Eval::NNUE { 
bool ReadParameters(std::istream& stream, const AlignedPtr& pointer) { std::uint32_t header; - stream.read(reinterpret_cast(&header), sizeof(header)); + header = read_little_endian(stream); if (!stream || header != T::GetHashValue()) return false; return pointer->ReadParameters(stream); } @@ -109,13 +108,13 @@ namespace Eval::NNUE { } // Read network header - bool ReadHeader(std::istream& stream, - std::uint32_t* hash_value, std::string* architecture) { - + bool ReadHeader(std::istream& stream, std::uint32_t* hash_value, std::string* architecture) + { std::uint32_t version, size; - stream.read(reinterpret_cast(&version), sizeof(version)); - stream.read(reinterpret_cast(hash_value), sizeof(*hash_value)); - stream.read(reinterpret_cast(&size), sizeof(size)); + + version = read_little_endian(stream); + *hash_value = read_little_endian(stream); + size = read_little_endian(stream); if (!stream || version != kVersion) return false; architecture->resize(size); stream.read(&(*architecture)[0], size); @@ -202,10 +201,7 @@ namespace Eval::NNUE { // Evaluation function. Perform differential calculation. Value evaluate(const Position& pos) { - Value v = ComputeScore(pos, false); - v = Utility::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); - - return v; + return ComputeScore(pos, false); } // Evaluation function. Perform full calculation. 
diff --git a/src/nnue/features/feature_set.h b/src/nnue/features/feature_set.h index ec34a486..b933d2d9 100644 --- a/src/nnue/features/feature_set.h +++ b/src/nnue/features/feature_set.h @@ -106,8 +106,7 @@ namespace Eval::NNUE::Features { reset[perspective] = false; switch (trigger) { case TriggerEvent::kFriendKingMoved: - reset[perspective] = - dp.pieceId[0] == PIECE_ID_KING + perspective; + reset[perspective] = dp.piece[0] == make_piece(perspective, KING); break; default: assert(false); diff --git a/src/nnue/features/half_kp.cpp b/src/nnue/features/half_kp.cpp index 628add6e..88e384a3 100644 --- a/src/nnue/features/half_kp.cpp +++ b/src/nnue/features/half_kp.cpp @@ -23,25 +23,17 @@ namespace Eval::NNUE::Features { - // Find the index of the feature quantity from the king position and PieceSquare - template - inline IndexType HalfKP::MakeIndex(Square sq_k, PieceSquare p) { - return static_cast(PS_END) * static_cast(sq_k) + p; + // Orient a square according to perspective (rotates by 180 for black) + inline Square orient(Color perspective, Square s) { + return Square(int(s) ^ (bool(perspective) * 63)); } - // Get pieces information + // Find the index of the feature quantity from the king position and PieceSquare template - inline void HalfKP::GetPieces( - const Position& pos, Color perspective, - PieceSquare** pieces, Square* sq_target_k) { + inline IndexType HalfKP::MakeIndex( + Color perspective, Square s, Piece pc, Square ksq) { - *pieces = (perspective == BLACK) ? - pos.eval_list()->piece_list_fb() : - pos.eval_list()->piece_list_fw(); - const PieceId target = (AssociatedKing == Side::kFriend) ? 
- static_cast(PIECE_ID_KING + perspective) : - static_cast(PIECE_ID_KING + ~perspective); - *sq_target_k = static_cast(((*pieces)[target] - PS_W_KING) % SQUARE_NB); + return IndexType(orient(perspective, s) + kpp_board_index[pc][perspective] + PS_END * ksq); } // Get a list of indices for active features @@ -49,16 +41,11 @@ namespace Eval::NNUE::Features { void HalfKP::AppendActiveIndices( const Position& pos, Color perspective, IndexList* active) { - // Do nothing if array size is small to avoid compiler warning - if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return; - - PieceSquare* pieces; - Square sq_target_k; - GetPieces(pos, perspective, &pieces, &sq_target_k); - for (PieceId i = PIECE_ID_ZERO; i < PIECE_ID_KING; ++i) { - if (pieces[i] != PS_NONE) { - active->push_back(MakeIndex(sq_target_k, pieces[i])); - } + Square ksq = orient(perspective, pos.square(perspective)); + Bitboard bb = pos.pieces() & ~pos.pieces(KING); + while (bb) { + Square s = pop_lsb(&bb); + active->push_back(MakeIndex(perspective, s, pos.piece_on(s), ksq)); } } @@ -68,22 +55,15 @@ namespace Eval::NNUE::Features { const Position& pos, Color perspective, IndexList* removed, IndexList* added) { - PieceSquare* pieces; - Square sq_target_k; - GetPieces(pos, perspective, &pieces, &sq_target_k); + Square ksq = orient(perspective, pos.square(perspective)); const auto& dp = pos.state()->dirtyPiece; for (int i = 0; i < dp.dirty_num; ++i) { - if (dp.pieceId[i] >= PIECE_ID_KING) continue; - const auto old_p = static_cast( - dp.old_piece[i].from[perspective]); - if (old_p != PS_NONE) { - removed->push_back(MakeIndex(sq_target_k, old_p)); - } - const auto new_p = static_cast( - dp.new_piece[i].from[perspective]); - if (new_p != PS_NONE) { - added->push_back(MakeIndex(sq_target_k, new_p)); - } + Piece pc = dp.piece[i]; + if (type_of(pc) == KING) continue; + if (dp.from[i] != SQ_NONE) + removed->push_back(MakeIndex(perspective, dp.from[i], pc, ksq)); + if (dp.to[i] != SQ_NONE) + 
added->push_back(MakeIndex(perspective, dp.to[i], pc, ksq)); } } diff --git a/src/nnue/features/half_kp.h b/src/nnue/features/half_kp.h index 99842eea..ee6a8df3 100644 --- a/src/nnue/features/half_kp.h +++ b/src/nnue/features/half_kp.h @@ -41,7 +41,7 @@ namespace Eval::NNUE::Features { static constexpr IndexType kDimensions = static_cast(SQUARE_NB) * static_cast(PS_END); // Maximum number of simultaneously active features - static constexpr IndexType kMaxActiveDimensions = PIECE_ID_KING; + static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count // Trigger for full calculation instead of difference calculation static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kFriendKingMoved; @@ -53,13 +53,9 @@ namespace Eval::NNUE::Features { static void AppendChangedIndices(const Position& pos, Color perspective, IndexList* removed, IndexList* added); - // Index of a feature for a given king position and another piece on some square - static IndexType MakeIndex(Square sq_k, PieceSquare p); - private: - // Get pieces information - static void GetPieces(const Position& pos, Color perspective, - PieceSquare** pieces, Square* sq_target_k); + // Index of a feature for a given king position and another piece on some square + static IndexType MakeIndex(Color perspective, Square s, Piece pc, Square sq_k); }; } // namespace Eval::NNUE::Features diff --git a/src/nnue/features/half_relative_kp.cpp b/src/nnue/features/half_relative_kp.cpp index 7f15ff39..015ecb73 100644 --- a/src/nnue/features/half_relative_kp.cpp +++ b/src/nnue/features/half_relative_kp.cpp @@ -11,49 +11,41 @@ namespace NNUE { namespace Features { +// Orient a square according to perspective (rotates by 180 for black) +inline Square orient(Color perspective, Square s) { + return Square(int(s) ^ (bool(perspective) * 63)); +} + // Find the index of the feature quantity from the ball position and PieceSquare template inline IndexType HalfRelativeKP::MakeIndex( - Square sq_k, PieceSquare p) { + 
Color perspective, Square s, Piece pc, Square sq_k) { + const IndexType p = IndexType(orient(perspective, s) + kpp_board_index[pc][perspective]); + return MakeIndex(sq_k, p); +} + +// Find the index of the feature quantity from the ball position and PieceSquare +template +inline IndexType HalfRelativeKP::MakeIndex( + Square sq_k, IndexType p) { constexpr IndexType W = kBoardWidth; constexpr IndexType H = kBoardHeight; - const IndexType piece_index = (p - PieceSquare::PS_W_PAWN) / SQUARE_NB; - const Square sq_p = static_cast((p - PieceSquare::PS_W_PAWN) % SQUARE_NB); + const IndexType piece_index = (p - PS_W_PAWN) / SQUARE_NB; + const Square sq_p = static_cast((p - PS_W_PAWN) % SQUARE_NB); const IndexType relative_file = file_of(sq_p) - file_of(sq_k) + (W / 2); const IndexType relative_rank = rank_of(sq_p) - rank_of(sq_k) + (H / 2); return H * W * piece_index + H * relative_file + relative_rank; } -// Get the piece information -template -inline void HalfRelativeKP::GetPieces( - const Position& pos, Color perspective, - PieceSquare** pieces, Square* sq_target_k) { - *pieces = (perspective == BLACK) ? - pos.eval_list()->piece_list_fb() : - pos.eval_list()->piece_list_fw(); - const PieceId target = (AssociatedKing == Side::kFriend) ? 
- static_cast(PieceId::PIECE_ID_KING + perspective) : - static_cast(PieceId::PIECE_ID_KING + ~perspective); - *sq_target_k = static_cast(((*pieces)[target] - PieceSquare::PS_W_KING) % SQUARE_NB); -} - // Get a list of indices with a value of 1 among the features template void HalfRelativeKP::AppendActiveIndices( const Position& pos, Color perspective, IndexList* active) { - // do nothing if array size is small to avoid compiler warning - if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return; - - PieceSquare* pieces; - Square sq_target_k; - GetPieces(pos, perspective, &pieces, &sq_target_k); - for (PieceId i = PieceId::PIECE_ID_ZERO; i < PieceId::PIECE_ID_KING; ++i) { - if (pieces[i] >= PieceSquare::PS_W_PAWN) { - if (pieces[i] != PieceSquare::PS_NONE) { - active->push_back(MakeIndex(sq_target_k, pieces[i])); - } - } + Square ksq = orient(perspective, pos.square(perspective)); + Bitboard bb = pos.pieces() & ~pos.pieces(KING); + while (bb) { + Square s = pop_lsb(&bb); + active->push_back(MakeIndex(perspective, s, pos.piece_on(s), ksq)); } } @@ -62,26 +54,15 @@ template void HalfRelativeKP::AppendChangedIndices( const Position& pos, Color perspective, IndexList* removed, IndexList* added) { - PieceSquare* pieces; - Square sq_target_k; - GetPieces(pos, perspective, &pieces, &sq_target_k); + Square ksq = orient(perspective, pos.square(perspective)); const auto& dp = pos.state()->dirtyPiece; for (int i = 0; i < dp.dirty_num; ++i) { - if (dp.pieceId[i] >= PieceId::PIECE_ID_KING) continue; - const auto old_p = static_cast( - dp.old_piece[i].from[perspective]); - if (old_p >= PieceSquare::PS_W_PAWN) { - if (old_p != PieceSquare::PS_NONE) { - removed->push_back(MakeIndex(sq_target_k, old_p)); - } - } - const auto new_p = static_cast( - dp.new_piece[i].from[perspective]); - if (new_p >= PieceSquare::PS_W_PAWN) { - if (new_p != PieceSquare::PS_NONE) { - added->push_back(MakeIndex(sq_target_k, new_p)); - } - } + Piece pc = dp.piece[i]; + if (type_of(pc) == KING) 
continue; + if (dp.from[i] != SQ_NONE) + removed->push_back(MakeIndex(perspective, dp.from[i], pc, ksq)); + if (dp.to[i] != SQ_NONE) + added->push_back(MakeIndex(perspective, dp.to[i], pc, ksq)); } } diff --git a/src/nnue/features/half_relative_kp.h b/src/nnue/features/half_relative_kp.h index 9561ab91..2d4182e4 100644 --- a/src/nnue/features/half_relative_kp.h +++ b/src/nnue/features/half_relative_kp.h @@ -25,7 +25,7 @@ class HalfRelativeKP { static constexpr std::uint32_t kHashValue = 0xF9180919u ^ (AssociatedKing == Side::kFriend); // Piece type excluding balls - static constexpr IndexType kNumPieceKinds = (PieceSquare::PS_END - PieceSquare::PS_W_PAWN) / SQUARE_NB; + static constexpr IndexType kNumPieceKinds = 5 * 2; // width of the virtual board with the ball in the center static constexpr IndexType kBoardWidth = FILE_NB * 2 - 1; // height of a virtual board with balls in the center @@ -34,7 +34,7 @@ class HalfRelativeKP { static constexpr IndexType kDimensions = kNumPieceKinds * kBoardHeight * kBoardWidth; // The maximum value of the number of indexes whose value is 1 at the same time among the feature values - static constexpr IndexType kMaxActiveDimensions = PieceId::PIECE_ID_KING; + static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count // Timing of full calculation instead of difference calculation static constexpr TriggerEvent kRefreshTrigger = (AssociatedKing == Side::kFriend) ? 
@@ -49,12 +49,9 @@ class HalfRelativeKP { IndexList* removed, IndexList* added); // Find the index of the feature quantity from the ball position and PieceSquare - static IndexType MakeIndex(Square sq_k, PieceSquare p); - - private: - // Get the piece information - static void GetPieces(const Position& pos, Color perspective, - PieceSquare** pieces, Square* sq_target_k); + static IndexType MakeIndex(Square s, IndexType p); + // Find the index of the feature quantity from the ball position and PieceSquare + static IndexType MakeIndex(Color perspective, Square s, Piece pc, Square sq_k); }; } // namespace Features diff --git a/src/nnue/features/k.cpp b/src/nnue/features/k.cpp index 001e4b98..314b1338 100644 --- a/src/nnue/features/k.cpp +++ b/src/nnue/features/k.cpp @@ -11,19 +11,21 @@ namespace NNUE { namespace Features { +// Orient a square according to perspective (rotates by 180 for black) +inline Square orient(Color perspective, Square s) { + return Square(int(s) ^ (bool(perspective) * 63)); +} + +// Index of a feature for a given king position. +IndexType K::MakeIndex(Color perspective, Square s, Color king_color) { + return IndexType(orient(perspective, s) + bool(perspective ^ king_color) * 64); +} + // Get a list of indices with a value of 1 among the features void K::AppendActiveIndices( const Position& pos, Color perspective, IndexList* active) { - // do nothing if array size is small to avoid compiler warning - if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return; - - const PieceSquare* pieces = (perspective == BLACK) ? 
- pos.eval_list()->piece_list_fb() : - pos.eval_list()->piece_list_fw(); - assert(pieces[PieceId::PIECE_ID_BKING] != PieceSquare::PS_NONE); - assert(pieces[PieceId::PIECE_ID_WKING] != PieceSquare::PS_NONE); - for (PieceId i = PieceId::PIECE_ID_KING; i < PieceId::PIECE_ID_NONE; ++i) { - active->push_back(pieces[i] - PieceSquare::PS_END); + for (auto color : Colors) { + active->push_back(MakeIndex(perspective, pos.square(color), color)); } } @@ -32,12 +34,19 @@ void K::AppendChangedIndices( const Position& pos, Color perspective, IndexList* removed, IndexList* added) { const auto& dp = pos.state()->dirtyPiece; - if (dp.pieceId[0] >= PieceId::PIECE_ID_KING) { - removed->push_back( - dp.old_piece[0].from[perspective] - PieceSquare::PS_END); - added->push_back( - dp.new_piece[0].from[perspective] - PieceSquare::PS_END); + Color king_color; + if (dp.piece[0] == Piece::W_KING) { + king_color = WHITE; } + else if (dp.piece[0] == Piece::B_KING) { + king_color = BLACK; + } + else { + return; + } + + removed->push_back(MakeIndex(perspective, dp.from[0], king_color)); + added->push_back(MakeIndex(perspective, dp.to[0], king_color)); } } // namespace Features diff --git a/src/nnue/features/k.h b/src/nnue/features/k.h index 28431010..0c394f4e 100644 --- a/src/nnue/features/k.h +++ b/src/nnue/features/k.h @@ -35,6 +35,10 @@ class K { // Get a list of indices whose values ​​have changed from the previous one in the feature quantity static void AppendChangedIndices(const Position& pos, Color perspective, IndexList* removed, IndexList* added); + +private: + // Index of a feature for a given king position. 
+ static IndexType MakeIndex(Color perspective, Square s, Color king_color); }; } // namespace Features diff --git a/src/nnue/features/p.cpp b/src/nnue/features/p.cpp index 8b24f544..b4a6faf9 100644 --- a/src/nnue/features/p.cpp +++ b/src/nnue/features/p.cpp @@ -11,19 +11,24 @@ namespace NNUE { namespace Features { +// Orient a square according to perspective (rotates by 180 for black) +inline Square orient(Color perspective, Square s) { + return Square(int(s) ^ (bool(perspective) * 63)); +} + +// Find the index of the feature quantity from the king position and PieceSquare +inline IndexType P::MakeIndex( + Color perspective, Square s, Piece pc) { + return IndexType(orient(perspective, s) + kpp_board_index[pc][perspective]); +} + // Get a list of indices with a value of 1 among the features void P::AppendActiveIndices( const Position& pos, Color perspective, IndexList* active) { - // do nothing if array size is small to avoid compiler warning - if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return; - - const PieceSquare* pieces = (perspective == BLACK) ? 
- pos.eval_list()->piece_list_fb() : - pos.eval_list()->piece_list_fw(); - for (PieceId i = PieceId::PIECE_ID_ZERO; i < PieceId::PIECE_ID_KING; ++i) { - if (pieces[i] != PieceSquare::PS_NONE) { - active->push_back(pieces[i]); - } + Bitboard bb = pos.pieces() & ~pos.pieces(KING); + while (bb) { + Square s = pop_lsb(&bb); + active->push_back(MakeIndex(perspective, s, pos.piece_on(s))); } } @@ -33,13 +38,12 @@ void P::AppendChangedIndices( IndexList* removed, IndexList* added) { const auto& dp = pos.state()->dirtyPiece; for (int i = 0; i < dp.dirty_num; ++i) { - if (dp.pieceId[i] >= PieceId::PIECE_ID_KING) continue; - if (dp.old_piece[i].from[perspective] != PieceSquare::PS_NONE) { - removed->push_back(dp.old_piece[i].from[perspective]); - } - if (dp.new_piece[i].from[perspective] != PieceSquare::PS_NONE) { - added->push_back(dp.new_piece[i].from[perspective]); - } + Piece pc = dp.piece[i]; + if (type_of(pc) == KING) continue; + if (dp.from[i] != SQ_NONE) + removed->push_back(MakeIndex(perspective, dp.from[i], pc)); + if (dp.to[i] != SQ_NONE) + added->push_back(MakeIndex(perspective, dp.to[i], pc)); } } diff --git a/src/nnue/features/p.h b/src/nnue/features/p.h index 2a83c4ad..b3d4191e 100644 --- a/src/nnue/features/p.h +++ b/src/nnue/features/p.h @@ -22,9 +22,9 @@ class P { // Hash value embedded in the evaluation function file static constexpr std::uint32_t kHashValue = 0x764CFB4Bu; // number of feature dimensions - static constexpr IndexType kDimensions = PieceSquare::PS_END; + static constexpr IndexType kDimensions = PS_END; // The maximum value of the number of indexes whose value is 1 at the same time among the feature values - static constexpr IndexType kMaxActiveDimensions = PieceId::PIECE_ID_KING; + static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count // Timing of full calculation instead of difference calculation static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kNone; @@ -35,6 +35,10 @@ class P { // Get a list of indices 
whose values ​​have changed from the previous one in the feature quantity static void AppendChangedIndices(const Position& pos, Color perspective, IndexList* removed, IndexList* added); + + private: + // Index of a feature for a given piece on some square + static IndexType MakeIndex(Color perspective, Square s, Piece pc); }; } // namespace Features diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h index 7336be52..f24578a8 100644 --- a/src/nnue/layers/affine_transform.h +++ b/src/nnue/layers/affine_transform.h @@ -70,11 +70,10 @@ namespace Eval::NNUE::Layers { // Read network parameters bool ReadParameters(std::istream& stream) { if (!previous_layer_.ReadParameters(stream)) return false; - stream.read(reinterpret_cast(biases_), - kOutputDimensions * sizeof(BiasType)); - stream.read(reinterpret_cast(weights_), - kOutputDimensions * kPaddedInputDimensions * - sizeof(WeightType)); + for (std::size_t i = 0; i < kOutputDimensions; ++i) + biases_[i] = read_little_endian(stream); + for (std::size_t i = 0; i < kOutputDimensions * kPaddedInputDimensions; ++i) + weights_[i] = read_little_endian(stream); return !stream.fail(); } @@ -98,19 +97,32 @@ namespace Eval::NNUE::Layers { #if defined(USE_AVX512) constexpr IndexType kNumChunks = kPaddedInputDimensions / (kSimdWidth * 2); - const __m512i kOnes = _mm512_set1_epi16(1); const auto input_vector = reinterpret_cast(input); + #if !defined(USE_VNNI) + const __m512i kOnes = _mm512_set1_epi16(1); + #endif #elif defined(USE_AVX2) constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; - const __m256i kOnes = _mm256_set1_epi16(1); const auto input_vector = reinterpret_cast(input); + #if !defined(USE_VNNI) + const __m256i kOnes = _mm256_set1_epi16(1); + #endif - #elif defined(USE_SSSE3) + #elif defined(USE_SSE2) constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; + #ifndef USE_SSSE3 + const __m128i kZeros = _mm_setzero_si128(); + #else const __m128i kOnes = 
_mm_set1_epi16(1); + #endif const auto input_vector = reinterpret_cast(input); + #elif defined(USE_MMX) + constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; + const __m64 kZeros = _mm_setzero_si64(); + const auto input_vector = reinterpret_cast(input); + #elif defined(USE_NEON) constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; const auto input_vector = reinterpret_cast(input); @@ -123,60 +135,115 @@ namespace Eval::NNUE::Layers { __m512i sum = _mm512_setzero_si512(); const auto row = reinterpret_cast(&weights_[offset]); for (IndexType j = 0; j < kNumChunks; ++j) { - __m512i product = _mm512_maddubs_epi16( - _mm512_load_si512(&input_vector[j]), _mm512_load_si512(&row[j])); + #if defined(USE_VNNI) + sum = _mm512_dpbusd_epi32(sum, _mm512_loadA_si512(&input_vector[j]), _mm512_load_si512(&row[j])); + #else + __m512i product = _mm512_maddubs_epi16(_mm512_loadA_si512(&input_vector[j]), _mm512_load_si512(&row[j])); product = _mm512_madd_epi16(product, kOnes); sum = _mm512_add_epi32(sum, product); + #endif } - output[i] = _mm512_reduce_add_epi32(sum) + biases_[i]; // Note: Changing kMaxSimdWidth from 32 to 64 breaks loading existing networks. // As a result kPaddedInputDimensions may not be an even multiple of 64(512bit) // and we have to do one more 256bit chunk. 
if (kPaddedInputDimensions != kNumChunks * kSimdWidth * 2) { - const auto iv_256 = reinterpret_cast(input); - const auto row_256 = reinterpret_cast(&weights_[offset]); - int j = kNumChunks * 2; - - __m256i sum256 = _mm256_maddubs_epi16( - _mm256_load_si256(&iv_256[j]), _mm256_load_si256(&row_256[j])); - sum256 = _mm256_madd_epi16(sum256, _mm256_set1_epi16(1)); - sum256 = _mm256_hadd_epi32(sum256, sum256); - sum256 = _mm256_hadd_epi32(sum256, sum256); - const __m128i lo = _mm256_extracti128_si256(sum256, 0); - const __m128i hi = _mm256_extracti128_si256(sum256, 1); - output[i] += _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi); + const auto iv256 = reinterpret_cast(&input_vector[kNumChunks]); + const auto row256 = reinterpret_cast(&row[kNumChunks]); + #if defined(USE_VNNI) + __m256i product256 = _mm256_dpbusd_epi32( + _mm512_castsi512_si256(sum), _mm256_loadA_si256(&iv256[0]), _mm256_load_si256(&row256[0])); + sum = _mm512_inserti32x8(sum, product256, 0); + #else + __m256i product256 = _mm256_maddubs_epi16(_mm256_loadA_si256(&iv256[0]), _mm256_load_si256(&row256[0])); + sum = _mm512_add_epi32(sum, _mm512_cvtepi16_epi32(product256)); + #endif } + output[i] = _mm512_reduce_add_epi32(sum) + biases_[i]; #elif defined(USE_AVX2) __m256i sum = _mm256_setzero_si256(); const auto row = reinterpret_cast(&weights_[offset]); for (IndexType j = 0; j < kNumChunks; ++j) { - __m256i product = _mm256_maddubs_epi16( - _mm256_load_si256(&input_vector[j]), _mm256_load_si256(&row[j])); + #if defined(USE_VNNI) + sum = _mm256_dpbusd_epi32(sum, _mm256_loadA_si256(&input_vector[j]), _mm256_load_si256(&row[j])); + #else + __m256i product = _mm256_maddubs_epi16(_mm256_loadA_si256(&input_vector[j]), _mm256_load_si256(&row[j])); product = _mm256_madd_epi16(product, kOnes); sum = _mm256_add_epi32(sum, product); + #endif } - sum = _mm256_hadd_epi32(sum, sum); - sum = _mm256_hadd_epi32(sum, sum); - const __m128i lo = _mm256_extracti128_si256(sum, 0); - const __m128i hi = 
_mm256_extracti128_si256(sum, 1); - output[i] = _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi) + biases_[i]; + __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1)); + sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC)); + sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB)); + output[i] = _mm_cvtsi128_si32(sum128) + biases_[i]; #elif defined(USE_SSSE3) - __m128i sum = _mm_cvtsi32_si128(biases_[i]); + __m128i sum = _mm_setzero_si128(); const auto row = reinterpret_cast(&weights_[offset]); - for (IndexType j = 0; j < kNumChunks; ++j) { - __m128i product = _mm_maddubs_epi16( - _mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j])); + for (int j = 0; j < (int)kNumChunks - 1; j += 2) { + __m128i product0 = _mm_maddubs_epi16(_mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j])); + product0 = _mm_madd_epi16(product0, kOnes); + sum = _mm_add_epi32(sum, product0); + __m128i product1 = _mm_maddubs_epi16(_mm_load_si128(&input_vector[j+1]), _mm_load_si128(&row[j+1])); + product1 = _mm_madd_epi16(product1, kOnes); + sum = _mm_add_epi32(sum, product1); + } + if (kNumChunks & 0x1) { + __m128i product = _mm_maddubs_epi16(_mm_load_si128(&input_vector[kNumChunks-1]), _mm_load_si128(&row[kNumChunks-1])); product = _mm_madd_epi16(product, kOnes); sum = _mm_add_epi32(sum, product); } - sum = _mm_hadd_epi32(sum, sum); - sum = _mm_hadd_epi32(sum, sum); + sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC + sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB + output[i] = _mm_cvtsi128_si32(sum) + biases_[i]; + + #elif defined(USE_SSE2) + __m128i sum_lo = _mm_cvtsi32_si128(biases_[i]); + __m128i sum_hi = kZeros; + const auto row = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + __m128i row_j = _mm_load_si128(&row[j]); + __m128i input_j = _mm_load_si128(&input_vector[j]); + __m128i row_signs = _mm_cmpgt_epi8(kZeros, row_j); 
+ __m128i extended_row_lo = _mm_unpacklo_epi8(row_j, row_signs); + __m128i extended_row_hi = _mm_unpackhi_epi8(row_j, row_signs); + __m128i extended_input_lo = _mm_unpacklo_epi8(input_j, kZeros); + __m128i extended_input_hi = _mm_unpackhi_epi8(input_j, kZeros); + __m128i product_lo = _mm_madd_epi16(extended_row_lo, extended_input_lo); + __m128i product_hi = _mm_madd_epi16(extended_row_hi, extended_input_hi); + sum_lo = _mm_add_epi32(sum_lo, product_lo); + sum_hi = _mm_add_epi32(sum_hi, product_hi); + } + __m128i sum = _mm_add_epi32(sum_lo, sum_hi); + __m128i sum_high_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2)); + sum = _mm_add_epi32(sum, sum_high_64); + __m128i sum_second_32 = _mm_shufflelo_epi16(sum, _MM_SHUFFLE(1, 0, 3, 2)); + sum = _mm_add_epi32(sum, sum_second_32); output[i] = _mm_cvtsi128_si32(sum); + #elif defined(USE_MMX) + __m64 sum_lo = _mm_cvtsi32_si64(biases_[i]); + __m64 sum_hi = kZeros; + const auto row = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + __m64 row_j = row[j]; + __m64 input_j = input_vector[j]; + __m64 row_signs = _mm_cmpgt_pi8(kZeros, row_j); + __m64 extended_row_lo = _mm_unpacklo_pi8(row_j, row_signs); + __m64 extended_row_hi = _mm_unpackhi_pi8(row_j, row_signs); + __m64 extended_input_lo = _mm_unpacklo_pi8(input_j, kZeros); + __m64 extended_input_hi = _mm_unpackhi_pi8(input_j, kZeros); + __m64 product_lo = _mm_madd_pi16(extended_row_lo, extended_input_lo); + __m64 product_hi = _mm_madd_pi16(extended_row_hi, extended_input_hi); + sum_lo = _mm_add_pi32(sum_lo, product_lo); + sum_hi = _mm_add_pi32(sum_hi, product_hi); + } + __m64 sum = _mm_add_pi32(sum_lo, sum_hi); + sum = _mm_add_pi32(sum, _mm_unpackhi_pi32(sum, sum)); + output[i] = _mm_cvtsi64_si32(sum); + #elif defined(USE_NEON) int32x4_t sum = {biases_[i]}; const auto row = reinterpret_cast(&weights_[offset]); @@ -196,6 +263,9 @@ namespace Eval::NNUE::Layers { #endif } + #if defined(USE_MMX) + _mm_empty(); + #endif return output; } diff 
--git a/src/nnue/layers/clipped_relu.h b/src/nnue/layers/clipped_relu.h index 9b5a5f5f..d923986e 100644 --- a/src/nnue/layers/clipped_relu.h +++ b/src/nnue/layers/clipped_relu.h @@ -86,18 +86,17 @@ namespace Eval::NNUE::Layers { const auto out = reinterpret_cast<__m256i*>(output); for (IndexType i = 0; i < kNumChunks; ++i) { const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32( - _mm256_load_si256(&in[i * 4 + 0]), - _mm256_load_si256(&in[i * 4 + 1])), kWeightScaleBits); + _mm256_loadA_si256(&in[i * 4 + 0]), + _mm256_loadA_si256(&in[i * 4 + 1])), kWeightScaleBits); const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32( - _mm256_load_si256(&in[i * 4 + 2]), - _mm256_load_si256(&in[i * 4 + 3])), kWeightScaleBits); - _mm256_store_si256( - &out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8( + _mm256_loadA_si256(&in[i * 4 + 2]), + _mm256_loadA_si256(&in[i * 4 + 3])), kWeightScaleBits); + _mm256_storeA_si256(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8( _mm256_packs_epi16(words0, words1), kZero), kOffsets)); } constexpr IndexType kStart = kNumChunks * kSimdWidth; - #elif defined(USE_SSSE3) + #elif defined(USE_SSE2) constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth; #ifdef USE_SSE41 @@ -128,6 +127,24 @@ namespace Eval::NNUE::Layers { } constexpr IndexType kStart = kNumChunks * kSimdWidth; + #elif defined(USE_MMX) + constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth; + const __m64 k0x80s = _mm_set1_pi8(-128); + const auto in = reinterpret_cast(input); + const auto out = reinterpret_cast<__m64*>(output); + for (IndexType i = 0; i < kNumChunks; ++i) { + const __m64 words0 = _mm_srai_pi16( + _mm_packs_pi32(in[i * 4 + 0], in[i * 4 + 1]), + kWeightScaleBits); + const __m64 words1 = _mm_srai_pi16( + _mm_packs_pi32(in[i * 4 + 2], in[i * 4 + 3]), + kWeightScaleBits); + const __m64 packedbytes = _mm_packs_pi16(words0, words1); + out[i] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s); + } + _mm_empty(); + constexpr IndexType 
kStart = kNumChunks * kSimdWidth; + #elif defined(USE_NEON) constexpr IndexType kNumChunks = kInputDimensions / (kSimdWidth / 2); const int8x8_t kZero = {0}; diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h index 2a354a3c..69dfaad2 100644 --- a/src/nnue/nnue_accumulator.h +++ b/src/nnue/nnue_accumulator.h @@ -26,7 +26,7 @@ namespace Eval::NNUE { // Class that holds the result of affine transformation of input features - struct alignas(32) Accumulator { + struct alignas(kCacheLineSize) Accumulator { std::int16_t accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions]; Value score; diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h index 36fda7d7..d7ffa21a 100644 --- a/src/nnue/nnue_common.h +++ b/src/nnue/nnue_common.h @@ -21,6 +21,9 @@ #ifndef NNUE_COMMON_H_INCLUDED #define NNUE_COMMON_H_INCLUDED +#include +#include + #if defined(USE_AVX2) #include @@ -33,10 +36,36 @@ #elif defined(USE_SSE2) #include +#elif defined(USE_MMX) +#include + #elif defined(USE_NEON) #include #endif +// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Otherwise a binary +// compiled with older g++ crashes because the output memory is not aligned +// even though alignas is specified. 
+#if defined(USE_AVX2) +#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32) && !defined(__clang__) +#define _mm256_loadA_si256 _mm256_loadu_si256 +#define _mm256_storeA_si256 _mm256_storeu_si256 +#else +#define _mm256_loadA_si256 _mm256_load_si256 +#define _mm256_storeA_si256 _mm256_store_si256 +#endif +#endif + +#if defined(USE_AVX512) +#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32) && !defined(__clang__) +#define _mm512_loadA_si512 _mm512_loadu_si512 +#define _mm512_storeA_si512 _mm512_storeu_si512 +#else +#define _mm512_loadA_si512 _mm512_load_si512 +#define _mm512_storeA_si512 _mm512_store_si512 +#endif +#endif + namespace Eval::NNUE { // Version of the evaluation file @@ -56,12 +85,36 @@ namespace Eval::NNUE { #elif defined(USE_SSE2) constexpr std::size_t kSimdWidth = 16; + #elif defined(USE_MMX) + constexpr std::size_t kSimdWidth = 8; + #elif defined(USE_NEON) constexpr std::size_t kSimdWidth = 16; #endif constexpr std::size_t kMaxSimdWidth = 32; + // unique number for each piece type on each square + enum { + PS_NONE = 0, + PS_W_PAWN = 1, + PS_B_PAWN = 1 * SQUARE_NB + 1, + PS_W_KNIGHT = 2 * SQUARE_NB + 1, + PS_B_KNIGHT = 3 * SQUARE_NB + 1, + PS_W_BISHOP = 4 * SQUARE_NB + 1, + PS_B_BISHOP = 5 * SQUARE_NB + 1, + PS_W_ROOK = 6 * SQUARE_NB + 1, + PS_B_ROOK = 7 * SQUARE_NB + 1, + PS_W_QUEEN = 8 * SQUARE_NB + 1, + PS_B_QUEEN = 9 * SQUARE_NB + 1, + PS_W_KING = 10 * SQUARE_NB + 1, + PS_END = PS_W_KING, // pieces without kings (pawns included) + PS_B_KING = 11 * SQUARE_NB + 1, + PS_END2 = 12 * SQUARE_NB + 1 + }; + + extern uint32_t kpp_board_index[PIECE_NB][COLOR_NB]; + // Type of input feature after conversion using TransformedFeatureType = std::uint8_t; using IndexType = std::uint32_t; @@ -73,7 +126,25 @@ namespace Eval::NNUE { // Round n up to be a multiple of base template constexpr IntType CeilToMultiple(IntType n, IntType base) { - return (n + base - 1) / base * base; + return (n + base - 1) / base * base; + } + + // read_little_endian() is 
our utility to read an integer (signed or unsigned, any size) + // from a stream in little-endian order. We swap the byte order after the read if + // necessary to return a result with the byte ordering of the compiling machine. + template + inline IntType read_little_endian(std::istream& stream) { + + IntType result; + std::uint8_t u[sizeof(IntType)]; + typename std::make_unsigned::type v = 0; + + stream.read(reinterpret_cast(u), sizeof(IntType)); + for (std::size_t i = 0; i < sizeof(IntType); ++i) + v = (v << 8) | u[sizeof(IntType) - i - 1]; + + std::memcpy(&result, &v, sizeof(IntType)); + return result; } } // namespace Eval::NNUE diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index 29e6db6e..e1bc2ab8 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -62,10 +62,10 @@ namespace Eval::NNUE { // Read network parameters bool ReadParameters(std::istream& stream) { - stream.read(reinterpret_cast(biases_), - kHalfDimensions * sizeof(BiasType)); - stream.read(reinterpret_cast(weights_), - kHalfDimensions * kInputDimensions * sizeof(WeightType)); + for (std::size_t i = 0; i < kHalfDimensions; ++i) + biases_[i] = read_little_endian(stream); + for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i) + weights_[i] = read_little_endian(stream); return !stream.fail(); } @@ -104,7 +104,7 @@ namespace Eval::NNUE { constexpr int kControl = 0b11011000; const __m256i kZero = _mm256_setzero_si256(); - #elif defined(USE_SSSE3) + #elif defined(USE_SSE2) constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; #ifdef USE_SSE41 @@ -113,6 +113,10 @@ namespace Eval::NNUE { const __m128i k0x80s = _mm_set1_epi8(-128); #endif + #elif defined(USE_MMX) + constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; + const __m64 k0x80s = _mm_set1_pi8(-128); + #elif defined(USE_NEON) constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); const int8x8_t kZero = {0}; @@ -125,17 
+129,15 @@ namespace Eval::NNUE { #if defined(USE_AVX2) auto out = reinterpret_cast<__m256i*>(&output[offset]); for (IndexType j = 0; j < kNumChunks; ++j) { - __m256i sum0 = - _mm256_load_si256(&reinterpret_cast( - accumulation[perspectives[p]][0])[j * 2 + 0]); - __m256i sum1 = - _mm256_load_si256(&reinterpret_cast( - accumulation[perspectives[p]][0])[j * 2 + 1]); - _mm256_store_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8( + __m256i sum0 = _mm256_loadA_si256( + &reinterpret_cast(accumulation[perspectives[p]][0])[j * 2 + 0]); + __m256i sum1 = _mm256_loadA_si256( + &reinterpret_cast(accumulation[perspectives[p]][0])[j * 2 + 1]); + _mm256_storeA_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8( _mm256_packs_epi16(sum0, sum1), kZero), kControl)); } - #elif defined(USE_SSSE3) + #elif defined(USE_SSE2) auto out = reinterpret_cast<__m128i*>(&output[offset]); for (IndexType j = 0; j < kNumChunks; ++j) { __m128i sum0 = _mm_load_si128(&reinterpret_cast( @@ -155,6 +157,17 @@ namespace Eval::NNUE { ); } + #elif defined(USE_MMX) + auto out = reinterpret_cast<__m64*>(&output[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + __m64 sum0 = *(&reinterpret_cast( + accumulation[perspectives[p]][0])[j * 2 + 0]); + __m64 sum1 = *(&reinterpret_cast( + accumulation[perspectives[p]][0])[j * 2 + 1]); + const __m64 packedbytes = _mm_packs_pi16(sum0, sum1); + out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s); + } + #elif defined(USE_NEON) const auto out = reinterpret_cast(&output[offset]); for (IndexType j = 0; j < kNumChunks; ++j) { @@ -172,6 +185,9 @@ namespace Eval::NNUE { #endif } + #if defined(USE_MMX) + _mm_empty(); + #endif } private: @@ -187,23 +203,37 @@ namespace Eval::NNUE { kHalfDimensions * sizeof(BiasType)); for (const auto index : active_indices[perspective]) { const IndexType offset = kHalfDimensions * index; + #if defined(USE_AVX512) + auto accumulation = reinterpret_cast<__m512i*>( + &accumulator.accumulation[perspective][i][0]); + 
auto column = reinterpret_cast(&weights_[offset]); + constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; + for (IndexType j = 0; j < kNumChunks; ++j) + _mm512_storeA_si512(&accumulation[j], _mm512_add_epi16(_mm512_loadA_si512(&accumulation[j]), column[j])); - #if defined(USE_AVX2) + #elif defined(USE_AVX2) auto accumulation = reinterpret_cast<__m256i*>( &accumulator.accumulation[perspective][i][0]); auto column = reinterpret_cast(&weights_[offset]); constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) { - accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]); - } + for (IndexType j = 0; j < kNumChunks; ++j) + _mm256_storeA_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadA_si256(&accumulation[j]), column[j])); #elif defined(USE_SSE2) auto accumulation = reinterpret_cast<__m128i*>( &accumulator.accumulation[perspective][i][0]); auto column = reinterpret_cast(&weights_[offset]); constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) { + for (IndexType j = 0; j < kNumChunks; ++j) accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); + + #elif defined(USE_MMX) + auto accumulation = reinterpret_cast<__m64*>( + &accumulator.accumulation[perspective][i][0]); + auto column = reinterpret_cast(&weights_[offset]); + constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = _mm_add_pi16(accumulation[j], column[j]); } #elif defined(USE_NEON) @@ -211,18 +241,19 @@ namespace Eval::NNUE { &accumulator.accumulation[perspective][i][0]); auto column = reinterpret_cast(&weights_[offset]); constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) { + for (IndexType j = 0; j < kNumChunks; ++j) accumulation[j] = vaddq_s16(accumulation[j], column[j]); - } #else - for (IndexType j = 0; j < kHalfDimensions; ++j) 
{ + for (IndexType j = 0; j < kHalfDimensions; ++j) accumulator.accumulation[perspective][i][j] += weights_[offset + j]; - } #endif } } + #if defined(USE_MMX) + _mm_empty(); + #endif accumulator.computed_accumulation = true; accumulator.computed_score = false; @@ -249,6 +280,11 @@ namespace Eval::NNUE { auto accumulation = reinterpret_cast<__m128i*>( &accumulator.accumulation[perspective][i][0]); + #elif defined(USE_MMX) + constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); + auto accumulation = reinterpret_cast<__m64*>( + &accumulator.accumulation[perspective][i][0]); + #elif defined(USE_NEON) constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); auto accumulation = reinterpret_cast( @@ -278,6 +314,12 @@ namespace Eval::NNUE { accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]); } + #elif defined(USE_MMX) + auto column = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = _mm_sub_pi16(accumulation[j], column[j]); + } + #elif defined(USE_NEON) auto column = reinterpret_cast(&weights_[offset]); for (IndexType j = 0; j < kNumChunks; ++j) { @@ -309,6 +351,12 @@ namespace Eval::NNUE { accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); } + #elif defined(USE_MMX) + auto column = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = _mm_add_pi16(accumulation[j], column[j]); + } + #elif defined(USE_NEON) auto column = reinterpret_cast(&weights_[offset]); for (IndexType j = 0; j < kNumChunks; ++j) { @@ -325,6 +373,9 @@ namespace Eval::NNUE { } } } + #if defined(USE_MMX) + _mm_empty(); + #endif accumulator.computed_accumulation = true; accumulator.computed_score = false; diff --git a/src/nnue/trainer/features/factorizer_half_kp.h b/src/nnue/trainer/features/factorizer_half_kp.h index 48a99797..955894e8 100644 --- a/src/nnue/trainer/features/factorizer_half_kp.h +++ b/src/nnue/trainer/features/factorizer_half_kp.h @@ -62,8 
+62,8 @@ class Factorizer> { IndexType index_offset = AppendBaseFeature( kProperties[kFeaturesHalfKP], base_index, training_features); - const auto sq_k = static_cast(base_index / PieceSquare::PS_END); - const auto p = static_cast(base_index % PieceSquare::PS_END); + const auto sq_k = static_cast(base_index / PS_END); + const auto p = static_cast(base_index % PS_END); // kFeaturesHalfK { const auto& properties = kProperties[kFeaturesHalfK]; @@ -76,7 +76,7 @@ class Factorizer> { index_offset += InheritFeaturesIfRequired

( index_offset, kProperties[kFeaturesP], p, training_features); // kFeaturesHalfRelativeKP - if (p >= PieceSquare::PS_W_PAWN) { + if (p >= PS_W_PAWN) { index_offset += InheritFeaturesIfRequired>( index_offset, kProperties[kFeaturesHalfRelativeKP], HalfRelativeKP::MakeIndex(sq_k, p), diff --git a/src/pawns.cpp b/src/pawns.cpp index 868d0c8e..af0f6618 100644 --- a/src/pawns.cpp +++ b/src/pawns.cpp @@ -219,7 +219,7 @@ Score Entry::evaluate_shelter(const Position& pos, Square ksq) const { Score bonus = make_score(5, 5); - File center = Utility::clamp(file_of(ksq), FILE_B, FILE_G); + File center = std::clamp(file_of(ksq), FILE_B, FILE_G); for (File f = File(center - 1); f <= File(center + 1); ++f) { b = ourPawns & file_bb(f); diff --git a/src/position.cpp b/src/position.cpp index 46e5d78b..fe89b753 100644 --- a/src/position.cpp +++ b/src/position.cpp @@ -198,9 +198,6 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th std::fill_n(&pieceList[0][0], sizeof(pieceList) / sizeof(Square), SQ_NONE); st = si; - // Each piece on board gets a unique ID used to track the piece later - PieceId piece_id, next_piece_id = PIECE_ID_ZERO; - ss >> std::noskipws; // 1. Piece placement @@ -212,21 +209,8 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th else if (token == '/') sq += 2 * SOUTH; - else if ((idx = PieceToChar.find(token)) != string::npos) - { - auto pc = Piece(idx); - put_piece(pc, sq); - - if (Eval::useNNUE) - { - // Kings get a fixed ID, other pieces get ID in order of placement - piece_id = - (idx == W_KING) ? PIECE_ID_WKING : - (idx == B_KING) ? 
PIECE_ID_BKING : - next_piece_id++; - evalList.put_piece(piece_id, sq, pc); - } - + else if ((idx = PieceToChar.find(token)) != string::npos) { + put_piece(Piece(idx), sq); ++sq; } } @@ -721,8 +705,6 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { // Used by NNUE st->accumulator.computed_accumulation = false; st->accumulator.computed_score = false; - PieceId dp0 = PIECE_ID_NONE; - PieceId dp1 = PIECE_ID_NONE; auto& dp = st->dirtyPiece; dp.dirty_num = 1; @@ -775,12 +757,10 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { if (Eval::useNNUE) { - dp.dirty_num = 2; // 2 pieces moved - dp1 = piece_id_on(capsq); - dp.pieceId[1] = dp1; - dp.old_piece[1] = evalList.piece_with_id(dp1); - evalList.put_piece(dp1, capsq, NO_PIECE); - dp.new_piece[1] = evalList.piece_with_id(dp1); + dp.dirty_num = 2; // 1 piece moved, 1 piece captured + dp.piece[1] = captured; + dp.from[1] = capsq; + dp.to[1] = SQ_NONE; } // Update board and piece lists @@ -821,11 +801,9 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { { if (Eval::useNNUE) { - dp0 = piece_id_on(from); - dp.pieceId[0] = dp0; - dp.old_piece[0] = evalList.piece_with_id(dp0); - evalList.put_piece(dp0, to, pc); - dp.new_piece[0] = evalList.piece_with_id(dp0); + dp.piece[0] = pc; + dp.from[0] = from; + dp.to[0] = to; } move_piece(from, to); @@ -854,9 +832,12 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { if (Eval::useNNUE) { - dp0 = piece_id_on(to); - evalList.put_piece(dp0, to, promotion); - dp.new_piece[0] = evalList.piece_with_id(dp0); + // Promoting pawn to SQ_NONE, promoted piece from SQ_NONE + dp.to[0] = SQ_NONE; + dp.piece[dp.dirty_num] = promotion; + dp.from[dp.dirty_num] = SQ_NONE; + dp.to[dp.dirty_num] = to; + dp.dirty_num++; } // Update hash keys @@ -950,12 +931,6 @@ void Position::undo_move(Move m) { { move_piece(to, from); // Put the piece back at the source square - if (Eval::useNNUE) - { - PieceId dp0 = st->dirtyPiece.pieceId[0]; 
- evalList.put_piece(dp0, from, pc); - } - if (st->capturedPiece) { Square capsq = to; @@ -972,14 +947,6 @@ void Position::undo_move(Move m) { } put_piece(st->capturedPiece, capsq); // Restore the captured piece - - if (Eval::useNNUE) - { - PieceId dp1 = st->dirtyPiece.pieceId[1]; - assert(evalList.piece_with_id(dp1).from[WHITE] == PS_NONE); - assert(evalList.piece_with_id(dp1).from[BLACK] == PS_NONE); - evalList.put_piece(dp1, capsq, st->capturedPiece); - } } } @@ -1001,32 +968,16 @@ void Position::do_castling(Color us, Square from, Square& to, Square& rfrom, Squ rto = relative_square(us, kingSide ? SQ_F1 : SQ_D1); to = relative_square(us, kingSide ? SQ_G1 : SQ_C1); - if (Eval::useNNUE) + if (Do && Eval::useNNUE) { - PieceId dp0, dp1; auto& dp = st->dirtyPiece; - dp.dirty_num = 2; // 2 pieces moved - - if (Do) - { - dp0 = piece_id_on(from); - dp1 = piece_id_on(rfrom); - dp.pieceId[0] = dp0; - dp.old_piece[0] = evalList.piece_with_id(dp0); - evalList.put_piece(dp0, to, make_piece(us, KING)); - dp.new_piece[0] = evalList.piece_with_id(dp0); - dp.pieceId[1] = dp1; - dp.old_piece[1] = evalList.piece_with_id(dp1); - evalList.put_piece(dp1, rto, make_piece(us, ROOK)); - dp.new_piece[1] = evalList.piece_with_id(dp1); - } - else - { - dp0 = piece_id_on(to); - dp1 = piece_id_on(rto); - evalList.put_piece(dp0, from, make_piece(us, KING)); - evalList.put_piece(dp1, rfrom, make_piece(us, ROOK)); - } + dp.piece[0] = make_piece(us, KING); + dp.from[0] = from; + dp.to[0] = to; + dp.piece[1] = make_piece(us, ROOK); + dp.from[1] = rfrom; + dp.to[1] = rto; + dp.dirty_num = 2; } // Remove both pieces first since squares could overlap in Chess960 @@ -1145,8 +1096,8 @@ bool Position::see_ge(Move m, Value threshold) const { // Don't allow pinned pieces to attack (except the king) as long as // there are pinners on their original square. 
- if (st->pinners[~stm] & occupied) - stmAttackers &= ~st->blockersForKing[stm]; + if (pinners(~stm) & occupied) + stmAttackers &= ~blockers_for_king(stm); if (!stmAttackers) break; diff --git a/src/position.h b/src/position.h index b5dbaf59..e3f758e0 100644 --- a/src/position.h +++ b/src/position.h @@ -116,6 +116,7 @@ public: Bitboard checkers() const; Bitboard blockers_for_king(Color c) const; Bitboard check_squares(PieceType pt) const; + Bitboard pinners(Color c) const; bool is_discovery_check_on_king(Color c, Move m) const; // Attacks to/from a given square @@ -173,7 +174,6 @@ public: // Used by NNUE StateInfo* state() const; - const EvalList* eval_list() const; #if defined(EVAL_LEARN) // --sfenization helper @@ -208,9 +208,6 @@ private: template void do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto); - // ID of a piece on a given square - PieceId piece_id_on(Square sq) const; - // Data members Piece board[SQUARE_NB]; Bitboard byTypeBB[PIECE_TYPE_NB]; @@ -227,9 +224,6 @@ private: Thread* thisThread; StateInfo* st; bool chess960; - - // List of pieces used in NNUE evaluation function - EvalList evalList; }; namespace PSQT { @@ -332,6 +326,10 @@ inline Bitboard Position::blockers_for_king(Color c) const { return st->blockersForKing[c]; } +inline Bitboard Position::pinners(Color c) const { + return st->pinners[c]; +} + inline Bitboard Position::check_squares(PieceType pt) const { return st->checkSquares[pt]; } @@ -469,20 +467,4 @@ inline StateInfo* Position::state() const { return st; } -inline const EvalList* Position::eval_list() const { - - return &evalList; -} - -inline PieceId Position::piece_id_on(Square sq) const -{ - - assert(piece_on(sq) != NO_PIECE); - - PieceId pid = evalList.piece_id_list[sq]; - assert(is_ok(pid)); - - return pid; -} - #endif // #ifndef POSITION_H_INCLUDED diff --git a/src/search.cpp b/src/search.cpp index b7561a96..2d848bcd 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -63,9 +63,9 @@ namespace { 
constexpr uint64_t TtHitAverageResolution = 1024; // Razor and futility margins - constexpr int RazorMargin = 527; + constexpr int RazorMargin = 510; Value futility_margin(Depth d, bool improving) { - return Value(227 * (d - improving)); + return Value(223 * (d - improving)); } bool training; @@ -75,7 +75,7 @@ namespace { Depth reduction(bool i, Depth d, int mn) { int r = Reductions[d] * Reductions[mn]; - return (r + 570) / 1024 + (!i && r > 1018); + return (r + 509) / 1024 + (!i && r > 894); } constexpr int futility_move_count(bool improving, Depth depth) { @@ -84,7 +84,7 @@ namespace { // History and stats update bonus, based on depth int stat_bonus(Depth d) { - return d > 15 ? 27 : 17 * d * d + 133 * d - 134; + return d > 13 ? 29 : 17 * d * d + 134 * d - 134; } // Add a small random component to draw evaluations to avoid 3fold-blindness @@ -194,7 +194,7 @@ namespace { void Search::init() { for (int i = 1; i < MAX_MOVES; ++i) - Reductions[i] = int((24.8 + std::log(Threads.size())) * std::log(i)); + Reductions[i] = int((22.0 + std::log(Threads.size())) * std::log(i)); training = Options["Training"]; } @@ -339,7 +339,7 @@ void Thread::search() { // for match (TC 60+0.6) results spanning a wide range of k values. PRNG rng(now()); double floatLevel = Options["UCI_LimitStrength"] ? - Utility::clamp(std::pow((Options["UCI_Elo"] - 1346.6) / 143.4, 1 / 0.806), 0.0, 20.0) : + std::clamp(std::pow((Options["UCI_Elo"] - 1346.6) / 143.4, 1 / 0.806), 0.0, 20.0) : double(Options["Skill Level"]); int intLevel = int(floatLevel) + ((floatLevel - int(floatLevel)) * 1024 > rng.rand() % 1024 ? 
1 : 0); @@ -407,12 +407,12 @@ void Thread::search() { if (rootDepth >= 4) { Value prev = rootMoves[pvIdx].previousScore; - delta = Value(19); + delta = Value(17); alpha = std::max(prev - delta,-VALUE_INFINITE); beta = std::min(prev + delta, VALUE_INFINITE); // Adjust contempt based on root move's previousScore (dynamic contempt) - int dct = ct + (110 - ct / 2) * prev / (abs(prev) + 140); + int dct = ct + (105 - ct / 2) * prev / (abs(prev) + 149); contempt = (us == WHITE ? make_score(dct, dct / 2) : -make_score(dct, dct / 2)); @@ -510,13 +510,13 @@ void Thread::search() { && !Threads.stop && !mainThread->stopOnPonderhit) { - double fallingEval = (296 + 6 * (mainThread->bestPreviousScore - bestValue) - + 6 * (mainThread->iterValue[iterIdx] - bestValue)) / 725.0; - fallingEval = Utility::clamp(fallingEval, 0.5, 1.5); + double fallingEval = (318 + 6 * (mainThread->bestPreviousScore - bestValue) + + 6 * (mainThread->iterValue[iterIdx] - bestValue)) / 825.0; + fallingEval = std::clamp(fallingEval, 0.5, 1.5); // If the bestMove is stable over several iterations, reduce time accordingly - timeReduction = lastBestMoveDepth + 10 < completedDepth ? 1.92 : 0.95; - double reduction = (1.47 + mainThread->previousTimeReduction) / (2.22 * timeReduction); + timeReduction = lastBestMoveDepth + 9 < completedDepth ? 
1.92 : 0.95; + double reduction = (1.47 + mainThread->previousTimeReduction) / (2.32 * timeReduction); // Use part of the gained time from a previous stable move for the current move for (Thread* th : Threads) @@ -541,7 +541,7 @@ void Thread::search() { } else if ( Threads.increaseDepth && !mainThread->ponder - && Time.elapsed() > totalTime * 0.56) + && Time.elapsed() > totalTime * 0.58) Threads.increaseDepth = false; else Threads.increaseDepth = true; @@ -600,7 +600,7 @@ namespace { Key posKey; Move ttMove, move, excludedMove, bestMove; Depth extension, newDepth; - Value bestValue, value, ttValue, eval, maxValue, probcutBeta; + Value bestValue, value, ttValue, eval, maxValue, probCutBeta; bool ttHit, ttPv, formerPv, givesCheck, improving, didLMR, priorCapture; bool captureOrPromotion, doFullDepthSearch, moveCountPruning, ttCapture, singularQuietLMR; @@ -798,11 +798,7 @@ namespace { else { if ((ss-1)->currentMove != MOVE_NULL) - { - int bonus = -(ss-1)->statScore / 512; - - ss->staticEval = eval = evaluate(pos) + bonus; - } + ss->staticEval = eval = evaluate(pos); else ss->staticEval = eval = -(ss-1)->staticEval + 2 * Tempo; @@ -815,8 +811,9 @@ namespace { && eval <= alpha - RazorMargin) return qsearch(pos, ss, alpha, beta); - improving = (ss-2)->staticEval == VALUE_NONE ? (ss->staticEval > (ss-4)->staticEval - || (ss-4)->staticEval == VALUE_NONE) : ss->staticEval > (ss-2)->staticEval; + improving = (ss-2)->staticEval == VALUE_NONE + ? ss->staticEval > (ss-4)->staticEval || (ss-4)->staticEval == VALUE_NONE + : ss->staticEval > (ss-2)->staticEval; // Step 8. Futility pruning: child node (~50 Elo) if ( !PvNode @@ -828,10 +825,10 @@ namespace { // Step 9. 
Null move search with verification search (~40 Elo) if ( !PvNode && (ss-1)->currentMove != MOVE_NULL - && (ss-1)->statScore < 23824 + && (ss-1)->statScore < 22977 && eval >= beta && eval >= ss->staticEval - && ss->staticEval >= beta - 28 * depth - 28 * improving + 94 * ttPv + 200 + && ss->staticEval >= beta - 30 * depth - 28 * improving + 84 * ttPv + 182 && !excludedMove && pos.non_pawn_material(us) && (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor)) @@ -839,7 +836,7 @@ namespace { assert(eval - beta >= 0); // Null move dynamic reduction based on depth and value - Depth R = (737 + 77 * depth) / 246 + std::min(int(eval - beta) / 192, 3); + Depth R = (817 + 71 * depth) / 213 + std::min(int(eval - beta) / 192, 3); ss->currentMove = MOVE_NULL; ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0]; @@ -875,7 +872,7 @@ namespace { } } - probcutBeta = beta + 176 - 49 * improving; + probCutBeta = beta + 176 - 49 * improving; // Step 10. ProbCut (~10 Elo) // If we have a good enough capture and a reduced search returns a value @@ -883,21 +880,27 @@ namespace { if ( !PvNode && depth > 4 && abs(beta) < VALUE_TB_WIN_IN_MAX_PLY + // if value from transposition table is lower than probCutBeta, don't attempt probCut + // there and in further interactions with transposition table cutoff depth is set to depth - 3 + // because probCut search has depth set to depth - 4 but we also do a move before it + // so effective depth is equal to depth - 3 && !( ttHit && tte->depth() >= depth - 3 && ttValue != VALUE_NONE - && ttValue < probcutBeta)) + && ttValue < probCutBeta)) { + // if ttMove is a capture and value from transposition table is good enough produce probCut + // cutoff without digging into actual probCut search if ( ttHit && tte->depth() >= depth - 3 && ttValue != VALUE_NONE - && ttValue >= probcutBeta + && ttValue >= probCutBeta && ttMove && pos.capture_or_promotion(ttMove)) - return probcutBeta; + return probCutBeta; - assert(probcutBeta 
< VALUE_INFINITE); - MovePicker mp(pos, ttMove, probcutBeta - ss->staticEval, &captureHistory); + assert(probCutBeta < VALUE_INFINITE); + MovePicker mp(pos, ttMove, probCutBeta - ss->staticEval, &captureHistory); int probCutCount = 0; while ( (move = mp.next_move()) != MOVE_NONE @@ -919,16 +922,17 @@ namespace { pos.do_move(move, st); // Perform a preliminary qsearch to verify that the move holds - value = -qsearch(pos, ss+1, -probcutBeta, -probcutBeta+1); + value = -qsearch(pos, ss+1, -probCutBeta, -probCutBeta+1); // If the qsearch held, perform the regular search - if (value >= probcutBeta) - value = -search(pos, ss+1, -probcutBeta, -probcutBeta+1, depth - 4, !cutNode); + if (value >= probCutBeta) + value = -search(pos, ss+1, -probCutBeta, -probCutBeta+1, depth - 4, !cutNode); pos.undo_move(move); - if (value >= probcutBeta) + if (value >= probCutBeta) { + // if transposition table doesn't have equal or more deep info write probCut data into it if ( !(ttHit && tte->depth() >= depth - 3 && ttValue != VALUE_NONE)) @@ -940,16 +944,6 @@ namespace { } } - // Step 11. Internal iterative deepening (~1 Elo) - if (depth >= 7 && !ttMove) - { - search(pos, ss, alpha, beta, depth - 7, cutNode); - - tte = TT.probe(posKey, ttHit); - ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; - ttMove = ttHit ? tte->move() : MOVE_NONE; - } - moves_loop: // When in check, search starts from here const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory, @@ -973,7 +967,7 @@ moves_loop: // When in check, search starts from here // Mark this node as being searched ThreadHolding th(thisThread, posKey, ss->ply); - // Step 12. Loop through all pseudo-legal moves until no moves remain + // Step 11. Loop through all pseudo-legal moves until no moves remain // or a beta cutoff occurs. 
while ((move = mp.next_move(moveCountPruning)) != MOVE_NONE) { @@ -1015,7 +1009,7 @@ moves_loop: // When in check, search starts from here // Calculate new depth for this move newDepth = depth - 1; - // Step 13. Pruning at shallow depth (~200 Elo) + // Step 12. Pruning at shallow depth (~200 Elo) if ( !rootNode && !(training && PvNode) && pos.non_pawn_material(us) @@ -1037,17 +1031,17 @@ moves_loop: // When in check, search starts from here continue; // Futility pruning: parent node (~5 Elo) - if ( lmrDepth < 8 + if ( lmrDepth < 7 && !ss->inCheck - && ss->staticEval + 284 + 188 * lmrDepth <= alpha + && ss->staticEval + 283 + 170 * lmrDepth <= alpha && (*contHist[0])[movedPiece][to_sq(move)] + (*contHist[1])[movedPiece][to_sq(move)] + (*contHist[3])[movedPiece][to_sq(move)] - + (*contHist[5])[movedPiece][to_sq(move)] / 2 < 28388) + + (*contHist[5])[movedPiece][to_sq(move)] / 2 < 27376) continue; // Prune moves with negative SEE (~20 Elo) - if (!pos.see_ge(move, Value(-(29 - std::min(lmrDepth, 17)) * lmrDepth * lmrDepth))) + if (!pos.see_ge(move, Value(-(29 - std::min(lmrDepth, 18)) * lmrDepth * lmrDepth))) continue; } else @@ -1064,17 +1058,17 @@ moves_loop: // When in check, search starts from here && !(PvNode && abs(bestValue) < 2) && PieceValue[MG][type_of(movedPiece)] >= PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] && !ss->inCheck - && ss->staticEval + 178 + 261 * lmrDepth + && ss->staticEval + 169 + 244 * lmrDepth + PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] <= alpha) continue; // See based pruning - if (!pos.see_ge(move, Value(-202) * depth)) // (~25 Elo) + if (!pos.see_ge(move, Value(-221) * depth)) // (~25 Elo) continue; } } - // Step 14. Extensions (~75 Elo) + // Step 13. Extensions (~75 Elo) // Singular extension search (~70 Elo). 
If all moves but one fail low on a // search of (alpha-s, beta-s), and just one fails high on (alpha, beta), @@ -1128,19 +1122,14 @@ moves_loop: // When in check, search starts from here && (pos.is_discovery_check_on_king(~us, move) || pos.see_ge(move))) extension = 1; - // Passed pawn extension - else if ( move == ss->killers[0] - && pos.advanced_pawn_push(move) - && pos.pawn_passed(us, to_sq(move))) - extension = 1; - // Last captures extension else if ( PieceValue[EG][pos.captured_piece()] > PawnValueEg && pos.non_pawn_material() <= 2 * RookValueMg) extension = 1; // Castling extension - if (type_of(move) == CASTLING) + if ( type_of(move) == CASTLING + && popcount(pos.pieces(us) & ~pos.pieces(PAWN) & (to_sq(move) & KingSide ? KingSide : QueenSide)) <= 2) extension = 1; // Late irreversible move extension @@ -1162,10 +1151,10 @@ moves_loop: // When in check, search starts from here [movedPiece] [to_sq(move)]; - // Step 15. Make the move + // Step 14. Make the move pos.do_move(move, st, givesCheck); - // Step 16. Reduced depth search (LMR, ~200 Elo). If the move fails high it will be + // Step 15. Reduced depth search (LMR, ~200 Elo). If the move fails high it will be // re-searched at full depth. 
if ( depth >= 3 && moveCount > 1 + 2 * rootNode + 2 * (PvNode && abs(bestValue) < 2) @@ -1174,7 +1163,7 @@ moves_loop: // When in check, search starts from here || moveCountPruning || ss->staticEval + PieceValue[EG][pos.captured_piece()] <= alpha || cutNode - || thisThread->ttHitAverage < 415 * TtHitAverageResolution * TtHitAverageWindow / 1024)) + || thisThread->ttHitAverage < 427 * TtHitAverageResolution * TtHitAverageWindow / 1024)) { Depth r = reduction(improving, depth, moveCount); @@ -1186,7 +1175,7 @@ moves_loop: // When in check, search starts from here r--; // Decrease reduction if the ttHit running average is large - if (thisThread->ttHitAverage > 473 * TtHitAverageResolution * TtHitAverageWindow / 1024) + if (thisThread->ttHitAverage > 509 * TtHitAverageResolution * TtHitAverageWindow / 1024) r--; // Reduction if other threads are searching this position @@ -1229,17 +1218,17 @@ moves_loop: // When in check, search starts from here + (*contHist[0])[movedPiece][to_sq(move)] + (*contHist[1])[movedPiece][to_sq(move)] + (*contHist[3])[movedPiece][to_sq(move)] - - 4826; + - 5287; // Decrease/increase reduction by comparing opponent's stat score (~10 Elo) - if (ss->statScore >= -100 && (ss-1)->statScore < -112) + if (ss->statScore >= -106 && (ss-1)->statScore < -104) r--; - else if ((ss-1)->statScore >= -125 && ss->statScore < -138) + else if ((ss-1)->statScore >= -119 && ss->statScore < -140) r++; // Decrease/increase reduction for moves with a good/bad history (~30 Elo) - r -= ss->statScore / 14615; + r -= ss->statScore / 14884; } else { @@ -1249,11 +1238,11 @@ moves_loop: // When in check, search starts from here // Unless giving check, this capture is likely bad if ( !givesCheck - && ss->staticEval + PieceValue[EG][pos.captured_piece()] + 211 * depth <= alpha) + && ss->staticEval + PieceValue[EG][pos.captured_piece()] + 213 * depth <= alpha) r++; } - Depth d = Utility::clamp(newDepth - r, 1, newDepth); + Depth d = std::clamp(newDepth - r, 1, newDepth); 
value = -search(pos, ss+1, -(alpha+1), -alpha, d, true); @@ -1268,7 +1257,7 @@ moves_loop: // When in check, search starts from here didLMR = false; } - // Step 17. Full depth search when LMR is skipped or fails high + // Step 16. Full depth search when LMR is skipped or fails high if (doFullDepthSearch) { value = -search(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode); @@ -1296,12 +1285,12 @@ moves_loop: // When in check, search starts from here value = -search(pos, ss+1, -beta, -alpha, newDepth, false); } - // Step 18. Undo move + // Step 17. Undo move pos.undo_move(move); assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); - // Step 19. Check for a new best move + // Step 18. Check for a new best move // Finished searching the move. If a stop occurred, the return value of // the search cannot be trusted, and we return immediately without // updating best move, PV and TT. @@ -1378,7 +1367,7 @@ moves_loop: // When in check, search starts from here return VALUE_DRAW; */ - // Step 20. Check for mate and stalemate + // Step 19. Check for mate and stalemate // All legal moves have been searched and if there are no legal moves, it // must be a mate or a stalemate. If we are in a singular extension search then // return a fail low score. 
@@ -1511,7 +1500,7 @@ moves_loop: // When in check, search starts from here if (PvNode && bestValue > alpha) alpha = bestValue; - futilityBase = bestValue + 141; + futilityBase = bestValue + 145; } const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory, @@ -1545,6 +1534,10 @@ moves_loop: // When in check, search starts from here { assert(type_of(move) != ENPASSANT); // Due to !pos.advanced_pawn_push + // moveCount pruning + if (moveCount > 2) + continue; + futilityValue = futilityBase + PieceValue[EG][pos.piece_on(to_sq(move))]; if (futilityValue <= alpha) @@ -1586,6 +1579,12 @@ moves_loop: // When in check, search starts from here [pos.moved_piece(move)] [to_sq(move)]; + if ( !captureOrPromotion + && moveCount >= abs(depth) + 1 + && (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold + && (*contHist[1])[pos.moved_piece(move)][to_sq(move)] < CounterMovePruneThreshold) + continue; + // Make and search the move pos.do_move(move, st, givesCheck); value = -qsearch(pos, ss+1, -beta, -alpha, depth - 1); @@ -1768,7 +1767,7 @@ moves_loop: // When in check, search starts from here } if (depth > 11 && ss->ply < MAX_LPH) - thisThread->lowPlyHistory[ss->ply][from_to(move)] << stat_bonus(depth - 6); + thisThread->lowPlyHistory[ss->ply][from_to(move)] << stat_bonus(depth - 7); } // When playing with strength handicap, choose best move among a set of RootMoves diff --git a/src/thread_win32_osx.h b/src/thread_win32_osx.h index c4b55a48..75ef5d9a 100644 --- a/src/thread_win32_osx.h +++ b/src/thread_win32_osx.h @@ -27,7 +27,7 @@ /// The implementation calls pthread_create() with the stack size parameter /// equal to the linux 8MB default, on platforms that support it. 
-#if defined(__APPLE__) || defined(__MINGW32__) || defined(__MINGW64__) +#if defined(__APPLE__) || defined(__MINGW32__) || defined(__MINGW64__) || defined(USE_PTHREADS) #include diff --git a/src/timeman.cpp b/src/timeman.cpp index df4ba9b2..6d9c95ef 100644 --- a/src/timeman.cpp +++ b/src/timeman.cpp @@ -38,9 +38,9 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) { TimePoint slowMover = TimePoint(Options["Slow Mover"]); TimePoint npmsec = TimePoint(Options["nodestime"]); - // opt_scale is a percentage of available time to use for the current move. - // max_scale is a multiplier applied to optimumTime. - double opt_scale, max_scale; + // optScale is a percentage of available time to use for the current move. + // maxScale is a multiplier applied to optimumTime. + double optScale, maxScale; // If we have to play in 'nodes as time' mode, then convert from time // to nodes, and use resulting values in time management formulas. @@ -75,22 +75,22 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) { // game time for the current move, so also cap to 20% of available game time. 
if (limits.movestogo == 0) { - opt_scale = std::min(0.008 + std::pow(ply + 3.0, 0.5) / 250.0, + optScale = std::min(0.008 + std::pow(ply + 3.0, 0.5) / 250.0, 0.2 * limits.time[us] / double(timeLeft)); - max_scale = std::min(7.0, 4.0 + ply / 12.0); + maxScale = std::min(7.0, 4.0 + ply / 12.0); } // x moves in y seconds (+ z increment) else { - opt_scale = std::min((0.8 + ply / 128.0) / mtg, + optScale = std::min((0.8 + ply / 128.0) / mtg, 0.8 * limits.time[us] / double(timeLeft)); - max_scale = std::min(6.3, 1.5 + 0.11 * mtg); + maxScale = std::min(6.3, 1.5 + 0.11 * mtg); } // Never use more than 80% of the available time for this move - optimumTime = TimePoint(opt_scale * timeLeft); - maximumTime = TimePoint(std::min(0.8 * limits.time[us] - moveOverhead, max_scale * optimumTime)); + optimumTime = TimePoint(optScale * timeLeft); + maximumTime = TimePoint(std::min(0.8 * limits.time[us] - moveOverhead, maxScale * optimumTime)); if (Options["Ponder"]) optimumTime += optimumTime / 4; diff --git a/src/tt.cpp b/src/tt.cpp index d494c27d..60a3a5f1 100644 --- a/src/tt.cpp +++ b/src/tt.cpp @@ -37,18 +37,19 @@ void TTEntry::save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev) if (m || (uint16_t)k != key16) move16 = (uint16_t)m; - // Overwrite less valuable entries - if ((uint16_t)k != key16 - || d - DEPTH_OFFSET > depth8 - 4 - || b == BOUND_EXACT) + // Overwrite less valuable entries (cheapest checks first) + if (b == BOUND_EXACT + || (uint16_t)k != key16 + || d - DEPTH_OFFSET > depth8 - 4) { - assert(d >= DEPTH_OFFSET); + assert(d > DEPTH_OFFSET); + assert(d < 256 + DEPTH_OFFSET); key16 = (uint16_t)k; + depth8 = (uint8_t)(d - DEPTH_OFFSET); + genBound8 = (uint8_t)(TT.generation8 | uint8_t(pv) << 2 | b); value16 = (int16_t)v; eval16 = (int16_t)ev; - genBound8 = (uint8_t)(TT.generation8 | uint8_t(pv) << 2 | b); - depth8 = (uint8_t)(d - DEPTH_OFFSET); } } @@ -119,11 +120,11 @@ TTEntry* TranspositionTable::probe(const Key key, bool& found) const { const uint16_t 
key16 = (uint16_t)key; // Use the low 16 bits as key inside the cluster for (int i = 0; i < ClusterSize; ++i) - if (!tte[i].key16 || tte[i].key16 == key16) + if (tte[i].key16 == key16 || !tte[i].depth8) { tte[i].genBound8 = uint8_t(generation8 | (tte[i].genBound8 & 0x7)); // Refresh - return found = (bool)tte[i].key16, &tte[i]; + return found = (bool)tte[i].depth8, &tte[i]; } // Find an entry to be replaced according to the replacement strategy @@ -149,7 +150,7 @@ int TranspositionTable::hashfull() const { int cnt = 0; for (int i = 0; i < 1000; ++i) for (int j = 0; j < ClusterSize; ++j) - cnt += (table[i].entry[j].genBound8 & 0xF8) == generation8; + cnt += table[i].entry[j].depth8 && (table[i].entry[j].genBound8 & 0xF8) == generation8; return cnt / ClusterSize; } diff --git a/src/tt.h b/src/tt.h index c177ca52..fdfd6769 100644 --- a/src/tt.h +++ b/src/tt.h @@ -25,13 +25,13 @@ /// TTEntry struct is the 10 bytes transposition table entry, defined as below: /// /// key 16 bit -/// move 16 bit -/// value 16 bit -/// eval value 16 bit +/// depth 8 bit /// generation 5 bit /// pv node 1 bit /// bound type 2 bit -/// depth 8 bit +/// move 16 bit +/// value 16 bit +/// eval value 16 bit struct TTEntry { @@ -47,11 +47,11 @@ private: friend class TranspositionTable; uint16_t key16; + uint8_t depth8; + uint8_t genBound8; uint16_t move16; int16_t value16; int16_t eval16; - uint8_t genBound8; - uint8_t depth8; }; diff --git a/src/types.h b/src/types.h index ce4c2dbb..bcc4f77f 100644 --- a/src/types.h +++ b/src/types.h @@ -203,22 +203,6 @@ enum Piece { PIECE_NB = 16 }; -// An ID used to track the pieces. Max. 32 pieces on board. 
-enum PieceId { - PIECE_ID_ZERO = 0, - PIECE_ID_KING = 30, - PIECE_ID_WKING = 30, - PIECE_ID_BKING = 31, - PIECE_ID_NONE = 32 -}; - -inline PieceId operator++(PieceId& d, int) { - - PieceId x = d; - d = PieceId(int(d) + 1); - return x; -} - constexpr Value PieceValue[PHASE_NB][PIECE_NB] = { { VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO, VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO }, @@ -234,7 +218,8 @@ enum : int { DEPTH_QS_RECAPTURES = -5, DEPTH_NONE = -6, - DEPTH_OFFSET = DEPTH_NONE + + DEPTH_OFFSET = -7 // value used only for TT entry occupancy check }; enum Square : int { @@ -272,118 +257,20 @@ enum Rank : int { RANK_1, RANK_2, RANK_3, RANK_4, RANK_5, RANK_6, RANK_7, RANK_8, RANK_NB }; -// unique number for each piece type on each square -enum PieceSquare : uint32_t { - PS_NONE = 0, - PS_W_PAWN = 1, - PS_B_PAWN = 1 * SQUARE_NB + 1, - PS_W_KNIGHT = 2 * SQUARE_NB + 1, - PS_B_KNIGHT = 3 * SQUARE_NB + 1, - PS_W_BISHOP = 4 * SQUARE_NB + 1, - PS_B_BISHOP = 5 * SQUARE_NB + 1, - PS_W_ROOK = 6 * SQUARE_NB + 1, - PS_B_ROOK = 7 * SQUARE_NB + 1, - PS_W_QUEEN = 8 * SQUARE_NB + 1, - PS_B_QUEEN = 9 * SQUARE_NB + 1, - PS_W_KING = 10 * SQUARE_NB + 1, - PS_END = PS_W_KING, // pieces without kings (pawns included) - PS_B_KING = 11 * SQUARE_NB + 1, - PS_END2 = 12 * SQUARE_NB + 1, - - PS_NOT_INIT = PS_END2 + 1, -}; - -struct ExtPieceSquare { - PieceSquare from[COLOR_NB]; -}; - -// Array for finding the PieceSquare corresponding to the piece on the board -extern ExtPieceSquare kpp_board_index[PIECE_NB]; - -constexpr bool is_ok(PieceId pid); -constexpr Square rotate180(Square sq); - -class Position; - -// Structure holding which tracked piece (PieceId) is where (PieceSquare) -class EvalList { - -public: - // Max. 
number of pieces without kings is 30 but must be a multiple of 4 in AVX2 - static const int MAX_LENGTH = 32; - - // Array that holds the piece id for the pieces on the board - PieceId piece_id_list[SQUARE_NB]; - - // List of pieces, separate from White and Black POV - PieceSquare* piece_list_fw() const { return const_cast(pieceListFw); } - PieceSquare* piece_list_fb() const { return const_cast(pieceListFb); } - - // Place the piece pc with piece_id on the square sq on the board - void put_piece(PieceId piece_id, Square sq, Piece pc) - { - assert(is_ok(piece_id)); - if (pc != NO_PIECE) - { - pieceListFw[piece_id] = PieceSquare(kpp_board_index[pc].from[WHITE] + sq); - pieceListFb[piece_id] = PieceSquare(kpp_board_index[pc].from[BLACK] + rotate180(sq)); - piece_id_list[sq] = piece_id; - } - else - { - pieceListFw[piece_id] = PS_NONE; - pieceListFb[piece_id] = PS_NONE; - piece_id_list[sq] = piece_id; - } - } - - // Convert the specified piece_id piece to ExtPieceSquare type and return it - ExtPieceSquare piece_with_id(PieceId piece_id) const - { - ExtPieceSquare eps; - eps.from[WHITE] = pieceListFw[piece_id]; - eps.from[BLACK] = pieceListFb[piece_id]; - return eps; - } - - // Initialize the pieceList. - // Set the value of unused pieces to PieceSquare::PS_NONE in case you want to deal with dropped pieces. - // A normal evaluation function can be used as an evaluation function for missing frames. - // piece_no_list is initialized with PieceId::PIECE_ID_NONE to facilitate debugging. - void clear() - { - - for (auto& p : pieceListFw) - p = PieceSquare::PS_NONE; - - for (auto& p : pieceListFb) - p = PieceSquare::PS_NONE; - - for (auto& v : piece_id_list) - v = PieceId::PIECE_ID_NONE; - } - - // Check whether the pieceListFw[] held internally is a correct BonaPiece. - // Note: For debugging. slow. 
- bool is_valid(const Position& pos); - -private: - PieceSquare pieceListFw[MAX_LENGTH]; - PieceSquare pieceListFb[MAX_LENGTH]; -}; - -// For differential evaluation of pieces that changed since last turn +// Keep track of what a move changes on the board (used by NNUE) struct DirtyPiece { // Number of changed pieces int dirty_num; - // The ids of changed pieces, max. 2 pieces can change in one move - PieceId pieceId[2]; + // Max 3 pieces can change in one move. A promotion with capture moves + // both the pawn and the captured piece to SQ_NONE and the piece promoted + // to from SQ_NONE to the capture square. + Piece piece[3]; - // What changed from the piece with that piece number - ExtPieceSquare old_piece[2]; - ExtPieceSquare new_piece[2]; + // From and to squares, which may be SQ_NONE + Square from[3]; + Square to[3]; }; /// Score enum stores a middlegame and an endgame value in a single integer (enum). @@ -433,8 +320,6 @@ ENABLE_FULL_OPERATORS_ON(Value) ENABLE_FULL_OPERATORS_ON(Direction) ENABLE_INCR_OPERATORS_ON(Piece) -ENABLE_INCR_OPERATORS_ON(PieceSquare) -ENABLE_INCR_OPERATORS_ON(PieceId) ENABLE_INCR_OPERATORS_ON(PieceType) ENABLE_INCR_OPERATORS_ON(Square) ENABLE_INCR_OPERATORS_ON(File) @@ -523,10 +408,6 @@ inline Color color_of(Piece pc) { return Color(pc >> 3); } -constexpr bool is_ok(PieceId pid) { - return pid < PIECE_ID_NONE; -} - constexpr bool is_ok(Square s) { return s >= SQ_A1 && s <= SQ_H8; } @@ -565,7 +446,7 @@ constexpr Square to_sq(Move m) { // Return relative square when turning the board 180 degrees constexpr Square rotate180(Square sq) { - return (Square)(sq ^ 0x3F); + return (Square)(sq ^ 0x3F); } constexpr int from_to(Move m) { diff --git a/src/uci.cpp b/src/uci.cpp index 00941040..d6745d19 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -260,7 +260,7 @@ double UCI::win_rate_model_double(double v, int ply) { double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3]; // Transform eval to centipawns with limited range - double x = 
Utility::clamp(double(100 * v) / PawnValueEg, -1000.0, 1000.0); + double x = std::clamp(double(100 * v) / PawnValueEg, -1000.0, 1000.0); // Return win rate in per mille return 1000.0 / (1 + std::exp((a - x) / b)); diff --git a/src/ucioption.cpp b/src/ucioption.cpp index ef40fe82..519160cf 100644 --- a/src/ucioption.cpp +++ b/src/ucioption.cpp @@ -79,8 +79,10 @@ void init(OptionsMap& o) { o["SyzygyProbeDepth"] << Option(1, 1, 100); o["Syzygy50MoveRule"] << Option(true); o["SyzygyProbeLimit"] << Option(7, 0, 7); - o["Use NNUE"] << Option(false, on_use_NNUE); - o["EvalFile"] << Option("nn-9931db908a9b.nnue", on_eval_file); + o["Use NNUE"] << Option(true, on_use_NNUE); + // The default must follow the format nn-[SHA256 first 12 digits].nnue + // for the build process (profile-build and fishtest) to work. + o["EvalFile"] << Option("nn-82215d0fd0df.nnue", on_eval_file); #ifdef EVAL_NNUE // When the evaluation function is loaded at the ucinewgame timing, it is necessary to convert the new evaluation function. 
// I want to hit the test eval convert command, but there is no new evaluation function diff --git a/tests/instrumented.sh b/tests/instrumented.sh index ae6d5c4b..03ded74a 100755 --- a/tests/instrumented.sh +++ b/tests/instrumented.sh @@ -70,7 +70,7 @@ for args in "eval" \ "go depth 10" \ "go movetime 1000" \ "go wtime 8000 btime 8000 winc 500 binc 500" \ - "bench 128 $threads 10 default depth" + "bench 128 $threads 8 default depth" do echo "$prefix $exeprefix ./stockfish $args $postfix" @@ -80,7 +80,7 @@ done # more general testing, following an uci protocol exchange cat << EOF > game.exp - set timeout 10 + set timeout 240 spawn $exeprefix ./stockfish send "uci\n" @@ -98,7 +98,7 @@ cat << EOF > game.exp expect "bestmove" send "position fen 5rk1/1K4p1/8/8/3B4/8/8/8 b - - 0 1\n" - send "go depth 30\n" + send "go depth 20\n" expect "bestmove" send "quit\n" @@ -121,7 +121,7 @@ cat << EOF > syzygy.exp send "uci\n" send "setoption name SyzygyPath value ../tests/syzygy/\n" expect "info string Found 35 tablebases" {} timeout {exit 1} - send "bench 128 1 10 default depth\n" + send "bench 128 1 8 default depth\n" send "quit\n" expect eof