diff --git a/.github/workflows/stockfish.yml b/.github/workflows/stockfish.yml index 07ecfc07..99c4259a 100644 --- a/.github/workflows/stockfish.yml +++ b/.github/workflows/stockfish.yml @@ -1,6 +1,8 @@ name: Stockfish on: push: + tags: + - '*' branches: - master - tools @@ -10,6 +12,31 @@ on: - master - tools jobs: + Prerelease: + if: github.ref == 'refs/heads/master' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + + # returns null if no pre-release exists + - name: Get Commit SHA of Latest Pre-release + run: | + # Install required packages + sudo apt-get update + sudo apt-get install -y curl jq + + echo "COMMIT_SHA=$(jq -r 'map(select(.prerelease)) | first | .tag_name' <<< $(curl -s https://api.github.com/repos/${{ github.repository_owner }}/Stockfish/releases))" >> $GITHUB_ENV + + # delete old previous pre-release and tag + - uses: dev-drprasad/delete-tag-and-release@v0.2.1 + if: env.COMMIT_SHA != 'null' + with: + tag_name: ${{ env.COMMIT_SHA }} + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + Sanitizers: uses: ./.github/workflows/stockfish_sanitizers.yml Tests: @@ -17,5 +44,8 @@ jobs: Compiles: uses: ./.github/workflows/stockfish_compile_test.yml Binaries: - if: github.ref == 'refs/heads/master' + if: github.ref == 'refs/heads/master' || (startsWith(github.ref_name, 'sf_') && github.ref_type == 'tag') uses: ./.github/workflows/stockfish_binaries.yml + ARM_Binaries: + if: github.ref == 'refs/heads/master' || (startsWith(github.ref_name, 'sf_') && github.ref_type == 'tag') + uses: ./.github/workflows/stockfish_arm_binaries.yml diff --git a/.github/workflows/stockfish_arm_binaries.yml b/.github/workflows/stockfish_arm_binaries.yml new file mode 100644 index 00000000..52105eb6 --- /dev/null +++ b/.github/workflows/stockfish_arm_binaries.yml @@ -0,0 +1,158 @@ +name: Stockfish +on: + workflow_call: +jobs: + Stockfish: + name: ${{ matrix.config.name }} ${{ matrix.binaries }} + runs-on: ${{ matrix.config.os }} + env: + COMPILER: ${{ matrix.config.compiler }} + COMP: ${{ matrix.config.comp }} + EMU: ${{ matrix.config.emu }} + EXT: ${{ matrix.config.ext }} + OS: ${{ matrix.config.os }} + BINARY: ${{ matrix.binaries }} + strategy: + matrix: + config: + - name: Android NDK aarch64 + os: ubuntu-22.04 + compiler: aarch64-linux-android21-clang++ + emu: qemu-aarch64 + comp: ndk + shell: bash {0} + - name: Android NDK arm + os: ubuntu-22.04 + compiler: armv7a-linux-androideabi21-clang++ + emu: qemu-arm + comp: ndk + shell: bash {0} + binaries: + - armv8 + - armv7 + - armv7-neon + exclude: + - binaries: armv8 + config: {compiler: armv7a-linux-androideabi21-clang++} + - binaries: armv7 + config: {compiler: aarch64-linux-android21-clang++} + - binaries: armv7-neon + config: {compiler: aarch64-linux-android21-clang++} + defaults: + run: + working-directory: src + shell: ${{ matrix.config.shell }} + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Download required linux packages + if: runner.os == 'Linux' + run: | + sudo apt update + sudo apt install qemu-user + + - name: Install NDK + if: runner.os == 'Linux' + run: | + if [ $COMP == ndk ]; then + NDKV="21.4.7075529" + ANDROID_ROOT=/usr/local/lib/android + ANDROID_SDK_ROOT=$ANDROID_ROOT/sdk + SDKMANAGER=$ANDROID_SDK_ROOT/cmdline-tools/latest/bin/sdkmanager + echo "y" | $SDKMANAGER "ndk;$NDKV" + ANDROID_NDK_ROOT=$ANDROID_SDK_ROOT/ndk/$NDKV + ANDROID_NDK_BIN=$ANDROID_NDK_ROOT/toolchains/llvm/prebuilt/linux-x86_64/bin + echo "ANDROID_NDK_BIN=$ANDROID_NDK_BIN" >> $GITHUB_ENV + fi + + - name: Download the used network from the fishtest framework + run: make net + + - name: Check compiler + run: | + if [ $COMP == ndk ]; then + export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH + fi + $COMPILER -v + + - name: Test help target + run: make help + + - name: Check git + run: git --version + + # Compile profile guided builds + + - name: Compile ${{ matrix.binaries }} build + run: | + if [ $COMP == ndk ]; then + export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH + export LDFLAGS="-static -Wno-unused-command-line-argument" + fi + make clean + make -j2 profile-build ARCH=$BINARY COMP=$COMP WINE_PATH=$EMU + make strip ARCH=$BINARY COMP=$COMP + mv ./stockfish$EXT ../stockfish-android-$BINARY$EXT + + - name: Remove non src files + run: rm -f *.o .depend *.nnue + + - name: Download wiki + run: | + git clone https://github.com/official-stockfish/Stockfish.wiki.git ../wiki + cd ../wiki + rm -rf .git + + - name: Create tar archive. + run: | + cd .. + mkdir stockfish + cp -r wiki stockfish/ + cp -r src stockfish/ + cp stockfish-android-$BINARY$EXT stockfish/ + cp "Top CPU Contributors.txt" stockfish/ + cp Copying.txt stockfish/ + cp AUTHORS stockfish/ + cp CITATION.cff stockfish/ + cp README.md stockfish/ + tar -cvf stockfish-android-$BINARY.tar stockfish + + - name: Upload binaries + uses: actions/upload-artifact@v3 + with: + name: stockfish-android-${{ matrix.binaries }} + path: stockfish-android-${{ matrix.binaries }}.tar + + - name: Release + if: startsWith(github.ref_name, 'sf_') && github.ref_type == 'tag' + uses: softprops/action-gh-release@v1 + with: + files: stockfish-android-${{ matrix.binaries }}.tar + + - name: Get last commit sha + id: last_commit + run: echo "COMMIT_SHA=$(git rev-parse HEAD | cut -c 1-8)" >> $GITHUB_ENV + + - name: Get commit date + id: commit_date + run: echo "COMMIT_DATE=$(git show -s --date=format:'%Y%m%d' --format=%cd HEAD)" >> $GITHUB_ENV + + # Make sure that an old ci which still runs on master doesn't recreate a prerelease + - name: Check Pullable Commits + id: check_commits + run: | + git fetch + CHANGES=$(git rev-list HEAD..origin/master --count) + echo "CHANGES=$CHANGES" >> $GITHUB_ENV + + - name: Prerelease + if: github.ref_name == 'master' && env.CHANGES == '0' + continue-on-error: true + uses: softprops/action-gh-release@v1 + with: + name: Stockfish dev-${{ env.COMMIT_DATE }}-${{ env.COMMIT_SHA }} + tag_name: stockfish-dev-${{ env.COMMIT_DATE }}-${{ env.COMMIT_SHA }} + prerelease: true + files: stockfish-android-${{ matrix.binaries }}.tar \ No newline at end of file diff --git a/.github/workflows/stockfish_binaries.yml b/.github/workflows/stockfish_binaries.yml index 0b205ded..0a53cb03 100644 --- a/.github/workflows/stockfish_binaries.yml +++ b/.github/workflows/stockfish_binaries.yml @@ -9,35 +9,35 @@ jobs: COMPILER: ${{ matrix.config.compiler }} COMP: ${{ matrix.config.comp }} EXT: ${{ matrix.config.ext }} - OS: ${{ matrix.config.os }} + NAME: ${{ matrix.config.simple_name }} BINARY: ${{ matrix.binaries }} strategy: matrix: config: - - { - name: "Ubuntu 20.04 GCC", - os: ubuntu-20.04, - compiler: g++, - comp: gcc, - shell: 'bash {0}' - } - - { - name: "MacOS 12 Apple Clang", - os: macos-12, - compiler: clang++, - comp: clang, - shell: 'bash {0}' - } - - { - name: "Windows 2022 Mingw-w64 GCC x86_64", - os: windows-2022, - compiler: g++, - comp: mingw, - msys_sys: 'mingw64', - msys_env: 'x86_64-gcc', - shell: 'msys2 {0}', - ext: .exe - } + - name: Ubuntu 20.04 GCC + os: ubuntu-20.04 + simple_name: ubuntu + compiler: g++ + comp: gcc + shell: bash {0} + archive_ext: tar + - name: MacOS 12 Apple Clang + os: macos-12 + simple_name: macos + compiler: clang++ + comp: clang + shell: bash {0} + archive_ext: tar + - name: Windows 2022 Mingw-w64 GCC x86_64 + os: windows-2022 + simple_name: windows + compiler: g++ + comp: mingw + msys_sys: mingw64 + msys_env: x86_64-gcc + shell: msys2 {0} + ext: .exe + archive_ext: zip binaries: - x86-64 - x86-64-modern @@ -56,55 +56,113 @@ jobs: - name: Download required linux packages if: runner.os == 'Linux' - run: | - sudo apt update + run: sudo apt update - name: Setup msys and install required packages if: runner.os == 'Windows' uses: msys2/setup-msys2@v2 with: - msystem: ${{matrix.config.msys_sys}} - install: mingw-w64-${{matrix.config.msys_env}} make git expect + msystem: ${{ matrix.config.msys_sys }} + install: mingw-w64-${{ matrix.config.msys_env }} make git zip - name: Download the used network from the fishtest framework - run: | - make net + run: make net - name: Check compiler - run: | - export PATH=$PATH:$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/linux-x86_64/bin - $COMPILER -v + run: $COMPILER -v - name: Test help target - run: | - make help + run: make help + + - name: Check git + run: git --version # Compile profile guided builds - name: Compile ${{ matrix.binaries }} build run: | - make clean make -j2 profile-build ARCH=$BINARY COMP=$COMP - strip ./stockfish$EXT - mv ./stockfish$EXT ../stockfish-$OS-$BINARY$EXT + make strip ARCH=$BINARY COMP=$COMP + mv ./stockfish$EXT ../stockfish-$NAME-$BINARY$EXT - name: Remove non src files - run: rm -f *.o .depend *.nnue + run: git clean -fx - - name: Create tar archive. + - name: Download wiki + run: | + git clone https://github.com/official-stockfish/Stockfish.wiki.git ../wiki + rm -rf ../wiki/.git + + - name: Create directory. run: | cd .. mkdir stockfish + cp -r wiki stockfish/ cp -r src stockfish/ - cp stockfish-$OS-$BINARY$EXT stockfish/ + cp stockfish-$NAME-$BINARY$EXT stockfish/ cp "Top CPU Contributors.txt" stockfish/ cp Copying.txt stockfish/ cp AUTHORS stockfish/ - tar -cvf stockfish-$OS-$BINARY.tar stockfish + cp CITATION.cff stockfish/ + cp README.md stockfish/ + + - name: Create tar + if: runner.os != 'Windows' + run: | + cd .. + tar -cvf stockfish-$NAME-$BINARY.tar stockfish + + - name: Create zip + if: runner.os == 'Windows' + run: | + cd .. + zip -r stockfish-$NAME-$BINARY.zip stockfish - name: Upload binaries + if: runner.os != 'Windows' uses: actions/upload-artifact@v3 with: name: stockfish-${{ matrix.config.os }}-${{ matrix.binaries }} - path: | - stockfish-${{ matrix.config.os }}-${{ matrix.binaries }}.tar + path: stockfish-${{ matrix.config.simple_name }}-${{ matrix.binaries }}.tar + + # Artifacts automatically get zipped + # to avoid double zipping, we use the unzipped directory + - name: Upload binaries + if: runner.os == 'Windows' + uses: actions/upload-artifact@v3 + with: + name: stockfish-${{ matrix.config.os }}-${{ matrix.binaries }} + path: stockfish + + - name: Release + if: startsWith(github.ref_name, 'sf_') && github.ref_type == 'tag' + uses: softprops/action-gh-release@v1 + with: + files: stockfish-${{ matrix.config.simple_name }}-${{ matrix.binaries }}.${{ matrix.config.archive_ext }} + + - name: Get last commit sha + id: last_commit + run: echo "COMMIT_SHA=$(git rev-parse HEAD | cut -c 1-8)" >> $GITHUB_ENV + + - name: Get commit date + id: commit_date + run: echo "COMMIT_DATE=$(git show -s --date=format:'%Y%m%d' --format=%cd HEAD)" >> $GITHUB_ENV + + # Make sure that an old ci which still runs on master doesn't recreate a prerelease + - name: Check Pullable Commits + id: check_commits + run: | + git fetch + CHANGES=$(git rev-list HEAD..origin/master --count) + echo "CHANGES=$CHANGES" >> $GITHUB_ENV + + - name: Prerelease + if: github.ref_name == 'master' && env.CHANGES == '0' + continue-on-error: true + uses: softprops/action-gh-release@v1 + with: + name: Stockfish dev-${{ env.COMMIT_DATE }}-${{ env.COMMIT_SHA }} + tag_name: stockfish-dev-${{ env.COMMIT_DATE }}-${{ env.COMMIT_SHA }} + prerelease: true + files: stockfish-${{ matrix.config.simple_name }}-${{ matrix.binaries }}.${{ matrix.config.archive_ext }} + diff --git a/.github/workflows/stockfish_compile_test.yml b/.github/workflows/stockfish_compile_test.yml index 63136737..c7280a85 100644 --- a/.github/workflows/stockfish_compile_test.yml +++ b/.github/workflows/stockfish_compile_test.yml @@ -11,52 +11,40 @@ jobs: strategy: matrix: config: - - { - name: "Ubuntu 20.04 GCC", - os: ubuntu-20.04, - compiler: g++, - comp: gcc, - shell: 'bash {0}' - } - - { - name: "Ubuntu 20.04 Clang", - os: ubuntu-20.04, - compiler: clang++, - comp: clang, - shell: 'bash {0}' - } - - { - name: "MacOS 12 Apple Clang", - os: macos-12, - compiler: clang++, - comp: clang, - shell: 'bash {0}' - } - - { - name: "MacOS 12 GCC 11", - os: macos-12, - compiler: g++-11, - comp: gcc, - shell: 'bash {0}' - } - - { - name: "Windows 2022 Mingw-w64 GCC x86_64", - os: windows-2022, - compiler: g++, - comp: mingw, - msys_sys: 'mingw64', - msys_env: 'x86_64-gcc', - shell: 'msys2 {0}' - } - - { - name: "Windows 2022 Mingw-w64 Clang x86_64", - os: windows-2022, - compiler: clang++, - comp: clang, - msys_sys: 'clang64', - msys_env: 'clang-x86_64-clang', - shell: 'msys2 {0}' - } + - name: Ubuntu 20.04 GCC + os: ubuntu-20.04 + compiler: g++ + comp: gcc + shell: bash {0} + - name: Ubuntu 20.04 Clang + os: ubuntu-20.04 + compiler: clang++ + comp: clang + shell: bash {0} + - name: MacOS 12 Apple Clang + os: macos-12 + compiler: clang++ + comp: clang + shell: bash {0} + - name: MacOS 12 GCC 11 + os: macos-12 + compiler: g++-11 + comp: gcc + shell: bash {0} + - name: Windows 2022 Mingw-w64 GCC x86_64 + os: windows-2022 + compiler: g++ + comp: mingw + msys_sys: mingw64 + msys_env: x86_64-gcc + shell: msys2 {0} + - name: Windows 2022 Mingw-w64 Clang x86_64 + os: windows-2022 + compiler: clang++ + comp: clang + msys_sys: clang64 + msys_env: clang-x86_64-clang + shell: msys2 {0} defaults: run: @@ -72,20 +60,19 @@ jobs: uses: msys2/setup-msys2@v2 with: msystem: ${{matrix.config.msys_sys}} - install: mingw-w64-${{matrix.config.msys_env}} make git expect + install: mingw-w64-${{matrix.config.msys_env}} make git - name: Download the used network from the fishtest framework - run: | - make net + run: make net - name: Check compiler - run: | - export PATH=$PATH:$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/linux-x86_64/bin - $COMPILER -v + run: $COMPILER -v - name: Test help target - run: | - make help + run: make help + + - name: Check git + run: git --version # x86-64 with newer extensions tests @@ -112,4 +99,4 @@ jobs: - name: Compile x86-64-vnni256 build run: | make clean - make -j2 ARCH=x86-64-vnni256 build \ No newline at end of file + make -j2 ARCH=x86-64-vnni256 build diff --git a/.github/workflows/stockfish_sanitizers.yml b/.github/workflows/stockfish_sanitizers.yml index 61eaf0c9..708c9227 100644 --- a/.github/workflows/stockfish_sanitizers.yml +++ b/.github/workflows/stockfish_sanitizers.yml @@ -12,34 +12,24 @@ jobs: strategy: matrix: config: - - { - name: "Ubuntu 20.04 GCC", - os: ubuntu-20.04, - compiler: g++, - comp: gcc, - shell: 'bash {0}' - } + - name: Ubuntu 20.04 GCC + os: ubuntu-20.04 + compiler: g++ + comp: gcc + shell: bash {0} sanitizers: - - { - name: Run with thread sanitizer, - make_option: sanitize=thread, - instrumented_option: sanitizer-thread - } - - { - name: Run with UB sanitizer, - make_option: sanitize=undefined, - instrumented_option: sanitizer-undefined - } - - { - name: Run under valgrind, - make_option: "", - instrumented_option: valgrind - } - - { - name: Run under valgrind-thread, - make_option: "", - instrumented_option: valgrind-thread - } + - name: Run with thread sanitizer + make_option: sanitize=thread + instrumented_option: sanitizer-thread + - name: Run with UB sanitizer + make_option: sanitize=undefined + instrumented_option: sanitizer-undefined + - name: Run under valgrind + make_option: "" + instrumented_option: valgrind + - name: Run under valgrind-thread + make_option: "" + instrumented_option: valgrind-thread defaults: run: working-directory: src @@ -52,20 +42,19 @@ jobs: - name: Download required linux packages run: | sudo apt update - sudo apt install expect valgrind g++-multilib qemu-user + sudo apt install expect valgrind g++-multilib - name: Download the used network from the fishtest framework - run: | - make net + run: make net - name: Check compiler - run: | - export PATH=$PATH:$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/linux-x86_64/bin - $COMPILER -v + run: $COMPILER -v - name: Test help target - run: | - make help + run: make help + + - name: Check git + run: git --version # Sanitizers @@ -74,4 +63,4 @@ jobs: export CXXFLAGS="-O1 -fno-inline" make clean make -j2 ARCH=x86-64-modern ${{ matrix.sanitizers.make_option }} debug=yes optimize=no build > /dev/null - ../tests/instrumented.sh --${{ matrix.sanitizers.instrumented_option }} \ No newline at end of file + ../tests/instrumented.sh --${{ matrix.sanitizers.instrumented_option }} diff --git a/.github/workflows/stockfish_test.yml b/.github/workflows/stockfish_test.yml index 46b4e26f..28218402 100644 --- a/.github/workflows/stockfish_test.yml +++ b/.github/workflows/stockfish_test.yml @@ -12,95 +12,68 @@ jobs: strategy: matrix: config: - - { - name: "Ubuntu 20.04 GCC", - os: ubuntu-20.04, - compiler: g++, - comp: gcc, - run_32bit_tests: true, - run_64bit_tests: true, - shell: 'bash {0}' - } - - { - name: "Ubuntu 20.04 Clang", - os: ubuntu-20.04, - compiler: clang++, - comp: clang, - run_32bit_tests: true, - run_64bit_tests: true, - shell: 'bash {0}' - } - - { - name: "Ubuntu 20.04 NDK armv8", - os: ubuntu-20.04, - compiler: aarch64-linux-android21-clang++, - comp: ndk, - run_armv8_tests: false, - shell: 'bash {0}' - } - - { - name: "Ubuntu 20.04 NDK armv7", - os: ubuntu-20.04, - compiler: armv7a-linux-androideabi21-clang++, - comp: ndk, - run_armv7_tests: false, - shell: 'bash {0}' - } - - { - name: "MacOS 12 Apple Clang", - os: macos-12, - compiler: clang++, - comp: clang, - run_64bit_tests: true, - shell: 'bash {0}' - } - - { - name: "MacOS 12 GCC 11", - os: macos-12, - compiler: g++-11, - comp: gcc, - run_64bit_tests: true, - shell: 'bash {0}' - } - - { - name: "Windows 2022 Mingw-w64 GCC x86_64", - os: windows-2022, - compiler: g++, - comp: mingw, - run_64bit_tests: true, - msys_sys: 'mingw64', - msys_env: 'x86_64-gcc', - shell: 'msys2 {0}' - } - - { - name: "Windows 2022 Mingw-w64 GCC i686", - os: windows-2022, - compiler: g++, - comp: mingw, - run_32bit_tests: true, - msys_sys: 'mingw32', - msys_env: 'i686-gcc', - shell: 'msys2 {0}' - } - - { - name: "Windows 2022 Mingw-w64 Clang x86_64", - os: windows-2022, - compiler: clang++, - comp: clang, - run_64bit_tests: true, - msys_sys: 'clang64', - msys_env: 'clang-x86_64-clang', - shell: 'msys2 {0}' - } - exclude: - - config: - { - name: "Ubuntu 20.04 NDK armv7" - } - - config: - { - name: "Ubuntu 20.04 NDK armv8" - } + - name: Ubuntu 20.04 GCC + os: ubuntu-20.04 + compiler: g++ + comp: gcc + run_32bit_tests: true + run_64bit_tests: true + shell: bash {0} + - name: Ubuntu 20.04 Clang + os: ubuntu-20.04 + compiler: clang++ + comp: clang + run_32bit_tests: true + run_64bit_tests: true + shell: bash {0} + - name: Android NDK aarch64 + os: ubuntu-22.04 + compiler: aarch64-linux-android21-clang++ + comp: ndk + run_armv8_tests: true + shell: bash {0} + - name: Android NDK arm + os: ubuntu-22.04 + compiler: armv7a-linux-androideabi21-clang++ + comp: ndk + run_armv7_tests: true + shell: bash {0} + - name: MacOS 12 Apple Clang + os: macos-12 + compiler: clang++ + comp: clang + run_64bit_tests: true + shell: bash {0} + - name: MacOS 12 GCC 11 + os: macos-12 + compiler: g++-11 + comp: gcc + run_64bit_tests: true + shell: bash {0} + - name: Windows 2022 Mingw-w64 GCC x86_64 + os: windows-2022 + compiler: g++ + comp: mingw + run_64bit_tests: true + msys_sys: mingw64 + msys_env: x86_64-gcc + shell: msys2 {0} + - name: Windows 2022 Mingw-w64 GCC i686 + os: windows-2022 + compiler: g++ + comp: mingw + run_32bit_tests: true + msys_sys: mingw32 + msys_env: i686-gcc + shell: msys2 {0} + - name: Windows 2022 Mingw-w64 Clang x86_64 + os: windows-2022 + compiler: clang++ + comp: clang + run_64bit_tests: true + msys_sys: clang64 + msys_env: clang-x86_64-clang + shell: msys2 {0} defaults: run: working-directory: src @@ -116,30 +89,47 @@ jobs: sudo apt update sudo apt install expect valgrind g++-multilib qemu-user + - name: Install NDK + if: runner.os == 'Linux' + run: | + if [ $COMP == ndk ]; then + NDKV="21.4.7075529" + ANDROID_ROOT=/usr/local/lib/android + ANDROID_SDK_ROOT=$ANDROID_ROOT/sdk + SDKMANAGER=$ANDROID_SDK_ROOT/cmdline-tools/latest/bin/sdkmanager + echo "y" | $SDKMANAGER "ndk;$NDKV" + ANDROID_NDK_ROOT=$ANDROID_SDK_ROOT/ndk/$NDKV + ANDROID_NDK_BIN=$ANDROID_NDK_ROOT/toolchains/llvm/prebuilt/linux-x86_64/bin + echo "ANDROID_NDK_BIN=$ANDROID_NDK_BIN" >> $GITHUB_ENV + fi + - name: Setup msys and install required packages if: runner.os == 'Windows' uses: msys2/setup-msys2@v2 with: - msystem: ${{matrix.config.msys_sys}} - install: mingw-w64-${{matrix.config.msys_env}} make git expect + msystem: ${{ matrix.config.msys_sys }} + install: mingw-w64-${{ matrix.config.msys_env }} make git expect - name: Download the used network from the fishtest framework - run: | - make net + run: make net - name: Extract the bench number from the commit history run: | - git log HEAD | grep "\b[Bb]ench[ :]\+[0-9]\{7\}" | head -n 1 | sed "s/[^0-9]*\([0-9]*\).*/\1/g" > git_sig + git log HEAD | grep -o "\b[Bb]ench[ :]\+[1-9][0-9]\{5,9\}\b" | head -n 1 | sed "s/[^0-9]//g" > git_sig [ -s git_sig ] && echo "benchref=$(cat git_sig)" >> $GITHUB_ENV && echo "Reference bench:" $(cat git_sig) || echo "No bench found" - name: Check compiler run: | - export PATH=$PATH:$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/linux-x86_64/bin + if [ $COMP == ndk ]; then + export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH + fi $COMPILER -v - name: Test help target - run: | - make help + run: make help + + - name: Check git + run: git --version # x86-32 tests @@ -229,13 +219,7 @@ jobs: - name: Test armv8 build if: ${{ matrix.config.run_armv8_tests }} run: | - ANDROID_ROOT=/usr/local/lib/android - ANDROID_SDK_ROOT=${ANDROID_ROOT}/sdk - SDKMANAGER=${ANDROID_SDK_ROOT}/cmdline-tools/latest/bin/sdkmanager - echo "y" | $SDKMANAGER "ndk;21.4.7075529" - ANDROID_NDK_ROOT=${ANDROID_SDK_ROOT}/ndk-bundle - ln -sfn $ANDROID_SDK_ROOT/ndk/21.4.7075529 $ANDROID_NDK_ROOT - export PATH=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/linux-x86_64/bin:$PATH + export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH export LDFLAGS="-static -Wno-unused-command-line-argument" make clean make -j2 ARCH=armv8 build @@ -246,13 +230,7 @@ jobs: - name: Test armv7 build if: ${{ matrix.config.run_armv7_tests }} run: | - ANDROID_ROOT=/usr/local/lib/android - ANDROID_SDK_ROOT=${ANDROID_ROOT}/sdk - SDKMANAGER=${ANDROID_SDK_ROOT}/cmdline-tools/latest/bin/sdkmanager - echo "y" | $SDKMANAGER "ndk;21.4.7075529" - ANDROID_NDK_ROOT=${ANDROID_SDK_ROOT}/ndk-bundle - ln -sfn $ANDROID_SDK_ROOT/ndk/21.4.7075529 $ANDROID_NDK_ROOT - export PATH=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/linux-x86_64/bin:$PATH + export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH export LDFLAGS="-static -Wno-unused-command-line-argument" make clean make -j2 ARCH=armv7 build @@ -261,13 +239,7 @@ jobs: - name: Test armv7-neon build if: ${{ matrix.config.run_armv7_tests }} run: | - ANDROID_ROOT=/usr/local/lib/android - ANDROID_SDK_ROOT=${ANDROID_ROOT}/sdk - SDKMANAGER=${ANDROID_SDK_ROOT}/cmdline-tools/latest/bin/sdkmanager - echo "y" | $SDKMANAGER "ndk;21.4.7075529" - ANDROID_NDK_ROOT=${ANDROID_SDK_ROOT}/ndk-bundle - ln -sfn $ANDROID_SDK_ROOT/ndk/21.4.7075529 $ANDROID_NDK_ROOT - export PATH=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/linux-x86_64/bin:$PATH + export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH export LDFLAGS="-static -Wno-unused-command-line-argument" make clean make -j2 ARCH=armv7-neon build @@ -281,4 +253,4 @@ jobs: make clean make -j2 ARCH=x86-64-modern build ../tests/perft.sh - ../tests/reprosearch.sh \ No newline at end of file + ../tests/reprosearch.sh diff --git a/AUTHORS b/AUTHORS index 432d9b90..ff224954 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,17 +1,15 @@ -# List of authors for Stockfish - -# Founders of the Stockfish project and fishtest infrastructure +# Founders of the Stockfish project and Fishtest infrastructure Tord Romstad (romstad) Marco Costalba (mcostalba) Joona Kiiski (zamar) Gary Linscott (glinscott) -# Authors and inventors of NNUE, training, NNUE port +# Authors and inventors of NNUE, training, and NNUE port Yu Nasu (ynasu87) Motohiro Isozaki (yaneurao) Hisayori Noda (nodchip) -# all other authors of the code in alphabetical order +# All other authors of Stockfish code (in alphabetical order) Aditya (absimaldata) Adrian Petrescu (apetresc) Ajith Chandy Jose (ajithcj) @@ -21,6 +19,7 @@ Alexander Kure Alexander Pagel (Lolligerhans) Alfredo Menezes (lonfom169) Ali AlZhrani (Cooffe) +Andreas Matthies (Matthies) Andrei Vetrov (proukornew) Andrew Grant (AndyGrant) Andrey Neporada (nepal) @@ -35,6 +34,7 @@ Ben Chaney (Chaneybenjamini) Ben Koshy (BKSpurgeon) Bill Henry (VoyagerOne) Bojun Guo (noobpwnftw, Nooby) +borg323 Boštjan Mejak (PedanticHacker) braich Brian Sheppard (SapphireBrand, briansheppard-toast) @@ -46,12 +46,12 @@ Chess13234 Chris Cain (ceebo) clefrks Dale Weiler (graphitemaster) -Dan Schmidt (dfannius) Daniel Axtens (daxtens) Daniel Dugovic (ddugovic) +Dan Schmidt (dfannius) Dariusz Orzechowski (dorzechowski) -David Zar David (dav1312) +David Zar Daylen Yang (daylen) Deshawn Mohan-Smith (GoldenRare) Dieter Dobbelaere (ddobbelaere) @@ -65,7 +65,6 @@ Eelco de Groot (KingDefender) Elvin Liu (solarlight2) erbsenzaehler Ernesto Gatti -Linmiao Xu (linrock) Fabian Beuke (madnight) Fabian Fichter (ianfab) Fanael Linithien (Fanael) @@ -78,32 +77,35 @@ George Sobala (gsobala) gguliash Giacomo Lorenzetti (G-Lorenz) Gian-Carlo Pascutto (gcp) +Goh CJ (cj5716) Gontran Lemaire (gonlem) Goodkov Vasiliy Aleksandrovich (goodkov) Gregor Cramer GuardianRM -Günther Demetz (pb00067, pb00068) Guy Vreuls (gvreuls) +Günther Demetz (pb00067, pb00068) Henri Wiechers Hiraoka Takuya (HiraokaTakuya) homoSapiensSapiens Hongzhi Cheng Ivan Ivec (IIvec) Jacques B. (Timshel) +Jake Senne (w1wwwwww) Jan Ondruš (hxim) Jared Kish (Kurtbusch, kurt22i) Jarrod Torriero (DU-jdto) -Jean Gauthier (OuaisBla) Jean-Francois Romang (jromang) +Jean Gauthier (OuaisBla) Jekaa Jerry Donald Watson (jerrydonaldwatson) jjoshua2 -Jonathan Calovski (Mysseno) Jonathan Buladas Dumale (SFisGOD) +Jonathan Calovski (Mysseno) +Jonathan McDermid (jonathanmcdermid) Joost VandeVondele (vondele) -Jörg Oster (joergoster) Joseph Ellis (jhellis3) Joseph R. Prostko +Jörg Oster (joergoster) Julian Willemer (NightlyKing) jundery Justin Blanchard (UncombedCoconut) @@ -117,6 +119,7 @@ Krystian Kuzniarek (kuzkry) Leonardo Ljubičić (ICCF World Champion) Leonid Pechenik (lp--) Liam Keegan (lkeegan) +Linmiao Xu (linrock) Linus Arver (listx) loco-loco Lub van den Berg (ElbertoOne) @@ -131,6 +134,7 @@ Matt Ginsberg (mattginsberg) Matthew Lai (matthewlai) Matthew Sullivan (Matt14916) Max A. (Disservin) +Maxim Masiutin (maximmasiutin) Maxim Molchanov (Maxim) Michael An (man) Michael Byrne (MichaelB7) @@ -145,16 +149,18 @@ Mira Miroslav Fontán (Hexik) Moez Jellouli (MJZ1977) Mohammed Li (tthsqe12) +Muzhen J (XInTheDark) Nathan Rugg (nmrugg) -Nick Pelling (nickpelling) +Nguyen Pham (nguyenpham) Nicklas Persson (NicklasPersson) +Nick Pelling (nickpelling) Niklas Fiekas (niklasf) Nikolay Kostov (NikolayIT) -Nguyen Pham (nguyenpham) Norman Schmidt (FireFather) notruck Ofek Shochat (OfekShochat, ghostway) Ondrej Mosnáček (WOnder93) +Ondřej Mišina (AndrovT) Oskar Werkelin Ahlin Pablo Vazquez Panthee @@ -163,7 +169,9 @@ Pasquale Pigazzini (ppigazzini) Patrick Jansen (mibere) Peter Schneider (pschneider1968) Peter Zsifkovits (CoffeeOne) +PikaCat Praveen Kumar Tummala (praveentml) +Prokop Randáček (ProkopRandacek) Rahul Dsilva (silversolver1) Ralph Stößer (Ralph Stoesser) Raminder Singh @@ -172,8 +180,8 @@ Reuven Peleg (R-Peleg) Richard Lloyd (Richard-Lloyd) Rodrigo Exterckötter Tjäder Rodrigo Roim (roim) -Ron Britvich (Britvich) Ronald de Man (syzygy1, syzygy) +Ron Britvich (Britvich) rqs Rui Coelho (ruicoelhopedro) Ryan Schmitt @@ -184,21 +192,22 @@ Sergei Antonov (saproj) Sergei Ivanov (svivanov72) Sergio Vieri (sergiovieri) sf-x +Shahin M. Shahin (peregrine) Shane Booth (shane31) Shawn Varghese (xXH4CKST3RXx) Siad Daboul (Topologist) Stefan Geschwentner (locutus2) Stefano Cardanobile (Stefano80) +Stefano Di Martino (StefanoD) Steinar Gunderson (sesse) Stéphane Nicolet (snicolet) Syine Mineta (MinetaS) -Prokop Randáček (ProkopRandacek) Thanar2 thaspel theo77186 +Tomasz Sobczyk (Sopel97) Tom Truscott Tom Vijlbrief (tomtor) -Tomasz Sobczyk (Sopel97) Torsten Franz (torfranz, tfranzer) Torsten Hellwig (Torom) Tracey Emery (basepr1me) @@ -206,11 +215,12 @@ tttak Unai Corzo (unaiic) Uri Blass (uriblass) Vince Negri (cuddlestmonkey) +Viren +windfishballad xefoci7612 zz4032 - # Additionally, we acknowledge the authors and maintainers of fishtest, -# an amazing and essential framework for the development of Stockfish! +# an amazing and essential framework for Stockfish development! # # https://github.com/glinscott/fishtest/blob/master/AUTHORS diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 00000000..bc0889a8 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,23 @@ +# This CITATION.cff file was generated with cffinit. +# Visit https://bit.ly/cffinit to generate yours today! + +cff-version: 1.2.0 +title: Stockfish +message: >- + Please cite this software using the metadata from this + file. +type: software +authors: + - name: The Stockfish developers (see AUTHORS file) +repository-code: 'https://github.com/official-stockfish/Stockfish' +url: 'https://stockfishchess.org/' +repository-artifact: 'https://stockfishchess.org/download/' +abstract: Stockfish is a free and strong UCI chess engine. +keywords: + - chess + - artificial intelligence (AI) + - tree search + - alpha-beta search + - neural networks (NN) + - efficiently updatable neural networks (NNUE) +license: GPL-3.0 diff --git a/README.md b/README.md index 06c452d9..3d04fb04 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,21 @@ [![Stockfish][stockfish128-logo]][website-link] +

Stockfish

+ + A free and strong UCI chess engine. +
+ [Explore Stockfish docs »][wiki-link] +
+
+ [Report bug][issue-link] + · + [Open a discussion][discussions-link] + · + [Discord][discord-link] + · + [Blog][website-blog-link] + [![Build][build-badge]][build-link] [![License][license-badge]][license-link]
@@ -16,19 +31,16 @@ ## Overview -[Stockfish][website-link] is a free, powerful UCI chess engine derived from -Glaurung 2.1. Stockfish is not a complete chess program and requires a UCI-compatible -graphical user interface (GUI) (e.g. XBoard with PolyGlot, Scid, Cute Chess, eboard, -Arena, Sigma Chess, Shredder, Chess Partner or Fritz) in order to be used comfortably. -Read the documentation for your GUI of choice for information about how to use +[Stockfish][website-link] is a **free and strong UCI chess engine** derived from +Glaurung 2.1 that analyzes chess positions and computes the optimal moves. + +Stockfish **does not include a graphical user interface** (GUI) that is required +to display a chessboard and to make it easy to input moves. These GUIs are +developed independently from Stockfish and are available online. **Read the +documentation for your GUI** of choice for information about how to use Stockfish with it. -The Stockfish engine features two evaluation functions for chess. The efficiently -updatable neural network (NNUE) based evaluation is the default and by far the strongest. -The classical evaluation based on handcrafted terms remains available. The strongest -network is integrated in the binary and downloaded automatically during the build process. -The NNUE evaluation benefits from the vector intrinsics available on most CPUs (sse2, -avx2, neon, or similar). +See also the Stockfish [documentation][wiki-usage-link] for further usage help. ## Files @@ -36,210 +48,47 @@ This distribution of Stockfish consists of the following files: * [README.md][readme-link], the file you are currently reading. - * [Copying.txt][license-link], a text file containing the GNU General Public License - version 3. + * [Copying.txt][license-link], a text file containing the GNU General Public + License version 3. * [AUTHORS][authors-link], a text file with the list of authors for the project. - * [src][src-link], a subdirectory containing the full source code, including a Makefile - that can be used to compile Stockfish on Unix-like systems. + * [src][src-link], a subdirectory containing the full source code, including a + Makefile that can be used to compile Stockfish on Unix-like systems. - * a file with the .nnue extension, storing the neural network for the NNUE evaluation. - Binary distributions will have this file embedded. + * a file with the .nnue extension, storing the neural network for the NNUE + evaluation. Binary distributions will have this file embedded. -## The UCI protocol and available options +## The UCI protocol -The Universal Chess Interface (UCI) is a standard protocol used to communicate with -a chess engine, and is the recommended way to do so for typical graphical user interfaces -(GUI) or chess tools. Stockfish implements the majority of its options as described -in [the UCI protocol][uci-link]. +The [Universal Chess Interface][uci-link] (UCI) is a standard text-based protocol +used to communicate with a chess engine and is the recommended way to do so for +typical graphical user interfaces (GUI) or chess tools. Stockfish implements the +majority of its options. -Developers can see the default values for UCI options available in Stockfish by typing -`./stockfish uci` in a terminal, but the majority of users will typically see them and -change them via a chess GUI. This is a list of available UCI options in Stockfish: +Developers can see the default values for the UCI options available in Stockfish +by typing `./stockfish uci` in a terminal, but most users should typically use a +chess GUI to interact with Stockfish. - * #### Threads - The number of CPU threads used for searching a position. For best performance, set - this equal to the number of CPU cores available. +For more information on UCI or debug commands, see our [documentation][wiki-commands-link]. - * #### Hash - The size of the hash table in MB. It is recommended to set Hash after setting Threads. +## Compiling Stockfish - * #### Clear Hash - Clear the hash table. +Stockfish has support for 32 or 64-bit CPUs, certain hardware instructions, +big-endian machines such as Power PC, and other platforms. - * #### Ponder - Let Stockfish ponder its next move while the opponent is thinking. +On Unix-like systems, it should be easy to compile Stockfish directly from the +source code with the included Makefile in the folder `src`. In general, it is +recommended to run `make help` to see a list of make targets with corresponding +descriptions. - * #### MultiPV - Output the N best lines (principal variations, PVs) when searching. - Leave at 1 for best performance. - - * #### Use NNUE - Toggle between the NNUE and classical evaluation functions. If set to "true", - the network parameters must be available to load from file (see also EvalFile), - if they are not embedded in the binary. - - * #### EvalFile - The name of the file of the NNUE evaluation parameters. Depending on the GUI the - filename might have to include the full path to the folder/directory that contains - the file. Other locations, such as the directory that contains the binary and the - working directory, are also searched. - - * #### UCI_AnalyseMode - An option handled by your GUI. - - * #### UCI_Chess960 - An option handled by your GUI. If true, Stockfish will play Chess960. - - * #### UCI_ShowWDL - If enabled, show approximate WDL statistics as part of the engine output. - These WDL numbers model expected game outcomes for a given evaluation and - game ply for engine self-play at fishtest LTC conditions (60+0.6s per game). - - * #### UCI_LimitStrength - Enable weaker play aiming for an Elo rating as set by UCI_Elo. This option overrides Skill Level. - - * #### UCI_Elo - If enabled by UCI_LimitStrength, aim for an engine strength of the given Elo. - This Elo rating has been calibrated at a time control of 60s+0.6s and anchored to CCRL 40/4. - - * #### Skill Level - Lower the Skill Level in order to make Stockfish play weaker (see also UCI_LimitStrength). - Internally, MultiPV is enabled, and with a certain probability depending on the Skill Level a - weaker move will be played. - - * #### SyzygyPath - Path to the folders/directories storing the Syzygy tablebase files. Multiple - directories are to be separated by ";" on Windows and by ":" on Unix-based - operating systems. Do not use spaces around the ";" or ":". - - Example: `C:\tablebases\wdl345;C:\tablebases\wdl6;D:\tablebases\dtz345;D:\tablebases\dtz6` - - It is recommended to store .rtbw files on an SSD. There is no loss in storing - the .rtbz files on a regular HDD. It is recommended to verify all md5 checksums - of the downloaded tablebase files (`md5sum -c checksum.md5`) as corruption will - lead to engine crashes. - - * #### SyzygyProbeDepth - Minimum remaining search depth for which a position is probed. Set this option - to a higher value to probe less aggressively if you experience too much slowdown - (in terms of nps) due to tablebase probing. - - * #### Syzygy50MoveRule - Disable to let fifty-move rule draws detected by Syzygy tablebase probes count - as wins or losses. This is useful for ICCF correspondence games. - - * #### SyzygyProbeLimit - Limit Syzygy tablebase probing to positions with at most this many pieces left - (including kings and pawns). - - * #### Move Overhead - Assume a time delay of x ms due to network and GUI overheads. This is useful to - avoid losses on time in those cases. - - * #### Slow Mover - Lower values will make Stockfish take less time in games, higher values will - make it think longer. - - * #### nodestime - Tells the engine to use nodes searched instead of wall time to account for - elapsed time. Useful for engine testing. - - * #### Debug Log File - Write all communication to and from the engine into a text file. - -For developers the following non-standard commands might be of interest, mainly useful for debugging: - - * #### bench *ttSize threads limit fenFile limitType evalType* - Performs a standard benchmark using various options. The signature of a version - (standard node count) is obtained using all defaults. `bench` is currently - `bench 16 1 13 default depth mixed`. - - * #### compiler - Give information about the compiler and environment used for building a binary. - - * #### d - Display the current position, with ascii art and fen. - - * #### eval - Return the evaluation of the current position. - - * #### export_net [filename] - Exports the currently loaded network to a file. - If the currently loaded network is the embedded network and the filename - is not specified then the network is saved to the file matching the name - of the embedded network, as defined in evaluate.h. - If the currently loaded network is not the embedded network (some net set - through the UCI setoption) then the filename parameter is required and the - network is saved into that file. - - * #### flip - Flips the side to move. - - -## A note on classical evaluation versus NNUE evaluation - -Both approaches assign a value to a position that is used in alpha-beta (PVS) search -to find the best move. The classical evaluation computes this value as a function -of various chess concepts, handcrafted by experts, tested and tuned using fishtest. -The NNUE evaluation computes this value with a neural network based on basic -inputs (e.g. piece positions only). The network is optimized and trained -on the evaluations of millions of positions at moderate search depth. - -The NNUE evaluation was first introduced in shogi, and ported to Stockfish afterward. -It can be evaluated efficiently on CPUs, and exploits the fact that only parts -of the neural network need to be updated after a typical chess move. -[The nodchip repository][nodchip-link] provided the first version of the needed tools -to train and develop the NNUE networks. Today, more advanced training tools are -available in [the nnue-pytorch repository][pytorch-link], while data generation tools -are available in [a dedicated branch][tools-link]. - -On CPUs supporting modern vector instructions (avx2 and similar), the NNUE evaluation -results in much stronger playing strength, even if the nodes per second computed by -the engine is somewhat lower (roughly 80% of nps is typical). - -Notes: - -1) the NNUE evaluation depends on the Stockfish binary and the network parameter file -(see the EvalFile UCI option). Not every parameter file is compatible with a given -Stockfish binary, but the default value of the EvalFile UCI option is the name of a -network that is guaranteed to be compatible with that binary. - -2) to use the NNUE evaluation, the additional data file with neural network parameters -needs to be available. Normally, this file is already embedded in the binary or it can -be downloaded. The filename for the default (recommended) net can be found as the default -value of the `EvalFile` UCI option, with the format `nn-[SHA256 first 12 digits].nnue` -(for instance, `nn-c157e0a5755b.nnue`). This file can be downloaded from ``` -https://tests.stockfishchess.org/api/nn/[filename] +cd src +make -j build ARCH=x86-64-modern ``` -replacing `[filename]` as needed. -## What to expect from the Syzygy tablebases? - -If the engine is searching a position that is not in the tablebases (e.g. -a position with 8 pieces), it will access the tablebases during the search. -If the engine reports a very large score (typically 153.xx), this means -it has found a winning line into a tablebase position. - -If the engine is given a position to search that is in the tablebases, it -will use the tablebases at the beginning of the search to preselect all -good moves, i.e. all moves that preserve the win or preserve the draw while -taking into account the 50-move rule. -It will then perform a search only on those moves. **The engine will not move -immediately**, unless there is only a single good move. **The engine likely -will not report a mate score, even if the position is known to be won.** - -It is therefore clear that this behaviour is not identical to what one might -be used to with Nalimov tablebases. There are technical reasons for this -difference, the main technical reason being that Nalimov tablebases use the -DTM metric (distance-to-mate), while the Syzygy tablebases use a variation of the -DTZ metric (distance-to-zero, zero meaning any move that resets the 50-move -counter). This special metric is one of the reasons that the Syzygy tablebases are -more compact than Nalimov tablebases, while still storing all information -needed for optimal play and in addition being able to take into account -the 50-move rule. +Detailed compilation instructions for all platforms can be found in our +[documentation][wiki-compile-link]. ## Stockfish on distributed memory systems @@ -266,137 +115,74 @@ implementation that efficiently supports this. Some MPI implentations might bene from leaving 1 core/thread free for these asynchronous communications, and might require setting additional environment variables. ```mpirun``` should forward stdin/stdout to ```rank 0``` only (e.g. ```srun --input=0 --output=0```). - Refer to your MPI documentation for more info. +Refer to your MPI documentation for more info. -## Large Pages - -Stockfish supports large pages on Linux and Windows. Large pages make -the hash access more efficient, improving the engine speed, especially -on large hash sizes. Typical increases are 5..10% in terms of nodes per -second, but speed increases up to 30% have been measured. The support is -automatic. Stockfish attempts to use large pages when available and -will fall back to regular memory allocation when this is not the case. - -### Support on Linux - -Large page support on Linux is obtained by the Linux kernel -transparent huge pages functionality. Typically, transparent huge pages -are already enabled, and no configuration is needed. - -### Support on Windows - -The use of large pages requires "Lock Pages in Memory" privilege. See -[Enable the Lock Pages in Memory Option (Windows)][lockpages-link] -on how to enable this privilege, then run [RAMMap][rammap-link] -to double-check that large pages are used. We suggest that you reboot -your computer after you have enabled large pages, because long Windows -sessions suffer from memory fragmentation, which may prevent Stockfish -from getting large pages: a fresh session is better in this regard. - -## Compiling Stockfish yourself from the sources - -Stockfish has support for 32 or 64-bit CPUs, certain hardware -instructions, big-endian machines such as Power PC, and other platforms. - -On Unix-like systems, it should be easy to compile Stockfish -directly from the source code with the included Makefile in the folder -`src`. In general it is recommended to run `make help` to see a list of make -targets with corresponding descriptions. - -``` - cd src - make help - make net - make build ARCH=x86-64-modern -``` - -When not using the Makefile to compile (for instance, with Microsoft MSVC) you -need to manually set/unset some switches in the compiler command line; see -file *types.h* for a quick reference. - -When reporting an issue or a bug, please tell us which Stockfish version -and which compiler you used to create your executable. This information -can be found by typing the following command in a console: - -``` - ./stockfish compiler -``` - -## Understanding the code base and participating in the project - -Stockfish's improvement over the last decade has been a great community -effort. There are a few ways to help contribute to its growth. +## Contributing ### Donating hardware -Improving Stockfish requires a massive amount of testing. You can donate -your hardware resources by installing the [Fishtest Worker][worker-link] -and view the current tests on [Fishtest][fishtest-link]. +Improving Stockfish requires a massive amount of testing. You can donate your +hardware resources by installing the [Fishtest Worker][worker-link] and viewing +the current tests on [Fishtest][fishtest-link]. ### Improving the code -If you want to help improve the code, there are several valuable resources: - -* [In this wiki,][programming-link] many techniques used in +In the [chessprogramming wiki][programming-link], many techniques used in Stockfish are explained with a lot of background information. +The [section on Stockfish][programmingsf-link] describes many features +and techniques used by Stockfish. However, it is generic rather than +focused on Stockfish's precise implementation. -* [The section on Stockfish][programmingsf-link] -describes many features and techniques used by Stockfish. However, it is -generic rather than being focused on Stockfish's precise implementation. -Nevertheless, a helpful resource. - -* The latest source can always be found on [GitHub][github-link]. -Discussions about Stockfish take place these days mainly in the [FishCooking][fishcooking-link] -group and on the [Stockfish Discord channel][discord-link]. The engine testing is done on [Fishtest][fishtest-link]. If you want to help improve Stockfish, please read this [guideline][guideline-link] first, where the basics of Stockfish development are explained. +Discussions about Stockfish take place these days mainly in the Stockfish +[Discord server][discord-link]. This is also the best place to ask questions +about the codebase and how to improve it. ## Terms of use -Stockfish is free, and distributed under the **GNU General Public License version 3** -(GPL v3). Essentially, this means you are free to do almost exactly -what you want with the program, including distributing it among your -friends, making it available for download from your website, selling -it (either by itself or as part of some bigger software package), or -using it as the starting point for a software project of your own. +Stockfish is free and distributed under the +[**GNU General Public License version 3**][license-link] (GPL v3). Essentially, +this means you are free to do almost exactly what you want with the program, +including distributing it among your friends, making it available for download +from your website, selling it (either by itself or as part of some bigger +software package), or using it as the starting point for a software project of +your own. -The only real limitation is that whenever you distribute Stockfish in -some way, you MUST always include the license and the full source code -(or a pointer to where the source code can be found) to generate the -exact binary you are distributing. If you make any changes to the -source code, these changes must also be made available under the GPL v3. - -For full details, read the copy of the GPL v3 found in the file named -[*Copying.txt*][license-link]. +The only real limitation is that whenever you distribute Stockfish in some way, +you MUST always include the license and the full source code (or a pointer to +where the source code can be found) to generate the exact binary you are +distributing. If you make any changes to the source code, these changes must +also be made available under GPL v3. [authors-link]: https://github.com/official-stockfish/Stockfish/blob/master/AUTHORS [build-link]: https://github.com/official-stockfish/Stockfish/actions/workflows/stockfish.yml [commits-link]: https://github.com/official-stockfish/Stockfish/commits/master [discord-link]: https://discord.gg/GWDRS3kU6R -[fishcooking-link]: https://groups.google.com/g/fishcooking +[issue-link]: https://github.com/official-stockfish/Stockfish/issues/new?assignees=&labels=&template=BUG-REPORT.yml +[discussions-link]: https://github.com/official-stockfish/Stockfish/discussions/new [fishtest-link]: https://tests.stockfishchess.org/tests -[github-link]: https://github.com/official-stockfish/Stockfish [guideline-link]: https://github.com/glinscott/fishtest/wiki/Creating-my-first-test [license-link]: https://github.com/official-stockfish/Stockfish/blob/master/Copying.txt -[lockpages-link]: https://docs.microsoft.com/en-us/sql/database-engine/configure-windows/enable-the-lock-pages-in-memory-option-windows -[nodchip-link]: https://github.com/nodchip/Stockfish [programming-link]: https://www.chessprogramming.org/Main_Page [programmingsf-link]: https://www.chessprogramming.org/Stockfish -[pytorch-link]: https://github.com/glinscott/nnue-pytorch -[rammap-link]: https://docs.microsoft.com/en-us/sysinternals/downloads/rammap [readme-link]: https://github.com/official-stockfish/Stockfish/blob/master/README.md [release-link]: https://github.com/official-stockfish/Stockfish/releases/latest [src-link]: https://github.com/official-stockfish/Stockfish/tree/master/src [stockfish128-logo]: https://stockfishchess.org/images/logo/icon_128x128.png -[tools-link]: https://github.com/official-stockfish/Stockfish/tree/tools -[uci-link]: https://www.shredderchess.com/download/div/uci.zip +[uci-link]: https://backscattering.de/chess/uci/ [website-link]: https://stockfishchess.org -[worker-link]: https://github.com/glinscott/fishtest/wiki/Running-the-worker:-overview +[website-blog-link]: https://stockfishchess.org/blog/ +[wiki-link]: https://github.com/official-stockfish/Stockfish/wiki +[wiki-usage-link]: https://github.com/official-stockfish/Stockfish/wiki/Download-and-usage +[wiki-compile-link]: https://github.com/official-stockfish/Stockfish/wiki/Compiling-from-source +[wiki-commands-link]: https://github.com/official-stockfish/Stockfish/wiki/Commands +[worker-link]: https://github.com/glinscott/fishtest/wiki/Running-the-worker -[build-badge]: https://img.shields.io/github/workflow/status/official-stockfish/Stockfish/Stockfish?style=for-the-badge&label=stockfish&logo=github +[build-badge]: https://img.shields.io/github/actions/workflow/status/official-stockfish/Stockfish/stockfish.yml?branch=master&style=for-the-badge&label=stockfish&logo=github [commits-badge]: https://img.shields.io/github/commits-since/official-stockfish/Stockfish/latest?style=for-the-badge [discord-badge]: https://img.shields.io/discord/435943710472011776?style=for-the-badge&label=discord&logo=Discord [fishtest-badge]: https://img.shields.io/website?style=for-the-badge&down_color=red&down_message=Offline&label=Fishtest&up_color=success&up_message=Online&url=https%3A%2F%2Ftests.stockfishchess.org%2Ftests%2Ffinished diff --git a/Top CPU Contributors.txt b/Top CPU Contributors.txt index 30c963d7..74c471b7 100644 --- a/Top CPU Contributors.txt +++ b/Top CPU Contributors.txt @@ -1,160 +1,167 @@ -Contributors to Fishtest with >10,000 CPU hours, as of 2022-11-19. +Contributors to Fishtest with >10,000 CPU hours, as of 2023-06-20. Thank you! Username CPU Hours Games played ------------------------------------------------------------------ -noobpwnftw 36475307 2748033975 -technologov 14570711 760073590 +noobpwnftw 37457426 2850540907 +technologov 14135647 742892808 +linrock 4423514 303254809 mlang 3026000 200065824 -dew 1689222 100034318 -grandphish2 1442171 86798057 -okrout 1439985 133471766 -pemo 1405374 44189811 -linrock 1299003 28382783 -TueRens 1163420 71159522 -JojoM 897158 55177114 +dew 1689162 100033738 +okrout 1578136 148855886 +pemo 1508508 48814305 +grandphish2 1461406 91540343 +TueRens 1194790 70400852 +JojoM 947612 61773190 tvijlbrief 796125 51897690 +sebastronomy 742434 38218524 mibere 703840 46867607 -gvreuls 635982 40652394 -oz 590763 41201352 -sebastronomy 581517 23307132 -cw 517915 34865769 -fastgm 504266 30264740 -CSU_Dynasty 479901 31846710 -ctoks 433503 28180725 +gvreuls 651026 42988582 +oz 543438 39314736 +cw 517858 34869755 +fastgm 503862 30260818 +leszek 467278 33514883 +CSU_Dynasty 464940 31177118 +ctoks 434416 28506889 crunchy 427035 27344275 -leszek 416883 27493447 -bcross 409982 28062127 -velislav 345954 22232274 +maximmasiutin 424795 26577722 +bcross 415722 29060963 +olafm 395922 32268020 +rpngn 348378 24560289 +velislav 342567 22138992 Fisherman 327231 21829379 +mgrabiak 300612 20608380 Dantist 296386 18031762 -mgrabiak 288928 18869896 -rpngn 259965 16281463 -robal 237653 15148350 -ncfish1 231764 15275003 -nordlandia 226923 14624832 +nordlandia 246201 16189678 +robal 241300 15656382 +marrco 234581 17714473 +ncfish1 227517 15233777 glinscott 208125 13277240 drabel 204167 13930674 mhoram 202894 12601997 bking_US 198894 11876016 -thirdlife 198844 5453268 Thanar 179852 12365359 vdv 175544 9904472 -armo9494 168201 11136452 spams 157128 10319326 -marrco 151599 9551115 sqrt2 147963 9724586 -vdbergh 137690 8971569 +DesolatedDodo 146350 9536172 +Calis007 143165 9478764 +vdbergh 138650 9064413 CoffeeOne 137100 5024116 +armo9494 136191 9460264 malala 136182 8002293 -DesolatedDodo 135276 8657464 xoto 133759 9159372 davar 129023 8376525 +DMBK 122960 8980062 dsmith 122059 7570238 amicic 119661 7938029 Data 113305 8220352 BrunoBanani 112960 7436849 CypressChess 108331 7759788 -skiminki 106518 7062598 +skiminki 107583 7218170 +jcAEie 105675 8238962 MaZePallas 102823 6633619 sterni1971 100532 5880772 sunu 100167 7040199 zeryl 99331 6221261 +thirdlife 99124 2242380 ElbertoOne 99028 7023771 -DMBK 97572 6950312 -Calis007 96779 5611552 -cuistot 93111 5536500 +cuistot 98853 6069816 +bigpen0r 94809 6529203 brabos 92118 6186135 -Wolfgang 91769 5720158 +Wolfgang 91939 6105872 psk 89957 5984901 +sschnee 88235 5268000 racerschmacer 85805 6122790 -jcAEie 85527 5630616 +Fifis 85722 5709729 +Dubslow 84986 6042456 Vizvezdenec 83761 5344740 -sschnee 83557 4853690 0x3C33 82614 5271253 BRAVONE 81239 5054681 -Dubslow 78461 5042980 nssy 76497 5259388 jromang 76106 5236025 teddybaer 75125 5407666 -yurikvelo 73933 5031096 -tolkki963 73885 4721430 +tolkki963 74762 5149662 +megaman7de 74351 4940352 +Wencey 74181 4711488 Pking_cda 73776 5293873 -Bobo1239 71675 4860987 +yurikvelo 73150 5004382 +markkulix 72607 5304642 +Bobo1239 70579 4794999 solarlight 70517 5028306 dv8silencer 70287 3883992 -Gelma 69304 3980932 manap 66273 4121774 -megaman7de 65419 4120200 -markkulix 65331 4114860 -bigpen0r 64932 4683883 tinker 64333 4268790 qurashee 61208 3429862 -AGI 58325 4258646 +Mineta 59357 4418202 +Spprtr 58723 3911011 +AGI 58147 4325994 robnjr 57262 4053117 Freja 56938 3733019 MaxKlaxxMiner 56879 3423958 +MarcusTullius 56746 3762951 ttruscott 56010 3680085 rkl 55132 4164467 renouve 53811 3501516 -Spprtr 52736 3410019 +javran 53785 4627608 finfish 51360 3370515 eva42 51272 3599691 eastorwest 51117 3454811 rap 49985 3219146 -unixwizard 49734 2536230 -pb00067 49727 3298270 +pb00067 49733 3298934 +OuaisBla 48626 3445134 ronaldjerum 47654 3240695 biffhero 46564 3111352 -GPUex 45861 2926502 -Fifis 45843 3088497 -oryx 45578 3493978 VoyagerOne 45476 3452465 -Wencey 44943 2654490 +jmdana 44893 3065205 +maposora 44597 4039578 +oryx 44570 3454238 speedycpu 43842 3003273 jbwiebe 43305 2805433 +GPUex 42378 3133332 Antihistamine 41788 2761312 mhunt 41735 2691355 -olafm 41277 3284344 homyur 39893 2850481 gri 39871 2515779 -MarcusTullius 38303 2251097 Garf 37741 2999686 -kdave 37424 2557406 SC 37299 2731694 csnodgrass 36207 2688994 -jmdana 36157 2210661 strelock 34716 2074055 +szupaw 34102 2880346 EthanOConnor 33370 2090311 slakovv 32915 2021889 -gopeto 31669 2060958 +Gelma 31771 1551204 +gopeto 31671 2060990 +kdave 31157 2198362 manapbk 30987 1810399 Prcuvu 30377 2170122 anst 30301 2190091 jkiiski 30136 1904470 -spcc 30135 1903728 +spcc 29925 1901692 hyperbolic.tom 29840 2017394 -xwziegtm 29763 2347412 chuckstablers 29659 2093438 Pyafue 29650 1902349 belzedar94 28846 1811530 -OuaisBla 27636 1578800 chriswk 26902 1868317 +xwziegtm 26897 2124586 achambord 26582 1767323 Patrick_G 26276 1801617 yorkman 26193 1992080 -Ulysses 25289 1674274 +Ulysses 25288 1689730 SFTUser 25182 1675689 nabildanial 24942 1519409 Sharaf_DG 24765 1786697 +Maxim 24705 1502062 rodneyc 24376 1416402 agg177 23890 1395014 -Ente 23747 1674582 -Karpovbot 23629 1313186 +Goatminola 23763 1956036 +Ente 23639 1671638 +Jopo12321 23467 1483172 JanErik 23408 1703875 Isidor 23388 1680691 Norabor 23371 1603244 -cisco2015 22934 1763773 +cisco2015 22920 1763301 +jsys14 22824 1591906 Zirie 22542 1472937 team-oh 22272 1636708 Roady 22220 1465606 @@ -165,96 +172,96 @@ xor12 21628 1680365 dex 21612 1467203 nesoneg 21494 1463031 user213718 21454 1404128 -AndreasKrug 21227 1577833 sphinx 21211 1384728 +AndreasKrug 21097 1634811 jjoshua2 21001 1423089 +Zake9298 20938 1565848 horst.prack 20878 1465656 -jsys14 20729 1221010 0xB00B1ES 20590 1208666 j3corre 20405 941444 Adrian.Schmidt123 20316 1281436 -bonsi 20022 1300682 wei 19973 1745989 -dapper 19754 1167758 -Zake9298 19745 1458416 +notchris 19958 1800128 +Serpensin 19840 1697528 +Gaster319 19712 1677310 fishtester 19617 1257388 rstoesser 19569 1293588 eudhan 19274 1283717 +votoanthuan 19108 1609992 vulcan 18871 1729392 -Jopo12321 18803 1036284 +Karpovbot 18766 1053178 +qoo_charly_cai 18543 1284937 jundery 18445 1115855 ville 17883 1384026 -5t0ckf15hTr4in3r 17809 1105858 chris 17698 1487385 -dju 17697 994333 purplefishies 17595 1092533 +dju 17414 981289 iisiraider 17275 1049015 DragonLord 17014 1162790 -Karby 16457 1010138 -Goatminola 16278 1145026 +redstone59 16842 1461780 +Alb11747 16787 1213926 IgorLeMasson 16064 1147232 -Gaster319 16056 1109070 -redstone59 15953 1161664 +Karby 15982 979610 scuzzi 15757 968735 ako027ako 15671 1173203 Nikolay.IT 15154 1068349 Andrew Grant 15114 895539 Naven94 15054 834762 OssumOpossum 14857 1007129 -qoo_charly_cai 14490 847865 +ZacHFX 14783 1021842 enedene 14476 905279 -szupaw 14252 929130 bpfliegel 14233 882523 mpx86 14019 759568 jpulman 13982 870599 +Skiff84 13826 721996 crocogoat 13803 1117422 Nesa92 13786 1114691 joster 13710 946160 mbeier 13650 1044928 Hjax 13535 915487 +Nullvalue 13468 1140498 Dark_wizzie 13422 1007152 Rudolphous 13244 883140 +pirt 13100 1009897 Machariel 13010 863104 infinigon 12991 943216 -pirt 12925 985437 -Skiff84 12923 649994 mabichito 12903 749391 thijsk 12886 722107 AdrianSA 12860 804972 Flopzee 12698 894821 +korposzczur 12606 838168 fatmurphy 12547 853210 -woutboat 12419 836696 SapphireBrand 12416 969604 -Oakwen 12406 840961 +Oakwen 12399 844109 deflectooor 12386 579392 modolief 12386 896470 Farseer 12249 694108 +Jackfish 12213 805008 pgontarz 12151 848794 +dbernier 12103 860824 +getraideBFF 12072 1024966 stocky 11954 699440 mschmidt 11941 803401 -MooTheCow 11871 773654 -Jackfish 11867 773550 -dbernier 11705 821780 +MooTheCow 11870 773598 +FormazChar 11766 885707 whelanh 11557 245188 -Maxim 11543 836024 -Nullvalue 11534 731410 -icewulf 11528 650470 -FormazChar 11523 861599 +3cho 11494 1031076 infinity 11470 727027 aga 11412 695127 torbjo 11395 729145 Thomas A. Anderson 11372 732094 savage84 11358 670860 -ali-al-zhrani 11272 781310 d64 11263 789184 -Bourbaki 11108 709144 +ali-al-zhrani 11245 779246 snicolet 11106 869170 -Alb11747 10855 696920 +dapper 11032 771402 +ols 10947 624903 +Karmatron 10828 677458 basepi 10637 744851 Cubox 10621 826448 -Karmatron 10616 674818 michaelrpg 10509 739239 OIVAS7572 10420 995586 -Garruk 10348 704905 +jojo2357 10419 929708 +WoodMan777 10380 873720 +Garruk 10365 706465 dzjp 10343 732529 -ols 10259 570669 diff --git a/src/Makefile b/src/Makefile index 530ea83f..fd29f915 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,5 +1,5 @@ # Stockfish, a UCI chess playing engine derived from Glaurung 2.1 -# Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) +# Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) # # Stockfish is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -69,33 +69,34 @@ VPATH = syzygy:nnue:nnue/features ### Section 2. High-level Configuration ### ========================================================================== # -# flag --- Comp switch --- Description +# flag --- Comp switch --- Description # ---------------------------------------------------------------------------- # -# debug = yes/no --- -DNDEBUG --- Enable/Disable debug mode +# debug = yes/no --- -DNDEBUG --- Enable/Disable debug mode # sanitize = none/ ... (-fsanitize ) -# --- ( undefined ) --- enable undefined behavior checks -# --- ( thread ) --- enable threading error checks -# --- ( address ) --- enable memory access checks -# --- ...etc... --- see compiler documentation for supported sanitizers -# optimize = yes/no --- (-O3/-fast etc.) --- Enable/Disable optimizations -# arch = (name) --- (-arch) --- Target architecture -# bits = 64/32 --- -DIS_64BIT --- 64-/32-bit operating system -# prefetch = yes/no --- -DUSE_PREFETCH --- Use prefetch asm-instruction -# popcnt = yes/no --- -DUSE_POPCNT --- Use popcnt asm-instruction -# pext = yes/no --- -DUSE_PEXT --- Use pext x86_64 asm-instruction -# sse = yes/no --- -msse --- Use Intel Streaming SIMD Extensions -# mmx = yes/no --- -mmmx --- Use Intel MMX instructions -# sse2 = yes/no --- -msse2 --- Use Intel Streaming SIMD Extensions 2 -# ssse3 = yes/no --- -mssse3 --- Use Intel Supplemental Streaming SIMD Extensions 3 -# sse41 = yes/no --- -msse4.1 --- Use Intel Streaming SIMD Extensions 4.1 -# avx2 = yes/no --- -mavx2 --- Use Intel Advanced Vector Extensions 2 -# avxvnni = yes/no --- -mavxvnni --- Use Intel Vector Neural Network Instructions AVX -# avx512 = yes/no --- -mavx512bw --- Use Intel Advanced Vector Extensions 512 -# vnni256 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 256 -# vnni512 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 512 -# neon = yes/no --- -DUSE_NEON --- Use ARM SIMD architecture +# --- ( undefined ) --- enable undefined behavior checks +# --- ( thread ) --- enable threading error checks +# --- ( address ) --- enable memory access checks +# --- ...etc... --- see compiler documentation for supported sanitizers +# optimize = yes/no --- (-O3/-fast etc.) --- Enable/Disable optimizations +# arch = (name) --- (-arch) --- Target architecture +# bits = 64/32 --- -DIS_64BIT --- 64-/32-bit operating system +# prefetch = yes/no --- -DUSE_PREFETCH --- Use prefetch asm-instruction +# popcnt = yes/no --- -DUSE_POPCNT --- Use popcnt asm-instruction +# pext = yes/no --- -DUSE_PEXT --- Use pext x86_64 asm-instruction +# sse = yes/no --- -msse --- Use Intel Streaming SIMD Extensions +# mmx = yes/no --- -mmmx --- Use Intel MMX instructions +# sse2 = yes/no --- -msse2 --- Use Intel Streaming SIMD Extensions 2 +# ssse3 = yes/no --- -mssse3 --- Use Intel Supplemental Streaming SIMD Extensions 3 +# sse41 = yes/no --- -msse4.1 --- Use Intel Streaming SIMD Extensions 4.1 +# avx2 = yes/no --- -mavx2 --- Use Intel Advanced Vector Extensions 2 +# avxvnni = yes/no --- -mavxvnni --- Use Intel Vector Neural Network Instructions AVX +# avx512 = yes/no --- -mavx512bw --- Use Intel Advanced Vector Extensions 512 +# vnni256 = yes/no --- -mavx256vnni --- Use Intel Vector Neural Network Instructions 512 with 256bit operands +# vnni512 = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 512 +# neon = yes/no --- -DUSE_NEON --- Use ARM SIMD architecture # mpi = yes/no --- -DUSE_MPI --- Use Message Passing Interface +# dotprod = yes/no --- -DUSE_NEON_DOTPROD --- Use ARM advanced SIMD Int8 dot product instructions # # Note that Makefile is space sensitive, so when adding new architectures # or modifying existing flags, you have to make sure there are no extra spaces @@ -117,7 +118,7 @@ ifeq ($(ARCH), $(filter $(ARCH), \ x86-64-vnni512 x86-64-vnni256 x86-64-avx512 x86-64-avxvnni x86-64-bmi2 \ x86-64-avx2 x86-64-sse41-popcnt x86-64-modern x86-64-ssse3 x86-64-sse3-popcnt \ x86-64 x86-32-sse41-popcnt x86-32-sse2 x86-32 ppc-64 ppc-32 e2k \ - armv7 armv7-neon armv8 apple-silicon general-64 general-32 riscv64)) + armv7 armv7-neon armv8 armv8-dotprod apple-silicon general-64 general-32 riscv64)) SUPPORTED_ARCH=true else SUPPORTED_ARCH=false @@ -142,6 +143,7 @@ vnni256 = no vnni512 = no neon = no mpi = no +dotprod = no arm_version = 0 STRIP = strip @@ -310,11 +312,21 @@ ifeq ($(ARCH),armv8) arm_version = 8 endif +ifeq ($(ARCH),armv8-dotprod) + arch = armv8 + prefetch = yes + popcnt = yes + neon = yes + dotprod = yes + arm_version = 8 +endif + ifeq ($(ARCH),apple-silicon) arch = arm64 prefetch = yes popcnt = yes neon = yes + dotprod = yes arm_version = 8 endif @@ -368,7 +380,7 @@ endif ifeq ($(COMP),gcc) comp=gcc CXX=g++ - CXXFLAGS += -pedantic -Wextra -Wshadow + CXXFLAGS += -pedantic -Wextra -Wshadow -Wmissing-declarations ifeq ($(arch),$(filter $(arch),armv7 armv8 riscv64)) ifeq ($(OS),Android) @@ -412,13 +424,14 @@ ifeq ($(COMP),mingw) CXX=i686-w64-mingw32-c++-posix endif endif - CXXFLAGS += -pedantic -Wextra -Wshadow + CXXFLAGS += -pedantic -Wextra -Wshadow -Wmissing-declarations endif -ifeq ($(COMP),icc) - comp=icc - CXX=icpc - CXXFLAGS += -diag-disable 1476,10120 -Wcheck -Wabi -Wdeprecated -strict-ansi +ifeq ($(COMP),icx) + comp=icx + CXX=icpx + CXXFLAGS += --intel -pedantic -Wextra -Wshadow -Wmissing-prototypes \ + -Wconditional-uninitialized -Wabi -Wdeprecated endif ifeq ($(COMP),clang) @@ -428,7 +441,8 @@ ifeq ($(COMP),clang) CXX=x86_64-w64-mingw32-clang++ endif - CXXFLAGS += -pedantic -Wextra -Wshadow + CXXFLAGS += -pedantic -Wextra -Wshadow -Wmissing-prototypes \ + -Wconditional-uninitialized ifeq ($(filter $(KERNEL),Darwin OpenBSD FreeBSD),) ifeq ($(target_windows),) @@ -488,9 +502,9 @@ ifeq ($(COMP),ndk) LDFLAGS += -static-libstdc++ -pie -lm -latomic endif -ifeq ($(comp),icc) - profile_make = icc-profile-make - profile_use = icc-profile-use +ifeq ($(comp),icx) + profile_make = icx-profile-make + profile_use = icx-profile-use else ifeq ($(comp),clang) profile_make = clang-profile-make profile_use = clang-profile-use @@ -514,7 +528,7 @@ endif ### Sometimes gcc is really clang ifeq ($(COMP),gcc) - gccversion = $(shell $(CXX) --version) + gccversion = $(shell $(CXX) --version 2>/dev/null) gccisclang = $(findstring clang,$(gccversion)) ifneq ($(gccisclang),) profile_make = clang-profile-make @@ -561,7 +575,7 @@ ifeq ($(optimize),yes) endif ifeq ($(KERNEL),Darwin) - ifeq ($(comp),$(filter $(comp),clang icc)) + ifeq ($(comp),$(filter $(comp),clang icx)) CXXFLAGS += -mdynamic-no-pic endif @@ -573,7 +587,10 @@ ifeq ($(optimize),yes) endif ifeq ($(comp),clang) - CXXFLAGS += -fexperimental-new-pass-manager + clangmajorversion = $(shell $(CXX) -dumpversion 2>/dev/null | cut -f1 -d.) + ifeq ($(shell expr $(clangmajorversion) \< 16),1) + CXXFLAGS += -fexperimental-new-pass-manager + endif endif endif @@ -594,8 +611,6 @@ endif ifeq ($(popcnt),yes) ifeq ($(arch),$(filter $(arch),ppc64 armv7 armv8 arm64)) CXXFLAGS += -DUSE_POPCNT - else ifeq ($(comp),icc) - CXXFLAGS += -msse3 -DUSE_POPCNT else CXXFLAGS += -msse3 -mpopcnt -DUSE_POPCNT endif @@ -604,63 +619,63 @@ endif ### 3.6 SIMD architectures ifeq ($(avx2),yes) CXXFLAGS += -DUSE_AVX2 - ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) CXXFLAGS += -mavx2 -mbmi endif endif ifeq ($(avxvnni),yes) CXXFLAGS += -DUSE_VNNI -DUSE_AVXVNNI - ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) CXXFLAGS += -mavxvnni endif endif ifeq ($(avx512),yes) CXXFLAGS += -DUSE_AVX512 - ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) CXXFLAGS += -mavx512f -mavx512bw endif endif ifeq ($(vnni256),yes) CXXFLAGS += -DUSE_VNNI - ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) CXXFLAGS += -mavx512f -mavx512bw -mavx512vnni -mavx512dq -mavx512vl -mprefer-vector-width=256 endif endif ifeq ($(vnni512),yes) CXXFLAGS += -DUSE_VNNI - ifeq ($(comp),$(filter $(comp),gcc clang mingw)) - CXXFLAGS += -mavx512vnni -mavx512dq -mavx512vl + ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) + CXXFLAGS += -mavx512f -mavx512bw -mavx512vnni -mavx512dq -mavx512vl -mprefer-vector-width=512 endif endif ifeq ($(sse41),yes) CXXFLAGS += -DUSE_SSE41 - ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) CXXFLAGS += -msse4.1 endif endif ifeq ($(ssse3),yes) CXXFLAGS += -DUSE_SSSE3 - ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) CXXFLAGS += -mssse3 endif endif ifeq ($(sse2),yes) CXXFLAGS += -DUSE_SSE2 - ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) CXXFLAGS += -msse2 endif endif ifeq ($(mmx),yes) CXXFLAGS += -DUSE_MMX - ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) CXXFLAGS += -mmmx endif endif @@ -676,24 +691,28 @@ ifeq ($(neon),yes) endif endif +ifeq ($(dotprod),yes) + CXXFLAGS += -march=armv8.2-a+dotprod -DUSE_NEON_DOTPROD +endif + ### 3.7 pext ifeq ($(pext),yes) CXXFLAGS += -DUSE_PEXT - ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + ifeq ($(comp),$(filter $(comp),gcc clang mingw icx)) CXXFLAGS += -mbmi2 endif endif ### 3.7.1 Try to include git commit sha for versioning -GIT_SHA = $(shell git rev-parse --short HEAD 2>/dev/null) +GIT_SHA = $(shell git rev-parse HEAD 2>/dev/null | cut -c 1-8) ifneq ($(GIT_SHA), ) - CXXFLAGS += -DGIT_SHA=\"$(GIT_SHA)\" + CXXFLAGS += -DGIT_SHA=$(GIT_SHA) endif ### 3.7.2 Try to include git commit date for versioning GIT_DATE = $(shell git show -s --date=format:'%Y%m%d' --format=%cd HEAD 2>/dev/null) ifneq ($(GIT_DATE), ) - CXXFLAGS += -DGIT_DATE=\"$(GIT_DATE)\" + CXXFLAGS += -DGIT_DATE=$(GIT_DATE) endif ### 3.8 Link Time Optimization @@ -701,8 +720,11 @@ endif ### needs access to the optimization flags. ifeq ($(optimize),yes) ifeq ($(debug), no) - ifeq ($(comp),clang) + ifeq ($(comp),$(filter $(comp),clang icx)) CXXFLAGS += -flto=full + ifeq ($(comp),icx) + CXXFLAGS += -fwhole-program-vtables + endif ifeq ($(target_windows),yes) CXXFLAGS += -fuse-ld=lld endif @@ -758,19 +780,19 @@ help: @echo "Supported targets:" @echo "" @echo "help > Display architecture details" - @echo "build > Standard build" + @echo "profile-build > standard build with profile-guided optimization" + @echo "build > skip profile-guided optimization" @echo "net > Download the default nnue net" - @echo "profile-build > Faster build (with profile-guided optimization)" @echo "strip > Strip executable" @echo "install > Install executable" @echo "clean > Clean up" @echo "" @echo "Supported archs:" @echo "" - @echo "x86-64-vnni512 > x86 64-bit with vnni support 512bit wide" - @echo "x86-64-vnni256 > x86 64-bit with vnni support 256bit wide" + @echo "x86-64-vnni512 > x86 64-bit with vnni 512bit support" + @echo "x86-64-vnni256 > x86 64-bit with vnni 512bit support, limit operands to 256bit wide" @echo "x86-64-avx512 > x86 64-bit with avx512 support" - @echo "x86-64-avxvnni > x86 64-bit with avxvnni support" + @echo "x86-64-avxvnni > x86 64-bit with vnni 256bit support" @echo "x86-64-bmi2 > x86 64-bit with bmi2 support" @echo "x86-64-avx2 > x86 64-bit with avx2 support" @echo "x86-64-sse41-popcnt > x86 64-bit with sse41 and popcnt support" @@ -786,6 +808,7 @@ help: @echo "armv7 > ARMv7 32-bit" @echo "armv7-neon > ARMv7 32-bit with popcnt and neon" @echo "armv8 > ARMv8 64-bit with popcnt and neon" + @echo "armv8-dotprod > ARMv8 64-bit with popcnt, neon and dot product support" @echo "e2k > Elbrus 2000" @echo "apple-silicon > Apple silicon ARM64" @echo "general-64 > unspecified 64-bit" @@ -797,17 +820,18 @@ help: @echo "gcc > Gnu compiler (default)" @echo "mingw > Gnu compiler with MinGW under Windows" @echo "clang > LLVM Clang compiler" - @echo "icc > Intel compiler" + @echo "icx > Intel oneAPI DPC++/C++ Compiler" @echo "ndk > Google NDK to cross-compile for Android" @echo "" - @echo "Simple examples. If you don't know what to do, you likely want to run: " + @echo "Simple examples. If you don't know what to do, you likely want to run one of: " @echo "" - @echo "make -j build ARCH=x86-64 (A portable, slow compile for 64-bit systems)" - @echo "make -j build ARCH=x86-32 (A portable, slow compile for 32-bit systems)" + @echo "make -j profile-build ARCH=x86-64-avx2 # typically a fast compile for common systems " + @echo "make -j profile-build ARCH=x86-64-modern # A more portable compile for 64-bit systems " + @echo "make -j profile-build ARCH=x86-64 # A portable compile for 64-bit systems " @echo "" - @echo "Advanced examples, for experienced users looking for performance: " + @echo "Advanced examples, for experienced users: " @echo "" - @echo "make help ARCH=x86-64-bmi2" + @echo "make -j profile-build ARCH=x86-64-bmi2" @echo "make -j profile-build ARCH=x86-64-bmi2 COMP=gcc COMPCXX=g++-9.0" @echo "make -j build ARCH=x86-64-ssse3 COMP=clang" @echo "" @@ -822,8 +846,10 @@ endif .PHONY: help build profile-build strip install clean net objclean profileclean \ - config-sanity icc-profile-use icc-profile-make gcc-profile-use gcc-profile-make \ - clang-profile-use clang-profile-make FORCE + config-sanity \ + icx-profile-use icx-profile-make \ + gcc-profile-use gcc-profile-make \ + clang-profile-use clang-profile-make FORCE build: net config-sanity $(MAKE) ARCH=$(ARCH) COMP=$(COMP) all @@ -863,7 +889,7 @@ net: $(eval nnuedownloadurl2 := https://github.com/official-stockfish/networks/raw/master/$(nnuenet)) $(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -skL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi)) @if [ "x$(curl_or_wget)" = "x" ]; then \ - echo "Automatic download failed: neither curl nor wget is installed. Install one of these tools or download the net manually"; exit 1; \ + echo "Neither curl nor wget is installed. Install one of these tools unless the net has been downloaded manually"; \ fi $(eval shasum_command := $(shell if hash shasum 2>/dev/null; then echo "shasum -a 256 "; elif hash sha256sum 2>/dev/null; then echo "sha256sum "; fi)) @if [ "x$(shasum_command)" = "x" ]; then \ @@ -874,7 +900,9 @@ net: echo "$(nnuenet) available."; \ else \ if [ "x$(curl_or_wget)" != "x" ]; then \ - echo "Downloading $${nnuedownloadurl}"; $(curl_or_wget) $${nnuedownloadurl} > $(nnuenet);\ + echo "Downloading $${nnuedownloadurl}"; $(curl_or_wget) $${nnuedownloadurl} > $(nnuenet);\ + else \ + echo "No net found and download not possible"; exit 1;\ fi; \ fi; \ if [ "x$(shasum_command)" != "x" ]; then \ @@ -937,7 +965,9 @@ config-sanity: net @echo "vnni512: '$(vnni512)'" @echo "neon: '$(neon)'" @echo "mpi: '$(mpi)'" + @echo "dotprod: '$(dotprod)'" @echo "arm_version: '$(arm_version)'" + @echo "target_windows: '$(target_windows)'" @echo "" @echo "Flags:" @echo "CXX: $(CXX)" @@ -966,7 +996,7 @@ config-sanity: net @test "$(vnni256)" = "yes" || test "$(vnni256)" = "no" @test "$(vnni512)" = "yes" || test "$(vnni512)" = "no" @test "$(neon)" = "yes" || test "$(neon)" = "no" - @test "$(comp)" = "gcc" || test "$(comp)" = "icc" || test "$(comp)" = "mingw" || test "$(comp)" = "clang" \ + @test "$(comp)" = "gcc" || test "$(comp)" = "icx" || test "$(comp)" = "mingw" || test "$(comp)" = "clang" \ || test "$(comp)" = "armv7a-linux-androideabi16-clang" || test "$(comp)" = "aarch64-linux-android21-clang" $(EXE): $(OBJS) @@ -1004,18 +1034,22 @@ gcc-profile-use: EXTRALDFLAGS='-lgcov' \ all -icc-profile-make: - @mkdir -p profdir +icx-profile-make: $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ - EXTRACXXFLAGS='-prof-gen=srcpos -prof_dir ./profdir' \ + EXTRACXXFLAGS='-fprofile-instr-generate ' \ + EXTRALDFLAGS=' -fprofile-instr-generate' \ all -icc-profile-use: +icx-profile-use: + $(XCRUN) llvm-profdata merge -output=stockfish.profdata *.profraw $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ - EXTRACXXFLAGS='-prof_use -prof_dir ./profdir' \ + EXTRACXXFLAGS='-fprofile-instr-use=stockfish.profdata' \ + EXTRALDFLAGS='-fprofile-use ' \ all .depend: $(SRCS) -@$(CXX) $(DEPENDFLAGS) -MM $(SRCS) > $@ 2> /dev/null +ifeq (, $(filter $(MAKECMDGOALS), help strip install clean net objclean profileclean config-sanity)) -include .depend +endif diff --git a/src/benchmark.cpp b/src/benchmark.cpp index e1c025ad..a1ad0550 100644 --- a/src/benchmark.cpp +++ b/src/benchmark.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,6 +16,8 @@ along with this program. If not, see . */ +#include "benchmark.h" + #include #include #include diff --git a/src/benchmark.h b/src/benchmark.h new file mode 100644 index 00000000..64acf833 --- /dev/null +++ b/src/benchmark.h @@ -0,0 +1,34 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef BENCHMARK_H_INCLUDED +#define BENCHMARK_H_INCLUDED + +#include +#include +#include + +namespace Stockfish { + +class Position; + +std::vector setup_bench(const Position&, std::istream&); + +} // namespace Stockfish + +#endif // #ifndef BENCHMARK_H_INCLUDED diff --git a/src/bitbase.cpp b/src/bitbase.cpp index 84300baf..e21d1fe9 100644 --- a/src/bitbase.cpp +++ b/src/bitbase.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/bitboard.cpp b/src/bitboard.cpp index fd0ba235..fd5c3c22 100644 --- a/src/bitboard.cpp +++ b/src/bitboard.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -27,7 +27,6 @@ namespace Stockfish { uint8_t PopCnt16[1 << 16]; uint8_t SquareDistance[SQUARE_NB][SQUARE_NB]; -Bitboard SquareBB[SQUARE_NB]; Bitboard LineBB[SQUARE_NB][SQUARE_NB]; Bitboard BetweenBB[SQUARE_NB][SQUARE_NB]; Bitboard PseudoAttacks[PIECE_TYPE_NB][SQUARE_NB]; @@ -82,9 +81,6 @@ void Bitboards::init() { for (unsigned i = 0; i < (1 << 16); ++i) PopCnt16[i] = uint8_t(std::bitset<16>(i).count()); - for (Square s = SQ_A1; s <= SQ_H8; ++s) - SquareBB[s] = (1ULL << s); - for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1) for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2) SquareDistance[s1][s2] = std::max(distance(s1, s2), distance(s1, s2)); diff --git a/src/bitboard.h b/src/bitboard.h index 2b6e2a69..42fd0e97 100644 --- a/src/bitboard.h +++ b/src/bitboard.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -74,7 +74,6 @@ constexpr Bitboard KingFlank[FILE_NB] = { extern uint8_t PopCnt16[1 << 16]; extern uint8_t SquareDistance[SQUARE_NB][SQUARE_NB]; -extern Bitboard SquareBB[SQUARE_NB]; extern Bitboard BetweenBB[SQUARE_NB][SQUARE_NB]; extern Bitboard LineBB[SQUARE_NB][SQUARE_NB]; extern Bitboard PseudoAttacks[PIECE_TYPE_NB][SQUARE_NB]; @@ -108,7 +107,7 @@ extern Magic BishopMagics[SQUARE_NB]; inline Bitboard square_bb(Square s) { assert(is_ok(s)); - return SquareBB[s]; + return (1ULL << s); } diff --git a/src/endgame.cpp b/src/endgame.cpp index e773e7a9..9021f242 100644 --- a/src/endgame.cpp +++ b/src/endgame.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/endgame.h b/src/endgame.h index e79f696f..c184cb3f 100644 --- a/src/endgame.h +++ b/src/endgame.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/evaluate.cpp b/src/evaluate.cpp index ec53796f..d572fe0c 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -37,7 +37,7 @@ #include "timeman.h" #include "uci.h" #include "incbin/incbin.h" - +#include "nnue/evaluate_nnue.h" // Macro to embed the default efficiently updatable neural network (NNUE) file // data in the engine binary (using incbin.h, by Dale Weiler). @@ -83,20 +83,18 @@ namespace Eval { eval_file = EvalFileDefaultName; #if defined(DEFAULT_NNUE_DIRECTORY) - #define stringify2(x) #x - #define stringify(x) stringify2(x) vector dirs = { "" , "" , CommandLine::binaryDirectory , stringify(DEFAULT_NNUE_DIRECTORY) }; #else vector dirs = { "" , "" , CommandLine::binaryDirectory }; #endif - for (string directory : dirs) + for (const string& directory : dirs) if (currentEvalFileName != eval_file) { if (directory != "") { ifstream stream(directory + eval_file, ios::binary); - if (load_eval(eval_file, stream)) + if (NNUE::load_eval(eval_file, stream)) currentEvalFileName = eval_file; } @@ -112,7 +110,7 @@ namespace Eval { (void) gEmbeddedNNUEEnd; // Silence warning on unused variable istream stream(&buffer); - if (load_eval(eval_file, stream)) + if (NNUE::load_eval(eval_file, stream)) currentEvalFileName = eval_file; } } @@ -163,24 +161,24 @@ namespace Trace { Score scores[TERM_NB][COLOR_NB]; - double to_cp(Value v) { return double(v) / UCI::NormalizeToPawnValue; } + static double to_cp(Value v) { return double(v) / UCI::NormalizeToPawnValue; } - void add(int idx, Color c, Score s) { + static void add(int idx, Color c, Score s) { scores[idx][c] = s; } - void add(int idx, Score w, Score b = SCORE_ZERO) { + static void add(int idx, Score w, Score b = SCORE_ZERO) { scores[idx][WHITE] = w; scores[idx][BLACK] = b; } - std::ostream& operator<<(std::ostream& os, Score s) { + static std::ostream& operator<<(std::ostream& os, Score s) { os << std::setw(5) << to_cp(mg_value(s)) << " " << std::setw(5) << to_cp(eg_value(s)); return os; } - std::ostream& operator<<(std::ostream& os, Term t) { + static std::ostream& operator<<(std::ostream& os, Term t) { if (t == MATERIAL || t == IMBALANCE || t == WINNABLE || t == TOTAL) os << " ---- ----" << " | " << " ---- ----"; @@ -197,8 +195,8 @@ using namespace Trace; namespace { // Threshold for lazy and space evaluation - constexpr Value LazyThreshold1 = Value(3631); - constexpr Value LazyThreshold2 = Value(2084); + constexpr Value LazyThreshold1 = Value(3622); + constexpr Value LazyThreshold2 = Value(1962); constexpr Value SpaceThreshold = Value(11551); // KingAttackWeights[PieceType] contains king attack weights by piece type @@ -392,10 +390,10 @@ namespace { template template Score Evaluation::pieces() { - constexpr Color Them = ~Us; - constexpr Direction Down = -pawn_push(Us); - constexpr Bitboard OutpostRanks = (Us == WHITE ? Rank4BB | Rank5BB | Rank6BB - : Rank5BB | Rank4BB | Rank3BB); + constexpr Color Them = ~Us; + [[maybe_unused]] constexpr Direction Down = -pawn_push(Us); + [[maybe_unused]] constexpr Bitboard OutpostRanks = (Us == WHITE ? Rank4BB | Rank5BB | Rank6BB + : Rank5BB | Rank4BB | Rank3BB); Bitboard b1 = pos.pieces(Us, Pt); Bitboard b, bb; Score score = SCORE_ZERO; @@ -434,7 +432,7 @@ namespace { int mob = popcount(b & mobilityArea[Us]); mobility[Us] += MobilityBonus[Pt - 2][mob]; - if (Pt == BISHOP || Pt == KNIGHT) + if constexpr (Pt == BISHOP || Pt == KNIGHT) { // Bonus if the piece is on an outpost square or can reach one // Bonus for knights (UncontestedOutpost) if few relevant targets @@ -1052,52 +1050,41 @@ make_v: /// evaluate() is the evaluator for the outer world. It returns a static /// evaluation of the position from the point of view of the side to move. -Value Eval::evaluate(const Position& pos, int* complexity) { +Value Eval::evaluate(const Position& pos) { + + assert(!pos.checkers()); Value v; Value psq = pos.psq_eg_stm(); // We use the much less accurate but faster Classical eval when the NNUE // option is set to false. Otherwise we use the NNUE eval unless the - // PSQ advantage is decisive and several pieces remain. (~3 Elo) - bool useClassical = !useNNUE || (pos.count() > 7 && abs(psq) > 1760); + // PSQ advantage is decisive. (~4 Elo at STC, 1 Elo at LTC) + bool useClassical = !useNNUE || abs(psq) > 2048; if (useClassical) v = Evaluation(pos).value(); else { int nnueComplexity; - int scale = 1064 + 106 * pos.non_pawn_material() / 5120; + int npm = pos.non_pawn_material() / 64; Color stm = pos.side_to_move(); Value optimism = pos.this_thread()->optimism[stm]; Value nnue = NNUE::evaluate(pos, true, &nnueComplexity); - // Blend nnue complexity with (semi)classical complexity - nnueComplexity = ( 416 * nnueComplexity - + 424 * abs(psq - nnue) - + (optimism > 0 ? int(optimism) * int(psq - nnue) : 0) - ) / 1024; - - // Return hybrid NNUE complexity to caller - if (complexity) - *complexity = nnueComplexity; - - optimism = optimism * (269 + nnueComplexity) / 256; - v = (nnue * scale + optimism * (scale - 754)) / 1024; + // Blend optimism with nnue complexity and (semi)classical complexity + optimism += optimism * (nnueComplexity + abs(psq - nnue)) / 512; + v = (nnue * (945 + npm) + optimism * (150 + npm)) / 1024; } // Damp down the evaluation linearly when shuffling - v = v * (195 - pos.rule50_count()) / 211; + v = v * (200 - pos.rule50_count()) / 214; // Guarantee evaluation does not hit the tablebase range v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); - // When not using NNUE, return classical complexity to caller - if (complexity && (!useNNUE || useClassical)) - *complexity = abs(v - psq); - return v; } diff --git a/src/evaluate.h b/src/evaluate.h index f5ac3263..b9d7231d 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,7 +31,7 @@ class Position; namespace Eval { std::string trace(Position& pos); - Value evaluate(const Position& pos, int* complexity = nullptr); + Value evaluate(const Position& pos); extern bool useNNUE; extern std::string currentEvalFileName; @@ -39,20 +39,13 @@ namespace Eval { // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue // for the build process (profile-build and fishtest) to work. Do not change the // name of the macro, as it is used in the Makefile. - #define EvalFileDefaultName "nn-ad9b42354671.nnue" + #define EvalFileDefaultName "nn-5af11540bbfe.nnue" namespace NNUE { - std::string trace(Position& pos); - Value evaluate(const Position& pos, bool adjusted = false, int* complexity = nullptr); - void init(); void verify(); - bool load_eval(std::string name, std::istream& stream); - bool save_eval(std::ostream& stream); - bool save_eval(const std::optional& filename); - } // namespace NNUE } // namespace Eval diff --git a/src/incbin/incbin.h b/src/incbin/incbin.h old mode 100755 new mode 100644 diff --git a/src/main.cpp b/src/main.cpp index 8c67021c..7f70aa17 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/material.cpp b/src/material.cpp index 1567358a..7102f879 100644 --- a/src/material.cpp +++ b/src/material.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/material.h b/src/material.h index 3ca169ce..9acf78f5 100644 --- a/src/material.h +++ b/src/material.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,7 +28,7 @@ namespace Stockfish::Material { /// Material::Entry contains various information about a material configuration. /// It contains a material imbalance evaluation, a function pointer to a special -/// endgame evaluation function (which in most cases is NULL, meaning that the +/// endgame evaluation function (which in most cases is nullptr, meaning that the /// standard evaluation function will be used), and scale factors. /// /// The scale factors are used to scale the evaluation score up or down. For @@ -62,7 +62,7 @@ struct Entry { uint8_t factor[COLOR_NB]; }; -typedef HashTable Table; +using Table = HashTable; Entry* probe(const Position& pos); diff --git a/src/misc.cpp b/src/misc.cpp index 2d86969f..bbfa4061 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -32,21 +32,26 @@ // the calls at compile time), try to load them at runtime. To do this we need // first to define the corresponding function pointers. extern "C" { -typedef bool(*fun1_t)(LOGICAL_PROCESSOR_RELATIONSHIP, - PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD); -typedef bool(*fun2_t)(USHORT, PGROUP_AFFINITY); -typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY); -typedef bool(*fun4_t)(USHORT, PGROUP_AFFINITY, USHORT, PUSHORT); -typedef WORD(*fun5_t)(); +using fun1_t = bool(*)(LOGICAL_PROCESSOR_RELATIONSHIP, + PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD); +using fun2_t = bool(*)(USHORT, PGROUP_AFFINITY); +using fun3_t = bool(*)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY); +using fun4_t = bool(*)(USHORT, PGROUP_AFFINITY, USHORT, PUSHORT); +using fun5_t = WORD(*)(); +using fun6_t = bool(*)(HANDLE, DWORD, PHANDLE); +using fun7_t = bool(*)(LPCSTR, LPCSTR, PLUID); +using fun8_t = bool(*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD); } #endif +#include +#include #include #include #include #include +#include #include -#include #if defined(__linux__) && !defined(__ANDROID__) #include @@ -68,7 +73,7 @@ namespace Stockfish { namespace { /// Version number or dev. -const string version = "15.1"; +constexpr string_view version = "16"; /// Our fancy logging facility. The trick here is to replace cin.rdbuf() and /// cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We @@ -151,13 +156,13 @@ string engine_info(bool to_uci) { stringstream ss; ss << "Stockfish " << version << setfill('0'); - if (version == "dev") + if constexpr (version == "dev") { ss << "-"; #ifdef GIT_DATE - ss << GIT_DATE; + ss << stringify(GIT_DATE); #else - const string months("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec"); + constexpr string_view months("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec"); string month, day, year; stringstream date(__DATE__); // From compiler, format is "Sep 21 2008" @@ -168,7 +173,7 @@ string engine_info(bool to_uci) { ss << "-"; #ifdef GIT_SHA - ss << GIT_SHA; + ss << stringify(GIT_SHA); #else ss << "nogit"; #endif @@ -185,8 +190,6 @@ string engine_info(bool to_uci) { std::string compiler_info() { - #define stringify2(x) #x - #define stringify(x) stringify2(x) #define make_version_string(major, minor, patch) stringify(major) "." stringify(minor) "." stringify(patch) /// Predefined macros hell: @@ -298,21 +301,94 @@ std::string compiler_info() { /// Debug functions used mainly to collect run-time statistics -static std::atomic hits[2], means[2]; +constexpr int MaxDebugSlots = 32; -void dbg_hit_on(bool b) { ++hits[0]; if (b) ++hits[1]; } -void dbg_hit_on(bool c, bool b) { if (c) dbg_hit_on(b); } -void dbg_mean_of(int v) { ++means[0]; means[1] += v; } +namespace { + +template +struct DebugInfo { + std::atomic data[N] = { 0 }; + + constexpr inline std::atomic& operator[](int index) { return data[index]; } +}; + +DebugInfo<2> hit[MaxDebugSlots]; +DebugInfo<2> mean[MaxDebugSlots]; +DebugInfo<3> stdev[MaxDebugSlots]; +DebugInfo<6> correl[MaxDebugSlots]; + +} // namespace + +void dbg_hit_on(bool cond, int slot) { + + ++hit[slot][0]; + if (cond) + ++hit[slot][1]; +} + +void dbg_mean_of(int64_t value, int slot) { + + ++mean[slot][0]; + mean[slot][1] += value; +} + +void dbg_stdev_of(int64_t value, int slot) { + + ++stdev[slot][0]; + stdev[slot][1] += value; + stdev[slot][2] += value * value; +} + +void dbg_correl_of(int64_t value1, int64_t value2, int slot) { + + ++correl[slot][0]; + correl[slot][1] += value1; + correl[slot][2] += value1 * value1; + correl[slot][3] += value2; + correl[slot][4] += value2 * value2; + correl[slot][5] += value1 * value2; +} void dbg_print() { - if (hits[0]) - cerr << "Total " << hits[0] << " Hits " << hits[1] - << " hit rate (%) " << 100 * hits[1] / hits[0] << endl; + int64_t n; + auto E = [&n](int64_t x) { return double(x) / n; }; + auto sqr = [](double x) { return x * x; }; - if (means[0]) - cerr << "Total " << means[0] << " Mean " - << (double)means[1] / means[0] << endl; + for (int i = 0; i < MaxDebugSlots; ++i) + if ((n = hit[i][0])) + std::cerr << "Hit #" << i + << ": Total " << n << " Hits " << hit[i][1] + << " Hit Rate (%) " << 100.0 * E(hit[i][1]) + << std::endl; + + for (int i = 0; i < MaxDebugSlots; ++i) + if ((n = mean[i][0])) + { + std::cerr << "Mean #" << i + << ": Total " << n << " Mean " << E(mean[i][1]) + << std::endl; + } + + for (int i = 0; i < MaxDebugSlots; ++i) + if ((n = stdev[i][0])) + { + double r = sqrtl(E(stdev[i][2]) - sqr(E(stdev[i][1]))); + std::cerr << "Stdev #" << i + << ": Total " << n << " Stdev " << r + << std::endl; + } + + for (int i = 0; i < MaxDebugSlots; ++i) + if ((n = correl[i][0])) + { + double r = (E(correl[i][5]) - E(correl[i][1]) * E(correl[i][3])) + / ( sqrtl(E(correl[i][2]) - sqr(E(correl[i][1]))) + * sqrtl(E(correl[i][4]) - sqr(E(correl[i][3])))); + std::cerr << "Correl. #" << i + << ": Total " << n << " Coefficient " << r + << std::endl; + } } @@ -373,8 +449,10 @@ void* std_aligned_alloc(size_t alignment, size_t size) { #if defined(POSIXALIGNEDALLOC) void *mem; return posix_memalign(&mem, alignment, size) ? nullptr : mem; -#elif defined(_WIN32) +#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64) return _mm_malloc(size, alignment); +#elif defined(_WIN32) + return _aligned_malloc(size, alignment); #else return std::aligned_alloc(alignment, size); #endif @@ -384,8 +462,10 @@ void std_aligned_free(void* ptr) { #if defined(POSIXALIGNEDALLOC) free(ptr); -#elif defined(_WIN32) +#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64) _mm_free(ptr); +#elif defined(_WIN32) + _aligned_free(ptr); #else free(ptr); #endif @@ -409,11 +489,30 @@ static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize if (!largePageSize) return nullptr; - // We need SeLockMemoryPrivilege, so try to enable it for the process - if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken)) + // Dynamically link OpenProcessToken, LookupPrivilegeValue and AdjustTokenPrivileges + + HMODULE hAdvapi32 = GetModuleHandle(TEXT("advapi32.dll")); + + if (!hAdvapi32) + hAdvapi32 = LoadLibrary(TEXT("advapi32.dll")); + + auto fun6 = (fun6_t)(void(*)())GetProcAddress(hAdvapi32, "OpenProcessToken"); + if (!fun6) + return nullptr; + auto fun7 = (fun7_t)(void(*)())GetProcAddress(hAdvapi32, "LookupPrivilegeValueA"); + if (!fun7) + return nullptr; + auto fun8 = (fun8_t)(void(*)())GetProcAddress(hAdvapi32, "AdjustTokenPrivileges"); + if (!fun8) return nullptr; - if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &luid)) + // We need SeLockMemoryPrivilege, so try to enable it for the process + if (!fun6( // OpenProcessToken() + GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken)) + return nullptr; + + if (fun7( // LookupPrivilegeValue(nullptr, SE_LOCK_MEMORY_NAME, &luid) + nullptr, "SeLockMemoryPrivilege", &luid)) { TOKEN_PRIVILEGES tp { }; TOKEN_PRIVILEGES prevTp { }; @@ -425,17 +524,18 @@ static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize // Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges() succeeds, // we still need to query GetLastError() to ensure that the privileges were actually obtained. - if (AdjustTokenPrivileges( + if (fun8( // AdjustTokenPrivileges() hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp, &prevTpLen) && GetLastError() == ERROR_SUCCESS) { // Round up size to full pages and allocate allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1); mem = VirtualAlloc( - NULL, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE); + nullptr, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE); // Privilege no longer needed, restore previous state - AdjustTokenPrivileges(hProcessToken, FALSE, &prevTp, 0, NULL, NULL); + fun8( // AdjustTokenPrivileges () + hProcessToken, FALSE, &prevTp, 0, nullptr, nullptr); } } @@ -453,7 +553,7 @@ void* aligned_large_pages_alloc(size_t allocSize) { // Fall back to regular, page aligned, allocation if necessary if (!mem) - mem = VirtualAlloc(NULL, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); + mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); return mem; } @@ -517,7 +617,7 @@ void bindThisThread(size_t) {} /// API and returns the best node id for the thread with index idx. Original /// code from Texel by Peter Österlund. -int best_node(size_t idx) { +static int best_node(size_t idx) { int threads = 0; int nodes = 0; @@ -526,7 +626,7 @@ int best_node(size_t idx) { DWORD byteOffset = 0; // Early exit if the needed API is not available at runtime - HMODULE k32 = GetModuleHandle("Kernel32.dll"); + HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); auto fun1 = (fun1_t)(void(*)())GetProcAddress(k32, "GetLogicalProcessorInformationEx"); if (!fun1) return -1; @@ -596,7 +696,7 @@ void bindThisThread(size_t idx) { return; // Early exit if the needed API are not available at runtime - HMODULE k32 = GetModuleHandle("Kernel32.dll"); + HMODULE k32 = GetModuleHandle(TEXT("Kernel32.dll")); auto fun2 = (fun2_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMaskEx"); auto fun3 = (fun3_t)(void(*)())GetProcAddress(k32, "SetThreadGroupAffinity"); auto fun4 = (fun4_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMask2"); diff --git a/src/misc.h b/src/misc.h index 77b81d50..69d470c2 100644 --- a/src/misc.h +++ b/src/misc.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,6 +28,9 @@ #include "types.h" +#define stringify2(x) #x +#define stringify(x) stringify2(x) + namespace Stockfish { std::string engine_info(bool to_uci = false); @@ -39,12 +42,13 @@ void std_aligned_free(void* ptr); void* aligned_large_pages_alloc(size_t size); // memory aligned by page size, min alignment: 4096 bytes void aligned_large_pages_free(void* mem); // nop if mem == nullptr -void dbg_hit_on(bool b); -void dbg_hit_on(bool c, bool b); -void dbg_mean_of(int v); +void dbg_hit_on(bool cond, int slot = 0); +void dbg_mean_of(int64_t value, int slot = 0); +void dbg_stdev_of(int64_t value, int slot = 0); +void dbg_correl_of(int64_t value1, int64_t value2, int slot = 0); void dbg_print(); -typedef std::chrono::milliseconds::rep TimePoint; // A value in milliseconds +using TimePoint = std::chrono::milliseconds::rep; // A value in milliseconds static_assert(sizeof(TimePoint) == sizeof(int64_t), "TimePoint should be 64 bits"); inline TimePoint now() { return std::chrono::duration_cast @@ -85,32 +89,6 @@ static inline const union { uint32_t i; char c[4]; } Le = { 0x01020304 }; static inline const bool IsLittleEndian = (Le.c[0] == 4); -// RunningAverage : a class to calculate a running average of a series of values. -// For efficiency, all computations are done with integers. -class RunningAverage { - public: - - // Reset the running average to rational value p / q - void set(int64_t p, int64_t q) - { average = p * PERIOD * RESOLUTION / q; } - - // Update average with value v - void update(int64_t v) - { average = RESOLUTION * v + (PERIOD - 1) * average / PERIOD; } - - // Test if average is strictly greater than rational a / b - bool is_greater(int64_t a, int64_t b) const - { return b * average > a * (PERIOD * RESOLUTION); } - - int64_t value() const - { return average / (PERIOD * RESOLUTION); } - - private : - static constexpr int64_t PERIOD = 4096; - static constexpr int64_t RESOLUTION = 1024; - int64_t average; -}; - template class ValueList { @@ -164,7 +142,7 @@ public: inline uint64_t mul_hi64(uint64_t a, uint64_t b) { #if defined(__GNUC__) && defined(IS_64BIT) - __extension__ typedef unsigned __int128 uint128; + __extension__ using uint128 = unsigned __int128; return ((uint128)a * (uint128)b) >> 64; #else uint64_t aL = (uint32_t)a, aH = a >> 32; diff --git a/src/movegen.cpp b/src/movegen.cpp index c7a3c29b..6b28a52e 100644 --- a/src/movegen.cpp +++ b/src/movegen.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,13 +25,21 @@ namespace Stockfish { namespace { - template - ExtMove* make_promotions(ExtMove* moveList, Square to) { + template + ExtMove* make_promotions(ExtMove* moveList, [[maybe_unused]] Square to) { - if (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS) + if constexpr (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS) + { *moveList++ = make(to - D, to, QUEEN); + if constexpr (Enemy && Type == CAPTURES) + { + *moveList++ = make(to - D, to, ROOK); + *moveList++ = make(to - D, to, BISHOP); + *moveList++ = make(to - D, to, KNIGHT); + } + } - if (Type == QUIETS || Type == EVASIONS || Type == NON_EVASIONS) + if constexpr ((Type == QUIETS && !Enemy) || Type == EVASIONS || Type == NON_EVASIONS) { *moveList++ = make(to - D, to, ROOK); *moveList++ = make(to - D, to, BISHOP); @@ -60,18 +68,18 @@ namespace { Bitboard pawnsNotOn7 = pos.pieces(Us, PAWN) & ~TRank7BB; // Single and double pawn pushes, no promotions - if (Type != CAPTURES) + if constexpr (Type != CAPTURES) { Bitboard b1 = shift(pawnsNotOn7) & emptySquares; Bitboard b2 = shift(b1 & TRank3BB) & emptySquares; - if (Type == EVASIONS) // Consider only blocking squares + if constexpr (Type == EVASIONS) // Consider only blocking squares { b1 &= target; b2 &= target; } - if (Type == QUIET_CHECKS) + if constexpr (Type == QUIET_CHECKS) { // To make a quiet check, you either make a direct check by pushing a pawn // or push a blocker pawn that is not on the same file as the enemy king. @@ -102,21 +110,21 @@ namespace { Bitboard b2 = shift(pawnsOn7) & enemies; Bitboard b3 = shift(pawnsOn7) & emptySquares; - if (Type == EVASIONS) + if constexpr (Type == EVASIONS) b3 &= target; while (b1) - moveList = make_promotions(moveList, pop_lsb(b1)); + moveList = make_promotions(moveList, pop_lsb(b1)); while (b2) - moveList = make_promotions(moveList, pop_lsb(b2)); + moveList = make_promotions(moveList, pop_lsb(b2)); while (b3) - moveList = make_promotions(moveList, pop_lsb(b3)); + moveList = make_promotions(moveList, pop_lsb(b3)); } // Standard and en passant captures - if (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS) + if constexpr (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS) { Bitboard b1 = shift(pawnsNotOn7) & enemies; Bitboard b2 = shift(pawnsNotOn7) & enemies; @@ -264,7 +272,7 @@ ExtMove* generate(const Position& pos, ExtMove* moveList) { moveList = pos.checkers() ? generate(pos, moveList) : generate(pos, moveList); while (cur != moveList) - if ( ((pinned && pinned & from_sq(*cur)) || from_sq(*cur) == ksq || type_of(*cur) == EN_PASSANT) + if ( ((pinned & from_sq(*cur)) || from_sq(*cur) == ksq || type_of(*cur) == EN_PASSANT) && !pos.legal(*cur)) *cur = (--moveList)->move; else diff --git a/src/movegen.h b/src/movegen.h index bbb35b39..b8df3e65 100644 --- a/src/movegen.h +++ b/src/movegen.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/movepick.cpp b/src/movepick.cpp index 188d6bd8..6fbcb2c3 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -69,7 +69,6 @@ MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHist stage = (pos.checkers() ? EVASION_TT : MAIN_TT) + !(ttm && pos.pseudo_legal(ttm)); - threatenedPieces = 0; } /// MovePicker constructor for quiescence search @@ -93,20 +92,20 @@ MovePicker::MovePicker(const Position& p, Move ttm, Value th, const CapturePiece { assert(!pos.checkers()); - stage = PROBCUT_TT + !(ttm && pos.capture(ttm) + stage = PROBCUT_TT + !(ttm && pos.capture_stage(ttm) && pos.pseudo_legal(ttm) && pos.see_ge(ttm, threshold)); } /// MovePicker::score() assigns a numerical value to each move in a list, used /// for sorting. Captures are ordered by Most Valuable Victim (MVV), preferring -/// captures with a good history. Quiets moves are ordered using the histories. +/// captures with a good history. Quiets moves are ordered using the history tables. template void MovePicker::score() { static_assert(Type == CAPTURES || Type == QUIETS || Type == EVASIONS, "Wrong type"); - [[maybe_unused]] Bitboard threatenedByPawn, threatenedByMinor, threatenedByRook; + [[maybe_unused]] Bitboard threatenedByPawn, threatenedByMinor, threatenedByRook, threatenedPieces; if constexpr (Type == QUIETS) { Color us = pos.side_to_move(); @@ -123,8 +122,8 @@ void MovePicker::score() { for (auto& m : *this) if constexpr (Type == CAPTURES) - m.value = 6 * int(PieceValue[MG][pos.piece_on(to_sq(m))]) - + (*captureHistory)[pos.moved_piece(m)][to_sq(m)][type_of(pos.piece_on(to_sq(m)))]; + m.value = (7 * int(PieceValue[MG][pos.piece_on(to_sq(m))]) + + (*captureHistory)[pos.moved_piece(m)][to_sq(m)][type_of(pos.piece_on(to_sq(m)))]) / 16; else if constexpr (Type == QUIETS) m.value = 2 * (*mainHistory)[pos.side_to_move()][from_to(m)] @@ -141,7 +140,7 @@ void MovePicker::score() { + bool(pos.check_squares(type_of(pos.moved_piece(m))) & to_sq(m)) * 16384; else // Type == EVASIONS { - if (pos.capture(m)) + if (pos.capture_stage(m)) m.value = PieceValue[MG][pos.piece_on(to_sq(m))] - Value(type_of(pos.moved_piece(m))) + (1 << 28); @@ -158,7 +157,7 @@ Move MovePicker::select(Pred filter) { while (cur < endMoves) { - if (T == Best) + if constexpr (T == Best) std::swap(*cur, *std::max_element(cur, endMoves)); if (*cur != ttMove && filter()) @@ -197,7 +196,7 @@ top: case GOOD_CAPTURE: if (select([&](){ - return pos.see_ge(*cur, Value(-69 * cur->value / 1024)) ? + return pos.see_ge(*cur, Value(-cur->value)) ? // Move losing capture to endBadCaptures to be tried later true : (*endBadCaptures++ = *cur, false); })) return *(cur - 1); @@ -216,7 +215,7 @@ top: case REFUTATION: if (select([&](){ return *cur != MOVE_NONE - && !pos.capture(*cur) + && !pos.capture_stage(*cur) && pos.pseudo_legal(*cur); })) return *(cur - 1); ++stage; diff --git a/src/movepick.h b/src/movepick.h index e4c4a5bf..0b44557f 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -62,14 +62,14 @@ public: template struct Stats : public std::array, Size> { - typedef Stats stats; + using stats = Stats; void fill(const T& v) { // For standard-layout 'this' points to first struct member assert(std::is_standard_layout::value); - typedef StatsEntry entry; + using entry = StatsEntry; entry* p = reinterpret_cast(this); std::fill(p, p + sizeof(*this) / sizeof(entry), v); } @@ -87,23 +87,23 @@ enum StatsType { NoCaptures, Captures }; /// ordering decisions. It uses 2 tables (one for each color) indexed by /// the move's from and to squares, see www.chessprogramming.org/Butterfly_Boards /// (~11 elo) -typedef Stats ButterflyHistory; +using ButterflyHistory = Stats; /// CounterMoveHistory stores counter moves indexed by [piece][to] of the previous /// move, see www.chessprogramming.org/Countermove_Heuristic -typedef Stats CounterMoveHistory; +using CounterMoveHistory = Stats; /// CapturePieceToHistory is addressed by a move's [piece][to][captured piece type] -typedef Stats CapturePieceToHistory; +using CapturePieceToHistory = Stats; /// PieceToHistory is like ButterflyHistory but is addressed by a move's [piece][to] -typedef Stats PieceToHistory; +using PieceToHistory = Stats; /// ContinuationHistory is the combined history of a given pair of moves, usually /// the current one given a previous one. The nested history table is based on /// PieceToHistory instead of ButterflyBoards. /// (~63 elo) -typedef Stats ContinuationHistory; +using ContinuationHistory = Stats; /// MovePicker class is used to pick one pseudo-legal move at a time from the @@ -131,8 +131,6 @@ public: MovePicker(const Position&, Move, Value, const CapturePieceToHistory*); Move next_move(bool skipQuiets = false); - Bitboard threatenedPieces; - private: template Move select(Pred); template void score(); diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp index 4715fed0..329adfda 100644 --- a/src/nnue/evaluate_nnue.cpp +++ b/src/nnue/evaluate_nnue.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,15 +18,15 @@ // Code for calculating NNUE evaluation function +#include +#include #include #include #include -#include -#include +#include #include "../evaluate.h" #include "../position.h" -#include "../misc.h" #include "../uci.h" #include "../types.h" @@ -83,7 +83,7 @@ namespace Stockfish::Eval::NNUE { } // namespace Detail // Initialize the evaluation function parameters - void initialize() { + static void initialize() { Detail::initialize(featureTransformer); for (std::size_t i = 0; i < LayerStacks; ++i) @@ -91,7 +91,7 @@ namespace Stockfish::Eval::NNUE { } // Read network header - bool read_header(std::istream& stream, std::uint32_t* hashValue, std::string* desc) + static bool read_header(std::istream& stream, std::uint32_t* hashValue, std::string* desc) { std::uint32_t version, size; @@ -105,7 +105,7 @@ namespace Stockfish::Eval::NNUE { } // Write network header - bool write_header(std::ostream& stream, std::uint32_t hashValue, const std::string& desc) + static bool write_header(std::ostream& stream, std::uint32_t hashValue, const std::string& desc) { write_little_endian(stream, Version); write_little_endian(stream, hashValue); @@ -115,7 +115,7 @@ namespace Stockfish::Eval::NNUE { } // Read network parameters - bool read_parameters(std::istream& stream) { + static bool read_parameters(std::istream& stream) { std::uint32_t hashValue; if (!read_header(stream, &hashValue, &netDescription)) return false; @@ -127,7 +127,7 @@ namespace Stockfish::Eval::NNUE { } // Write network parameters - bool write_parameters(std::ostream& stream) { + static bool write_parameters(std::ostream& stream) { if (!write_header(stream, HashValue, netDescription)) return false; if (!Detail::write_parameters(stream, *featureTransformer)) return false; @@ -136,6 +136,11 @@ namespace Stockfish::Eval::NNUE { return (bool)stream; } + void hint_common_parent_position(const Position& pos) { + if (Eval::useNNUE) + featureTransformer->hint_common_access(pos); + } + // Evaluation function. Perform differential calculation. Value evaluate(const Position& pos, bool adjusted, int* complexity) { @@ -143,7 +148,7 @@ namespace Stockfish::Eval::NNUE { // overaligning stack variables with alignas() doesn't work correctly. constexpr uint64_t alignment = CacheLineSize; - int delta = 24 - pos.non_pawn_material() / 9560; + constexpr int delta = 24; #if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN) TransformedFeatureType transformedFeaturesUnaligned[ @@ -211,7 +216,7 @@ namespace Stockfish::Eval::NNUE { return t; } - static const std::string PieceToChar(" PNBRQK pnbrqk"); + constexpr std::string_view PieceToChar(" PNBRQK pnbrqk"); // format_cp_compact() converts a Value into (centi)pawns and writes it in a buffer. @@ -245,14 +250,15 @@ namespace Stockfish::Eval::NNUE { } - // format_cp_aligned_dot() converts a Value into (centi)pawns and writes it in a buffer, - // always keeping two decimals. The buffer must have capacity for at least 7 chars. - static void format_cp_aligned_dot(Value v, char* buffer) { + // format_cp_aligned_dot() converts a Value into (centi)pawns, always keeping two decimals. + static void format_cp_aligned_dot(Value v, std::stringstream &stream) { + const double cp = 1.0 * std::abs(int(v)) / UCI::NormalizeToPawnValue; - buffer[0] = (v < 0 ? '-' : v > 0 ? '+' : ' '); - - double cp = 1.0 * std::abs(int(v)) / UCI::NormalizeToPawnValue; - sprintf(&buffer[1], "%6.2f", cp); + stream << (v < 0 ? '-' : v > 0 ? '+' : ' ') + << std::setiosflags(std::ios::fixed) + << std::setw(6) + << std::setprecision(2) + << cp; } @@ -332,17 +338,10 @@ namespace Stockfish::Eval::NNUE { for (std::size_t bucket = 0; bucket < LayerStacks; ++bucket) { - char buffer[3][8]; - std::memset(buffer, '\0', sizeof(buffer)); - - format_cp_aligned_dot(t.psqt[bucket], buffer[0]); - format_cp_aligned_dot(t.positional[bucket], buffer[1]); - format_cp_aligned_dot(t.psqt[bucket] + t.positional[bucket], buffer[2]); - - ss << "| " << bucket << " " - << " | " << buffer[0] << " " - << " | " << buffer[1] << " " - << " | " << buffer[2] << " " + ss << "| " << bucket << " "; + ss << " | "; format_cp_aligned_dot(t.psqt[bucket], ss); ss << " " + << " | "; format_cp_aligned_dot(t.positional[bucket], ss); ss << " " + << " | "; format_cp_aligned_dot(t.psqt[bucket] + t.positional[bucket], ss); ss << " " << " |"; if (bucket == t.correctBucket) ss << " <-- this bucket is used"; diff --git a/src/nnue/evaluate_nnue.h b/src/nnue/evaluate_nnue.h index 2e4f1f50..b84bed8b 100644 --- a/src/nnue/evaluate_nnue.h +++ b/src/nnue/evaluate_nnue.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,6 +31,7 @@ namespace Stockfish::Eval::NNUE { constexpr std::uint32_t HashValue = FeatureTransformer::get_hash_value() ^ Network::get_hash_value(); + // Deleter for automating release of memory area template struct AlignedDeleter { @@ -54,6 +55,14 @@ namespace Stockfish::Eval::NNUE { template using LargePagePtr = std::unique_ptr>; + std::string trace(Position& pos); + Value evaluate(const Position& pos, bool adjusted = false, int* complexity = nullptr); + void hint_common_parent_position(const Position& pos); + + bool load_eval(std::string name, std::istream& stream); + bool save_eval(std::ostream& stream); + bool save_eval(const std::optional& filename); + } // namespace Stockfish::Eval::NNUE #endif // #ifndef NNUE_EVALUATE_NNUE_H_INCLUDED diff --git a/src/nnue/features/half_ka_v2_hm.cpp b/src/nnue/features/half_ka_v2_hm.cpp index 7dbd3415..19ebb15f 100644 --- a/src/nnue/features/half_ka_v2_hm.cpp +++ b/src/nnue/features/half_ka_v2_hm.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/nnue/features/half_ka_v2_hm.h b/src/nnue/features/half_ka_v2_hm.h index a95d4328..78063c36 100644 --- a/src/nnue/features/half_ka_v2_hm.h +++ b/src/nnue/features/half_ka_v2_hm.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h index 461a7b83..9e2f2f97 100644 --- a/src/nnue/layers/affine_transform.h +++ b/src/nnue/layers/affine_transform.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,7 +31,7 @@ This file contains the definition for a fully connected layer (aka affine transform). Two approaches are employed, depending on the sizes of the transform. - Approach 1: + Approach 1 (a specialization for large inputs): - used when the PaddedInputDimensions >= 128 - uses AVX512 if possible - processes inputs in batches of 2*InputSimdWidth @@ -42,9 +42,8 @@ depends on the architecture (the amount of registers) - accumulate + hadd is used - Approach 2: + Approach 2 (a specialization for small inputs): - used when the PaddedInputDimensions < 128 - - does not use AVX512 - expected use-case is for when PaddedInputDimensions == 32 and InputDimensions <= 32. - that's why AVX512 is hard to implement - expected use-case is small layers @@ -72,6 +71,10 @@ namespace Stockfish::Eval::NNUE::Layers { const __m64 Zeros = _mm_setzero_si64(); const auto inputVector = reinterpret_cast(input); +# elif defined(USE_NEON_DOTPROD) + constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 16) / 16; + const auto inputVector = reinterpret_cast(input); + # elif defined(USE_NEON) constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 16) / 16; const auto inputVector = reinterpret_cast(input); @@ -123,6 +126,14 @@ namespace Stockfish::Eval::NNUE::Layers { sum = _mm_add_pi32(sum, _mm_unpackhi_pi32(sum, sum)); output[i] = _mm_cvtsi64_si32(sum); +# elif defined(USE_NEON_DOTPROD) + int32x4_t sum = {biases[i]}; + const auto row = reinterpret_cast(&weights[offset]); + for (IndexType j = 0; j < NumChunks; ++j) { + sum = vdotq_s32(sum, inputVector[j], row[j]); + } + output[i] = vaddvq_s32(sum); + # elif defined(USE_NEON) int32x4_t sum = {biases[i]}; const auto row = reinterpret_cast(&weights[offset]); @@ -157,7 +168,7 @@ namespace Stockfish::Eval::NNUE::Layers { constexpr IndexType LargeInputSize = std::numeric_limits::max(); #endif - // A specialization for large inputs. + // A specialization for large inputs template class AffineTransform(InDims, MaxSimdWidth) >= LargeInputSize)>> { public: @@ -176,36 +187,39 @@ namespace Stockfish::Eval::NNUE::Layers { using OutputBuffer = OutputType[PaddedOutputDimensions]; - static_assert(PaddedInputDimensions >= LargeInputSize, "Something went wrong. This specialization should not have been chosen."); + static_assert(PaddedInputDimensions >= LargeInputSize, "Something went wrong. This specialization (for large inputs) should not have been chosen."); #if defined (USE_AVX512) - static constexpr const IndexType InputSimdWidth = 64; - static constexpr const IndexType MaxNumOutputRegs = 16; + static constexpr IndexType InputSimdWidth = 64; + static constexpr IndexType MaxNumOutputRegs = 16; #elif defined (USE_AVX2) - static constexpr const IndexType InputSimdWidth = 32; - static constexpr const IndexType MaxNumOutputRegs = 8; + static constexpr IndexType InputSimdWidth = 32; + static constexpr IndexType MaxNumOutputRegs = 8; #elif defined (USE_SSSE3) - static constexpr const IndexType InputSimdWidth = 16; - static constexpr const IndexType MaxNumOutputRegs = 8; + static constexpr IndexType InputSimdWidth = 16; + static constexpr IndexType MaxNumOutputRegs = 8; +#elif defined (USE_NEON_DOTPROD) + static constexpr IndexType InputSimdWidth = 16; + static constexpr IndexType MaxNumOutputRegs = 8; #elif defined (USE_NEON) - static constexpr const IndexType InputSimdWidth = 8; - static constexpr const IndexType MaxNumOutputRegs = 8; + static constexpr IndexType InputSimdWidth = 8; + static constexpr IndexType MaxNumOutputRegs = 8; #else // The fallback implementation will not have permuted weights. // We define these to avoid a lot of ifdefs later. - static constexpr const IndexType InputSimdWidth = 1; - static constexpr const IndexType MaxNumOutputRegs = 1; + static constexpr IndexType InputSimdWidth = 1; + static constexpr IndexType MaxNumOutputRegs = 1; #endif // A big block is a region in the weight matrix of the size [PaddedInputDimensions, NumOutputRegs]. // A small block is a region of size [InputSimdWidth, 1] - static constexpr const IndexType NumOutputRegs = std::min(MaxNumOutputRegs, OutputDimensions); - static constexpr const IndexType SmallBlockSize = InputSimdWidth; - static constexpr const IndexType BigBlockSize = NumOutputRegs * PaddedInputDimensions; - static constexpr const IndexType NumSmallBlocksInBigBlock = BigBlockSize / SmallBlockSize; - static constexpr const IndexType NumSmallBlocksPerOutput = PaddedInputDimensions / SmallBlockSize; - static constexpr const IndexType NumBigBlocks = OutputDimensions / NumOutputRegs; + static constexpr IndexType NumOutputRegs = std::min(MaxNumOutputRegs, OutputDimensions); + static constexpr IndexType SmallBlockSize = InputSimdWidth; + static constexpr IndexType BigBlockSize = NumOutputRegs * PaddedInputDimensions; + static constexpr IndexType NumSmallBlocksInBigBlock = BigBlockSize / SmallBlockSize; + static constexpr IndexType NumSmallBlocksPerOutput = PaddedInputDimensions / SmallBlockSize; + static constexpr IndexType NumBigBlocks = OutputDimensions / NumOutputRegs; static_assert(OutputDimensions % NumOutputRegs == 0); @@ -241,8 +255,7 @@ namespace Stockfish::Eval::NNUE::Layers { // Read network parameters bool read_parameters(std::istream& stream) { - for (IndexType i = 0; i < OutputDimensions; ++i) - biases[i] = read_little_endian(stream); + read_little_endian(stream, biases, OutputDimensions); for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) weights[get_weight_index(i)] = read_little_endian(stream); @@ -252,8 +265,7 @@ namespace Stockfish::Eval::NNUE::Layers { // Write network parameters bool write_parameters(std::ostream& stream) const { - for (IndexType i = 0; i < OutputDimensions; ++i) - write_little_endian(stream, biases[i]); + write_little_endian(stream, biases, OutputDimensions); for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) write_little_endian(stream, weights[get_weight_index(i)]); @@ -292,6 +304,15 @@ namespace Stockfish::Eval::NNUE::Layers { #define vec_add_dpbusd_32x2 Simd::m128_add_dpbusd_epi32x2 #define vec_hadd Simd::m128_hadd #define vec_haddx4 Simd::m128_haddx4 +#elif defined (USE_NEON_DOTPROD) + using acc_vec_t = int32x4_t; + using bias_vec_t = int32x4_t; + using weight_vec_t = int8x16_t; + using in_vec_t = int8x16_t; + #define vec_zero {0} + #define vec_add_dpbusd_32x2 Simd::dotprod_m128_add_dpbusd_epi32x2 + #define vec_hadd Simd::neon_m128_hadd + #define vec_haddx4 Simd::neon_m128_haddx4 #elif defined (USE_NEON) using acc_vec_t = int32x4_t; using bias_vec_t = int32x4_t; @@ -374,6 +395,7 @@ namespace Stockfish::Eval::NNUE::Layers { alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions]; }; + // A specialization for small inputs template class AffineTransform(InDims, MaxSimdWidth) < LargeInputSize)>> { public: @@ -393,12 +415,7 @@ namespace Stockfish::Eval::NNUE::Layers { using OutputBuffer = OutputType[PaddedOutputDimensions]; - static_assert(PaddedInputDimensions < LargeInputSize, "Something went wrong. This specialization should not have been chosen."); - -#if defined (USE_SSSE3) - static constexpr const IndexType OutputSimdWidth = SimdWidth / 4; - static constexpr const IndexType InputSimdWidth = SimdWidth; -#endif + static_assert(PaddedInputDimensions < LargeInputSize, "Something went wrong. This specialization (for small inputs) should not have been chosen."); // Hash value embedded in the evaluation file static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) { @@ -428,8 +445,7 @@ namespace Stockfish::Eval::NNUE::Layers { // Read network parameters bool read_parameters(std::istream& stream) { - for (IndexType i = 0; i < OutputDimensions; ++i) - biases[i] = read_little_endian(stream); + read_little_endian(stream, biases, OutputDimensions); for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) weights[get_weight_index(i)] = read_little_endian(stream); @@ -438,8 +454,7 @@ namespace Stockfish::Eval::NNUE::Layers { // Write network parameters bool write_parameters(std::ostream& stream) const { - for (IndexType i = 0; i < OutputDimensions; ++i) - write_little_endian(stream, biases[i]); + write_little_endian(stream, biases, OutputDimensions); for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) write_little_endian(stream, weights[get_weight_index(i)]); @@ -450,29 +465,34 @@ namespace Stockfish::Eval::NNUE::Layers { const OutputType* propagate( const InputType* input, OutputType* output) const { -#if defined (USE_AVX2) +#if defined (USE_AVX512) + using vec_t = __m512i; + #define vec_setzero _mm512_setzero_si512 + #define vec_set_32 _mm512_set1_epi32 + #define vec_add_dpbusd_32 Simd::m512_add_dpbusd_epi32 + #define vec_add_dpbusd_32x2 Simd::m512_add_dpbusd_epi32x2 + #define vec_hadd Simd::m512_hadd +#elif defined (USE_AVX2) using vec_t = __m256i; #define vec_setzero _mm256_setzero_si256 #define vec_set_32 _mm256_set1_epi32 #define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32 #define vec_add_dpbusd_32x2 Simd::m256_add_dpbusd_epi32x2 - #define vec_add_dpbusd_32x4 Simd::m256_add_dpbusd_epi32x4 #define vec_hadd Simd::m256_hadd - #define vec_haddx4 Simd::m256_haddx4 #elif defined (USE_SSSE3) using vec_t = __m128i; #define vec_setzero _mm_setzero_si128 #define vec_set_32 _mm_set1_epi32 #define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32 #define vec_add_dpbusd_32x2 Simd::m128_add_dpbusd_epi32x2 - #define vec_add_dpbusd_32x4 Simd::m128_add_dpbusd_epi32x4 #define vec_hadd Simd::m128_hadd - #define vec_haddx4 Simd::m128_haddx4 #endif #if defined (USE_SSSE3) const auto inputVector = reinterpret_cast(input); + static constexpr IndexType OutputSimdWidth = sizeof(vec_t) / sizeof(OutputType); + static_assert(OutputDimensions % OutputSimdWidth == 0 || OutputDimensions == 1); if constexpr (OutputDimensions % OutputSimdWidth == 0) @@ -518,9 +538,7 @@ namespace Stockfish::Eval::NNUE::Layers { # undef vec_set_32 # undef vec_add_dpbusd_32 # undef vec_add_dpbusd_32x2 -# undef vec_add_dpbusd_32x4 # undef vec_hadd -# undef vec_haddx4 #else // Use old implementation for the other architectures. affine_transform_non_ssse3< diff --git a/src/nnue/layers/affine_transform_sparse_input.h b/src/nnue/layers/affine_transform_sparse_input.h new file mode 100644 index 00000000..e0c3a8a0 --- /dev/null +++ b/src/nnue/layers/affine_transform_sparse_input.h @@ -0,0 +1,286 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +// Definition of layer AffineTransformSparseInput of NNUE evaluation function + +#ifndef NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED +#define NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED + +#include +#include +#include +#include +#include "../nnue_common.h" +#include "affine_transform.h" +#include "simd.h" + +/* + This file contains the definition for a fully connected layer (aka affine transform) with block sparse input. +*/ + +namespace Stockfish::Eval::NNUE::Layers { +#if defined(__GNUC__) // GCC, Clang, ICC + + static inline IndexType lsb_(std::uint32_t b) { + assert(b); + return IndexType(__builtin_ctzl(b)); + } + +#elif defined(_MSC_VER) // MSVC + + static inline IndexType lsb_(std::uint32_t b) { + assert(b); + unsigned long idx; + _BitScanForward(&idx, b); + return (IndexType) idx; + } + +#else // Compiler is neither GCC nor MSVC compatible + +#error "Compiler not supported." + +#endif + + +#if defined(USE_SSSE3) + alignas(CacheLineSize) static inline const std::array, 256> lookup_indices = [](){ + std::array, 256> v{}; + for (int i = 0; i < 256; ++i) + { + int j = i; + int k = 0; + while(j) + { + const IndexType lsbIndex = lsb_(std::uint32_t(j)); + j &= j - 1; + v[i][k] = lsbIndex; + ++k; + } + } + return v; + }(); + alignas(CacheLineSize) static inline const std::array lookup_count = [](){ + std::array v; + for (int i = 0; i < 256; ++i) + { + int j = i; + int k = 0; + while(j) + { + j &= j - 1; + ++k; + } + v[i] = k; + } + return v; + }(); + + // Find indices of nonzero numbers in an int32_t array + template + void find_nnz(const std::int32_t* input, std::uint16_t* out, IndexType& count_out) { +#if defined (USE_AVX512) + using vec_t = __m512i; + #define vec_nnz(a) _mm512_cmpgt_epi32_mask(a, _mm512_setzero_si512()) +#elif defined (USE_AVX2) + using vec_t = __m256i; + #define vec_nnz(a) _mm256_movemask_ps(_mm256_castsi256_ps(_mm256_cmpgt_epi32(a, _mm256_setzero_si256()))) +#elif defined (USE_SSSE3) + using vec_t = __m128i; + #define vec_nnz(a) _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpgt_epi32(a, _mm_setzero_si128()))) +#endif + constexpr IndexType InputSimdWidth = sizeof(vec_t) / sizeof(std::int32_t); + // Inputs are processed InputSimdWidth at a time and outputs are processed 8 at a time so we process in chunks of max(InputSimdWidth, 8) + constexpr IndexType ChunkSize = std::max(InputSimdWidth, 8); + constexpr IndexType NumChunks = InputDimensions / ChunkSize; + constexpr IndexType InputsPerChunk = ChunkSize / InputSimdWidth; + constexpr IndexType OutputsPerChunk = ChunkSize / 8; + + const auto inputVector = reinterpret_cast(input); + IndexType count = 0; + __m128i base = _mm_set1_epi16(0); + __m128i increment = _mm_set1_epi16(8); + for (IndexType i = 0; i < NumChunks; ++i) + { + // bitmask of nonzero values in this chunk + unsigned nnz = 0; + for (IndexType j = 0; j < InputsPerChunk; ++j) + { + const vec_t inputChunk = inputVector[i * InputsPerChunk + j]; + nnz |= (unsigned)vec_nnz(inputChunk) << (j * InputSimdWidth); + } + for (IndexType j = 0; j < OutputsPerChunk; ++j) + { + const auto lookup = (nnz >> (j * 8)) & 0xFF; + const auto offsets = _mm_loadu_si128(reinterpret_cast(&lookup_indices[lookup])); + _mm_storeu_si128(reinterpret_cast<__m128i*>(out + count), _mm_add_epi16(base, offsets)); + count += lookup_count[lookup]; + base = _mm_add_epi16(base, increment); + } + } + count_out = count; + } +# undef vec_nnz +#endif + + // Sparse input implementation + template + class AffineTransformSparseInput { + public: + // Input/output type + // Input/output type + using InputType = std::uint8_t; + using OutputType = std::int32_t; + + // Number of input/output dimensions + static constexpr IndexType InputDimensions = InDims; + static constexpr IndexType OutputDimensions = OutDims; + + static_assert(OutputDimensions % 16 == 0, "Only implemented for OutputDimensions divisible by 16."); + + static constexpr IndexType PaddedInputDimensions = + ceil_to_multiple(InputDimensions, MaxSimdWidth); + static constexpr IndexType PaddedOutputDimensions = + ceil_to_multiple(OutputDimensions, MaxSimdWidth); + +#if defined (USE_SSSE3) + static constexpr IndexType ChunkSize = 4; +#else + static constexpr IndexType ChunkSize = 1; +#endif + + using OutputBuffer = OutputType[PaddedOutputDimensions]; + + // Hash value embedded in the evaluation file + static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) { + std::uint32_t hashValue = 0xCC03DAE4u; + hashValue += OutputDimensions; + hashValue ^= prevHash >> 1; + hashValue ^= prevHash << 31; + return hashValue; + } + + static IndexType get_weight_index_scrambled(IndexType i) + { + return + (i / ChunkSize) % (PaddedInputDimensions / ChunkSize) * OutputDimensions * ChunkSize + + i / PaddedInputDimensions * ChunkSize + + i % ChunkSize; + } + + static IndexType get_weight_index(IndexType i) + { +#if defined (USE_SSSE3) + return get_weight_index_scrambled(i); +#else + return i; +#endif + } + + // Read network parameters + bool read_parameters(std::istream& stream) { + read_little_endian(stream, biases, OutputDimensions); + for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) + weights[get_weight_index(i)] = read_little_endian(stream); + + return !stream.fail(); + } + + // Write network parameters + bool write_parameters(std::ostream& stream) const { + write_little_endian(stream, biases, OutputDimensions); + + for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i) + write_little_endian(stream, weights[get_weight_index(i)]); + + return !stream.fail(); + } + // Forward propagation + const OutputType* propagate( + const InputType* input, OutputType* output) const { + +#if defined (USE_SSSE3) +#if defined (USE_AVX512) + using vec_t = __m512i; + #define vec_setzero _mm512_setzero_si512 + #define vec_set_32 _mm512_set1_epi32 + #define vec_add_dpbusd_32 Simd::m512_add_dpbusd_epi32 +#elif defined (USE_AVX2) + using vec_t = __m256i; + #define vec_setzero _mm256_setzero_si256 + #define vec_set_32 _mm256_set1_epi32 + #define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32 +#elif defined (USE_SSSE3) + using vec_t = __m128i; + #define vec_setzero _mm_setzero_si128 + #define vec_set_32 _mm_set1_epi32 + #define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32 +#endif + static constexpr IndexType OutputSimdWidth = sizeof(vec_t) / sizeof(OutputType); + + constexpr IndexType NumChunks = ceil_to_multiple(InputDimensions, 8) / ChunkSize; + constexpr IndexType NumRegs = OutputDimensions / OutputSimdWidth; + std::uint16_t nnz[NumChunks]; + IndexType count; + + const auto input32 = reinterpret_cast(input); + + // Find indices of nonzero 32bit blocks + find_nnz(input32, nnz, count); + + const vec_t* biasvec = reinterpret_cast(biases); + vec_t acc[NumRegs]; + for (IndexType k = 0; k < NumRegs; ++k) + acc[k] = biasvec[k]; + + for (IndexType j = 0; j < count; ++j) + { + const auto i = nnz[j]; + const vec_t in = vec_set_32(input32[i]); + const auto col = reinterpret_cast(&weights[i * OutputDimensions * ChunkSize]); + for (IndexType k = 0; k < NumRegs; ++k) + vec_add_dpbusd_32(acc[k], in, col[k]); + } + + vec_t* outptr = reinterpret_cast(output); + for (IndexType k = 0; k < NumRegs; ++k) + outptr[k] = acc[k]; +# undef vec_setzero +# undef vec_set_32 +# undef vec_add_dpbusd_32 +#else + // Use dense implementation for the other architectures. + affine_transform_non_ssse3< + InputDimensions, + PaddedInputDimensions, + OutputDimensions>(output, weights, biases, input); +#endif + + return output; + } + + private: + using BiasType = OutputType; + using WeightType = std::int8_t; + + alignas(CacheLineSize) BiasType biases[OutputDimensions]; + alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions]; + }; + +} // namespace Stockfish::Eval::NNUE::Layers + +#endif // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED diff --git a/src/nnue/layers/clipped_relu.h b/src/nnue/layers/clipped_relu.h index f94d3082..51e562da 100644 --- a/src/nnue/layers/clipped_relu.h +++ b/src/nnue/layers/clipped_relu.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/nnue/layers/simd.h b/src/nnue/layers/simd.h index 7b9e8fb2..22c51980 100644 --- a/src/nnue/layers/simd.h +++ b/src/nnue/layers/simd.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -153,7 +153,7 @@ namespace Stockfish::Simd { asm( "vpdpbusd %[b0], %[a0], %[acc]\n\t" "vpdpbusd %[b1], %[a1], %[acc]\n\t" - : [acc]"+v"(acc) + : [acc]"+&v"(acc) : [a0]"v"(a0), [b0]"vm"(b0), [a1]"v"(a1), [b1]"vm"(b1) ); # else @@ -165,18 +165,19 @@ namespace Stockfish::Simd { __m512i tmp0 = _mm512_maddubs_epi16(a0, b0); __m512i tmp1 = _mm512_maddubs_epi16(a1, b1); asm( - "vpaddsw %[tmp0], %[tmp1], %[tmp0]\n\t" "vpmaddwd %[tmp0], %[ones], %[tmp0]\n\t" + "vpmaddwd %[tmp1], %[ones], %[tmp1]\n\t" + "vpaddd %[tmp0], %[tmp1], %[tmp0]\n\t" "vpaddd %[acc], %[tmp0], %[acc]\n\t" - : [acc]"+v"(acc), [tmp0]"+&v"(tmp0) - : [tmp1]"v"(tmp1), [ones]"v"(_mm512_set1_epi16(1)) + : [acc]"+v"(acc), [tmp0]"+&v"(tmp0), [tmp1]"+&v"(tmp1) + : [ones]"v"(_mm512_set1_epi16(1)) ); # else __m512i product0 = _mm512_maddubs_epi16(a0, b0); __m512i product1 = _mm512_maddubs_epi16(a1, b1); - product0 = _mm512_adds_epi16(product0, product1); product0 = _mm512_madd_epi16(product0, _mm512_set1_epi16(1)); - acc = _mm512_add_epi32(acc, product0); + product1 = _mm512_madd_epi16(product1, _mm512_set1_epi16(1)); + acc = _mm512_add_epi32(acc, _mm512_add_epi32(product0, product1)); # endif # endif } @@ -249,7 +250,7 @@ namespace Stockfish::Simd { asm( VNNI_PREFIX "vpdpbusd %[b0], %[a0], %[acc]\n\t" VNNI_PREFIX "vpdpbusd %[b1], %[a1], %[acc]\n\t" - : [acc]"+v"(acc) + : [acc]"+&v"(acc) : [a0]"v"(a0), [b0]"vm"(b0), [a1]"v"(a1), [b1]"vm"(b1) ); # else @@ -261,18 +262,19 @@ namespace Stockfish::Simd { __m256i tmp0 = _mm256_maddubs_epi16(a0, b0); __m256i tmp1 = _mm256_maddubs_epi16(a1, b1); asm( - "vpaddsw %[tmp0], %[tmp1], %[tmp0]\n\t" "vpmaddwd %[tmp0], %[ones], %[tmp0]\n\t" + "vpmaddwd %[tmp1], %[ones], %[tmp1]\n\t" + "vpaddd %[tmp0], %[tmp1], %[tmp0]\n\t" "vpaddd %[acc], %[tmp0], %[acc]\n\t" - : [acc]"+v"(acc), [tmp0]"+&v"(tmp0) - : [tmp1]"v"(tmp1), [ones]"v"(_mm256_set1_epi16(1)) + : [acc]"+v"(acc), [tmp0]"+&v"(tmp0), [tmp1]"+&v"(tmp1) + : [ones]"v"(_mm256_set1_epi16(1)) ); # else __m256i product0 = _mm256_maddubs_epi16(a0, b0); __m256i product1 = _mm256_maddubs_epi16(a1, b1); - product0 = _mm256_adds_epi16(product0, product1); product0 = _mm256_madd_epi16(product0, _mm256_set1_epi16(1)); - acc = _mm256_add_epi32(acc, product0); + product1 = _mm256_madd_epi16(product1, _mm256_set1_epi16(1)); + acc = _mm256_add_epi32(acc, _mm256_add_epi32(product0, product1)); # endif # endif } @@ -326,23 +328,37 @@ namespace Stockfish::Simd { __m128i tmp0 = _mm_maddubs_epi16(a0, b0); __m128i tmp1 = _mm_maddubs_epi16(a1, b1); asm( - "paddsw %[tmp1], %[tmp0]\n\t" "pmaddwd %[ones], %[tmp0]\n\t" + "pmaddwd %[ones], %[tmp1]\n\t" + "paddd %[tmp1], %[tmp0]\n\t" "paddd %[tmp0], %[acc]\n\t" - : [acc]"+v"(acc), [tmp0]"+&v"(tmp0) - : [tmp1]"v"(tmp1), [ones]"v"(_mm_set1_epi16(1)) + : [acc]"+v"(acc), [tmp0]"+&v"(tmp0), [tmp1]"+&v"(tmp1) + : [ones]"v"(_mm_set1_epi16(1)) ); # else __m128i product0 = _mm_maddubs_epi16(a0, b0); __m128i product1 = _mm_maddubs_epi16(a1, b1); - product0 = _mm_adds_epi16(product0, product1); product0 = _mm_madd_epi16(product0, _mm_set1_epi16(1)); - acc = _mm_add_epi32(acc, product0); + product1 = _mm_madd_epi16(product1, _mm_set1_epi16(1)); + acc = _mm_add_epi32(acc, _mm_add_epi32(product0, product1)); # endif } #endif +#if defined (USE_NEON_DOTPROD) + + [[maybe_unused]] static void dotprod_m128_add_dpbusd_epi32x2( + int32x4_t& acc, + int8x16_t a0, int8x16_t b0, + int8x16_t a1, int8x16_t b1) { + + acc = vdotq_s32(acc, a0, b0); + acc = vdotq_s32(acc, a1, b1); + } + +#endif + #if defined (USE_NEON) [[maybe_unused]] static int neon_m128_reduce_add_epi32(int32x4_t s) { diff --git a/src/nnue/layers/sqr_clipped_relu.h b/src/nnue/layers/sqr_clipped_relu.h index b603a277..3fbb243c 100644 --- a/src/nnue/layers/sqr_clipped_relu.h +++ b/src/nnue/layers/sqr_clipped_relu.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -106,7 +106,7 @@ namespace Stockfish::Eval::NNUE::Layers { for (IndexType i = Start; i < InputDimensions; ++i) { output[i] = static_cast( - // realy should be /127 but we need to make it fast + // really should be /127 but we need to make it fast // needs to be accounted for in the trainer std::max(0ll, std::min(127ll, (((long long)input[i] * input[i]) >> (2 * WeightScaleBits)) / 128))); } diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h index 600483b5..8eba4497 100644 --- a/src/nnue/nnue_accumulator.h +++ b/src/nnue/nnue_accumulator.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/nnue/nnue_architecture.h b/src/nnue/nnue_architecture.h index cac83730..413dbb3d 100644 --- a/src/nnue/nnue_architecture.h +++ b/src/nnue/nnue_architecture.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -27,6 +27,7 @@ #include "features/half_ka_v2_hm.h" +#include "layers/affine_transform_sparse_input.h" #include "layers/affine_transform.h" #include "layers/clipped_relu.h" #include "layers/sqr_clipped_relu.h" @@ -39,7 +40,7 @@ namespace Stockfish::Eval::NNUE { using FeatureSet = Features::HalfKAv2_hm; // Number of input feature dimensions after conversion -constexpr IndexType TransformedFeatureDimensions = 1024; +constexpr IndexType TransformedFeatureDimensions = 1536; constexpr IndexType PSQTBuckets = 8; constexpr IndexType LayerStacks = 8; @@ -48,7 +49,7 @@ struct Network static constexpr int FC_0_OUTPUTS = 15; static constexpr int FC_1_OUTPUTS = 32; - Layers::AffineTransform fc_0; + Layers::AffineTransformSparseInput fc_0; Layers::SqrClippedReLU ac_sqr_0; Layers::ClippedReLU ac_0; Layers::AffineTransform fc_1; @@ -72,22 +73,20 @@ struct Network // Read network parameters bool read_parameters(std::istream& stream) { - if (!fc_0.read_parameters(stream)) return false; - if (!ac_0.read_parameters(stream)) return false; - if (!fc_1.read_parameters(stream)) return false; - if (!ac_1.read_parameters(stream)) return false; - if (!fc_2.read_parameters(stream)) return false; - return true; + return fc_0.read_parameters(stream) + && ac_0.read_parameters(stream) + && fc_1.read_parameters(stream) + && ac_1.read_parameters(stream) + && fc_2.read_parameters(stream); } - // Read network parameters + // Write network parameters bool write_parameters(std::ostream& stream) const { - if (!fc_0.write_parameters(stream)) return false; - if (!ac_0.write_parameters(stream)) return false; - if (!fc_1.write_parameters(stream)) return false; - if (!ac_1.write_parameters(stream)) return false; - if (!fc_2.write_parameters(stream)) return false; - return true; + return fc_0.write_parameters(stream) + && ac_0.write_parameters(stream) + && fc_1.write_parameters(stream) + && ac_1.write_parameters(stream) + && fc_2.write_parameters(stream); } std::int32_t propagate(const TransformedFeatureType* transformedFeatures) diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h index 17956189..d338527d 100644 --- a/src/nnue/nnue_common.h +++ b/src/nnue/nnue_common.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -57,6 +57,9 @@ namespace Stockfish::Eval::NNUE { // Size of cache line (in bytes) constexpr std::size_t CacheLineSize = 64; + constexpr const char Leb128MagicString[] = "COMPRESSED_LEB128"; + constexpr const std::size_t Leb128MagicStringSize = sizeof(Leb128MagicString) - 1; + // SIMD width (in bytes) #if defined(USE_AVX2) constexpr std::size_t SimdWidth = 32; @@ -159,6 +162,80 @@ namespace Stockfish::Eval::NNUE { write_little_endian(stream, values[i]); } + template + inline void read_leb_128(std::istream& stream, IntType* out, std::size_t count) { + static_assert(std::is_signed_v, "Not implemented for unsigned types"); + char leb128MagicString[Leb128MagicStringSize]; + stream.read(leb128MagicString, Leb128MagicStringSize); + assert(strncmp(Leb128MagicString, leb128MagicString, Leb128MagicStringSize) == 0); + const std::uint32_t BUF_SIZE = 4096; + std::uint8_t buf[BUF_SIZE]; + auto bytes_left = read_little_endian(stream); + std::uint32_t buf_pos = BUF_SIZE; + for (std::size_t i = 0; i < count; ++i) { + IntType result = 0; + size_t shift = 0; + do { + if (buf_pos == BUF_SIZE) { + stream.read(reinterpret_cast(buf), std::min(bytes_left, BUF_SIZE)); + buf_pos = 0; + } + std::uint8_t byte = buf[buf_pos++]; + --bytes_left; + result |= (byte & 0x7f) << shift; + shift += 7; + if ((byte & 0x80) == 0) { + out[i] = sizeof(IntType) * 8 <= shift || (byte & 0x40) == 0 ? result : result | ~((1 << shift) - 1); + break; + } + } while (shift < sizeof(IntType) * 8); + } + assert(bytes_left == 0); + } + + template + inline void write_leb_128(std::ostream& stream, const IntType* values, std::size_t count) { + static_assert(std::is_signed_v, "Not implemented for unsigned types"); + stream.write(Leb128MagicString, Leb128MagicStringSize); + std::uint32_t byte_count = 0; + for (std::size_t i = 0; i < count; ++i) { + IntType value = values[i]; + std::uint8_t byte; + do { + byte = value & 0x7f; + value >>= 7; + ++byte_count; + } while ((byte & 0x40) == 0 ? value != 0 : value != -1); + } + write_little_endian(stream, byte_count); + const std::uint32_t BUF_SIZE = 4096; + std::uint8_t buf[BUF_SIZE]; + std::uint32_t buf_pos = 0; + auto flush = [&]() { + if (buf_pos > 0) { + stream.write(reinterpret_cast(buf), buf_pos); + buf_pos = 0; + } + }; + auto write = [&](std::uint8_t byte) { + buf[buf_pos++] = byte; + if (buf_pos == BUF_SIZE) flush(); + }; + for (std::size_t i = 0; i < count; ++i) { + IntType value = values[i]; + while (true) { + std::uint8_t byte = value & 0x7f; + value >>= 7; + if ((byte & 0x40) == 0 ? value == 0 : value == -1) { + write(byte); + break; + } + write(byte | 0x80); + } + } + flush(); + } + } // namespace Stockfish::Eval::NNUE #endif // #ifndef NNUE_COMMON_H_INCLUDED diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index b6dd54d3..7571f398 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,6 +25,7 @@ #include "nnue_architecture.h" #include // std::memset() +#include // std::pair namespace Stockfish::Eval::NNUE { @@ -41,8 +42,8 @@ namespace Stockfish::Eval::NNUE { "Per feature PSQT values cannot be processed at granularity lower than 8 at a time."); #ifdef USE_AVX512 - typedef __m512i vec_t; - typedef __m256i psqt_vec_t; + using vec_t = __m512i; + using psqt_vec_t = __m256i; #define vec_load(a) _mm512_load_si512(a) #define vec_store(a,b) _mm512_store_si512(a,b) #define vec_add_16(a,b) _mm512_add_epi16(a,b) @@ -65,8 +66,8 @@ namespace Stockfish::Eval::NNUE { #define MaxChunkSize 64 #elif USE_AVX2 - typedef __m256i vec_t; - typedef __m256i psqt_vec_t; + using vec_t = __m256i; + using psqt_vec_t = __m256i; #define vec_load(a) _mm256_load_si256(a) #define vec_store(a,b) _mm256_store_si256(a,b) #define vec_add_16(a,b) _mm256_add_epi16(a,b) @@ -89,8 +90,8 @@ namespace Stockfish::Eval::NNUE { #define MaxChunkSize 32 #elif USE_SSE2 - typedef __m128i vec_t; - typedef __m128i psqt_vec_t; + using vec_t = __m128i; + using psqt_vec_t = __m128i; #define vec_load(a) (*(a)) #define vec_store(a,b) *(a)=(b) #define vec_add_16(a,b) _mm_add_epi16(a,b) @@ -110,8 +111,8 @@ namespace Stockfish::Eval::NNUE { #define MaxChunkSize 16 #elif USE_MMX - typedef __m64 vec_t; - typedef __m64 psqt_vec_t; + using vec_t = __m64; + using psqt_vec_t = __m64; #define vec_load(a) (*(a)) #define vec_store(a,b) *(a)=(b) #define vec_add_16(a,b) _mm_add_pi16(a,b) @@ -138,8 +139,8 @@ namespace Stockfish::Eval::NNUE { #define MaxChunkSize 8 #elif USE_NEON - typedef int16x8_t vec_t; - typedef int32x4_t psqt_vec_t; + using vec_t = int16x8_t; + using psqt_vec_t = int32x4_t; #define vec_load(a) (*(a)) #define vec_store(a,b) *(a)=(b) #define vec_add_16(a,b) vaddq_s16(a,b) @@ -252,9 +253,9 @@ namespace Stockfish::Eval::NNUE { // Read network parameters bool read_parameters(std::istream& stream) { - read_little_endian(stream, biases , HalfDimensions ); - read_little_endian(stream, weights , HalfDimensions * InputDimensions); - read_little_endian(stream, psqtWeights, PSQTBuckets * InputDimensions); + read_leb_128(stream, biases , HalfDimensions ); + read_leb_128(stream, weights , HalfDimensions * InputDimensions); + read_leb_128(stream, psqtWeights, PSQTBuckets * InputDimensions); return !stream.fail(); } @@ -262,9 +263,9 @@ namespace Stockfish::Eval::NNUE { // Write network parameters bool write_parameters(std::ostream& stream) const { - write_little_endian(stream, biases , HalfDimensions ); - write_little_endian(stream, weights , HalfDimensions * InputDimensions); - write_little_endian(stream, psqtWeights, PSQTBuckets * InputDimensions); + write_leb_128(stream, biases , HalfDimensions ); + write_leb_128(stream, weights , HalfDimensions * InputDimensions); + write_leb_128(stream, psqtWeights, PSQTBuckets * InputDimensions); return !stream.fail(); } @@ -332,27 +333,16 @@ namespace Stockfish::Eval::NNUE { #endif return psqt; + } // end of function transform() - } // end of function transform() - - + void hint_common_access(const Position& pos) const { + hint_common_access_for_perspective(pos); + hint_common_access_for_perspective(pos); + } private: template - void update_accumulator(const Position& pos) const { - - // The size must be enough to contain the largest possible update. - // That might depend on the feature set and generally relies on the - // feature set's update cost calculation to be correct and never - // allow updates with more added/removed features than MaxActiveDimensions. - - #ifdef VECTOR - // Gcc-10.2 unnecessarily spills AVX2 registers if this array - // is defined in the VECTOR code below, once in each branch - vec_t acc[NumRegs]; - psqt_vec_t psqt[NumPsqtRegs]; - #endif - + [[nodiscard]] std::pair try_find_computed_accumulator(const Position& pos) const { // Look for a usable accumulator of an earlier position. We keep track // of the estimated gain in terms of features to be added/subtracted. StateInfo *st = pos.state(), *next = nullptr; @@ -367,218 +357,313 @@ namespace Stockfish::Eval::NNUE { next = st; st = st->previous; } + return { st, next }; + } - if (st->accumulator.computed[Perspective]) - { - if (next == nullptr) - return; + // NOTE: The parameter states_to_update is an array of position states, ending with nullptr. + // All states must be sequential, that is states_to_update[i] must either be reachable + // by repeatedly applying ->previous from states_to_update[i+1] or states_to_update[i] == nullptr. + // computed_st must be reachable by repeatedly applying ->previous on states_to_update[0], if not nullptr. + template + void update_accumulator_incremental(const Position& pos, StateInfo* computed_st, StateInfo* states_to_update[N]) const { + static_assert(N > 0); + assert(states_to_update[N-1] == nullptr); - // Update incrementally in two steps. First, we update the "next" - // accumulator. Then, we update the current accumulator (pos.state()). - - // Gather all features to be updated. - const Square ksq = pos.square(Perspective); - FeatureSet::IndexList removed[2], added[2]; - FeatureSet::append_changed_indices( - ksq, next->dirtyPiece, removed[0], added[0]); - for (StateInfo *st2 = pos.state(); st2 != next; st2 = st2->previous) - FeatureSet::append_changed_indices( - ksq, st2->dirtyPiece, removed[1], added[1]); - - // Mark the accumulators as computed. - next->accumulator.computed[Perspective] = true; - pos.state()->accumulator.computed[Perspective] = true; - - // Now update the accumulators listed in states_to_update[], where the last element is a sentinel. - StateInfo *states_to_update[3] = - { next, next == pos.state() ? nullptr : pos.state(), nullptr }; #ifdef VECTOR - for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) + // Gcc-10.2 unnecessarily spills AVX2 registers if this array + // is defined in the VECTOR code below, once in each branch + vec_t acc[NumRegs]; + psqt_vec_t psqt[NumPsqtRegs]; + #endif + + if (states_to_update[0] == nullptr) + return; + + // Update incrementally going back through states_to_update. + + // Gather all features to be updated. + const Square ksq = pos.square(Perspective); + + // The size must be enough to contain the largest possible update. + // That might depend on the feature set and generally relies on the + // feature set's update cost calculation to be correct and never + // allow updates with more added/removed features than MaxActiveDimensions. + FeatureSet::IndexList removed[N-1], added[N-1]; + + { + int i = N-2; // last potential state to update. Skip last element because it must be nullptr. + while (states_to_update[i] == nullptr) + --i; + + StateInfo *st2 = states_to_update[i]; + + for (; i >= 0; --i) { - // Load accumulator - auto accTile = reinterpret_cast( - &st->accumulator.accumulation[Perspective][j * TileHeight]); - for (IndexType k = 0; k < NumRegs; ++k) - acc[k] = vec_load(&accTile[k]); + states_to_update[i]->accumulator.computed[Perspective] = true; - for (IndexType i = 0; states_to_update[i]; ++i) - { - // Difference calculation for the deactivated features - for (const auto index : removed[i]) - { - const IndexType offset = HalfDimensions * index + j * TileHeight; - auto column = reinterpret_cast(&weights[offset]); - for (IndexType k = 0; k < NumRegs; ++k) - acc[k] = vec_sub_16(acc[k], column[k]); - } + StateInfo* end_state = i == 0 ? computed_st : states_to_update[i - 1]; - // Difference calculation for the activated features - for (const auto index : added[i]) - { - const IndexType offset = HalfDimensions * index + j * TileHeight; - auto column = reinterpret_cast(&weights[offset]); - for (IndexType k = 0; k < NumRegs; ++k) - acc[k] = vec_add_16(acc[k], column[k]); - } - - // Store accumulator - accTile = reinterpret_cast( - &states_to_update[i]->accumulator.accumulation[Perspective][j * TileHeight]); - for (IndexType k = 0; k < NumRegs; ++k) - vec_store(&accTile[k], acc[k]); - } + for (; st2 != end_state; st2 = st2->previous) + FeatureSet::append_changed_indices( + ksq, st2->dirtyPiece, removed[i], added[i]); } + } - for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) - { - // Load accumulator - auto accTilePsqt = reinterpret_cast( - &st->accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]); - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - psqt[k] = vec_load_psqt(&accTilePsqt[k]); + StateInfo* st = computed_st; - for (IndexType i = 0; states_to_update[i]; ++i) - { - // Difference calculation for the deactivated features - for (const auto index : removed[i]) - { - const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; - auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]); - } + // Now update the accumulators listed in states_to_update[], where the last element is a sentinel. +#ifdef VECTOR + for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) + { + // Load accumulator + auto accTile = reinterpret_cast( + &st->accumulator.accumulation[Perspective][j * TileHeight]); + for (IndexType k = 0; k < NumRegs; ++k) + acc[k] = vec_load(&accTile[k]); - // Difference calculation for the activated features - for (const auto index : added[i]) - { - const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; - auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); - } - - // Store accumulator - accTilePsqt = reinterpret_cast( - &states_to_update[i]->accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]); - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - vec_store_psqt(&accTilePsqt[k], psqt[k]); - } - } - - #else for (IndexType i = 0; states_to_update[i]; ++i) { - std::memcpy(states_to_update[i]->accumulator.accumulation[Perspective], - st->accumulator.accumulation[Perspective], - HalfDimensions * sizeof(BiasType)); - - for (std::size_t k = 0; k < PSQTBuckets; ++k) - states_to_update[i]->accumulator.psqtAccumulation[Perspective][k] = st->accumulator.psqtAccumulation[Perspective][k]; - - st = states_to_update[i]; - // Difference calculation for the deactivated features for (const auto index : removed[i]) { - const IndexType offset = HalfDimensions * index; - - for (IndexType j = 0; j < HalfDimensions; ++j) - st->accumulator.accumulation[Perspective][j] -= weights[offset + j]; - - for (std::size_t k = 0; k < PSQTBuckets; ++k) - st->accumulator.psqtAccumulation[Perspective][k] -= psqtWeights[index * PSQTBuckets + k]; + const IndexType offset = HalfDimensions * index + j * TileHeight; + auto column = reinterpret_cast(&weights[offset]); + for (IndexType k = 0; k < NumRegs; ++k) + acc[k] = vec_sub_16(acc[k], column[k]); } // Difference calculation for the activated features for (const auto index : added[i]) - { - const IndexType offset = HalfDimensions * index; - - for (IndexType j = 0; j < HalfDimensions; ++j) - st->accumulator.accumulation[Perspective][j] += weights[offset + j]; - - for (std::size_t k = 0; k < PSQTBuckets; ++k) - st->accumulator.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k]; - } - } - #endif - } - else - { - // Refresh the accumulator - auto& accumulator = pos.state()->accumulator; - accumulator.computed[Perspective] = true; - FeatureSet::IndexList active; - FeatureSet::append_active_indices(pos, active); - - #ifdef VECTOR - for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) - { - auto biasesTile = reinterpret_cast( - &biases[j * TileHeight]); - for (IndexType k = 0; k < NumRegs; ++k) - acc[k] = biasesTile[k]; - - for (const auto index : active) { const IndexType offset = HalfDimensions * index + j * TileHeight; auto column = reinterpret_cast(&weights[offset]); - - for (unsigned k = 0; k < NumRegs; ++k) + for (IndexType k = 0; k < NumRegs; ++k) acc[k] = vec_add_16(acc[k], column[k]); } - auto accTile = reinterpret_cast( - &accumulator.accumulation[Perspective][j * TileHeight]); - for (unsigned k = 0; k < NumRegs; k++) + // Store accumulator + accTile = reinterpret_cast( + &states_to_update[i]->accumulator.accumulation[Perspective][j * TileHeight]); + for (IndexType k = 0; k < NumRegs; ++k) vec_store(&accTile[k], acc[k]); } + } - for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) + for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) + { + // Load accumulator + auto accTilePsqt = reinterpret_cast( + &st->accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_load_psqt(&accTilePsqt[k]); + + for (IndexType i = 0; states_to_update[i]; ++i) { - for (std::size_t k = 0; k < NumPsqtRegs; ++k) - psqt[k] = vec_zero_psqt(); - - for (const auto index : active) + // Difference calculation for the deactivated features + for (const auto index : removed[i]) { const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_sub_psqt_32(psqt[k], columnPsqt[k]); + } + // Difference calculation for the activated features + for (const auto index : added[i]) + { + const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; + auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); for (std::size_t k = 0; k < NumPsqtRegs; ++k) psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); } - auto accTilePsqt = reinterpret_cast( - &accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]); + // Store accumulator + accTilePsqt = reinterpret_cast( + &states_to_update[i]->accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]); for (std::size_t k = 0; k < NumPsqtRegs; ++k) vec_store_psqt(&accTilePsqt[k], psqt[k]); } + } - #else - std::memcpy(accumulator.accumulation[Perspective], biases, +#else + for (IndexType i = 0; states_to_update[i]; ++i) + { + std::memcpy(states_to_update[i]->accumulator.accumulation[Perspective], + st->accumulator.accumulation[Perspective], HalfDimensions * sizeof(BiasType)); for (std::size_t k = 0; k < PSQTBuckets; ++k) - accumulator.psqtAccumulation[Perspective][k] = 0; + states_to_update[i]->accumulator.psqtAccumulation[Perspective][k] = st->accumulator.psqtAccumulation[Perspective][k]; - for (const auto index : active) + st = states_to_update[i]; + + // Difference calculation for the deactivated features + for (const auto index : removed[i]) { const IndexType offset = HalfDimensions * index; for (IndexType j = 0; j < HalfDimensions; ++j) - accumulator.accumulation[Perspective][j] += weights[offset + j]; + st->accumulator.accumulation[Perspective][j] -= weights[offset + j]; for (std::size_t k = 0; k < PSQTBuckets; ++k) - accumulator.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k]; + st->accumulator.psqtAccumulation[Perspective][k] -= psqtWeights[index * PSQTBuckets + k]; + } + + // Difference calculation for the activated features + for (const auto index : added[i]) + { + const IndexType offset = HalfDimensions * index; + + for (IndexType j = 0; j < HalfDimensions; ++j) + st->accumulator.accumulation[Perspective][j] += weights[offset + j]; + + for (std::size_t k = 0; k < PSQTBuckets; ++k) + st->accumulator.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k]; } - #endif } +#endif #if defined(USE_MMX) _mm_empty(); #endif } + template + void update_accumulator_refresh(const Position& pos) const { + #ifdef VECTOR + // Gcc-10.2 unnecessarily spills AVX2 registers if this array + // is defined in the VECTOR code below, once in each branch + vec_t acc[NumRegs]; + psqt_vec_t psqt[NumPsqtRegs]; + #endif + + // Refresh the accumulator + // Could be extracted to a separate function because it's done in 2 places, + // but it's unclear if compilers would correctly handle register allocation. + auto& accumulator = pos.state()->accumulator; + accumulator.computed[Perspective] = true; + FeatureSet::IndexList active; + FeatureSet::append_active_indices(pos, active); + +#ifdef VECTOR + for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j) + { + auto biasesTile = reinterpret_cast( + &biases[j * TileHeight]); + for (IndexType k = 0; k < NumRegs; ++k) + acc[k] = biasesTile[k]; + + for (const auto index : active) + { + const IndexType offset = HalfDimensions * index + j * TileHeight; + auto column = reinterpret_cast(&weights[offset]); + + for (unsigned k = 0; k < NumRegs; ++k) + acc[k] = vec_add_16(acc[k], column[k]); + } + + auto accTile = reinterpret_cast( + &accumulator.accumulation[Perspective][j * TileHeight]); + for (unsigned k = 0; k < NumRegs; k++) + vec_store(&accTile[k], acc[k]); + } + + for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j) + { + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_zero_psqt(); + + for (const auto index : active) + { + const IndexType offset = PSQTBuckets * index + j * PsqtTileHeight; + auto columnPsqt = reinterpret_cast(&psqtWeights[offset]); + + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + psqt[k] = vec_add_psqt_32(psqt[k], columnPsqt[k]); + } + + auto accTilePsqt = reinterpret_cast( + &accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]); + for (std::size_t k = 0; k < NumPsqtRegs; ++k) + vec_store_psqt(&accTilePsqt[k], psqt[k]); + } + +#else + std::memcpy(accumulator.accumulation[Perspective], biases, + HalfDimensions * sizeof(BiasType)); + + for (std::size_t k = 0; k < PSQTBuckets; ++k) + accumulator.psqtAccumulation[Perspective][k] = 0; + + for (const auto index : active) + { + const IndexType offset = HalfDimensions * index; + + for (IndexType j = 0; j < HalfDimensions; ++j) + accumulator.accumulation[Perspective][j] += weights[offset + j]; + + for (std::size_t k = 0; k < PSQTBuckets; ++k) + accumulator.psqtAccumulation[Perspective][k] += psqtWeights[index * PSQTBuckets + k]; + } +#endif + + #if defined(USE_MMX) + _mm_empty(); + #endif + } + + template + void hint_common_access_for_perspective(const Position& pos) const { + + // Works like update_accumulator, but performs less work. + // Updates ONLY the accumulator for pos. + + // Look for a usable accumulator of an earlier position. We keep track + // of the estimated gain in terms of features to be added/subtracted. + // Fast early exit. + if (pos.state()->accumulator.computed[Perspective]) + return; + + auto [oldest_st, _] = try_find_computed_accumulator(pos); + + if (oldest_st->accumulator.computed[Perspective]) + { + // Only update current position accumulator to minimize work. + StateInfo* states_to_update[2] = { pos.state(), nullptr }; + update_accumulator_incremental(pos, oldest_st, states_to_update); + } + else + { + update_accumulator_refresh(pos); + } + } + + template + void update_accumulator(const Position& pos) const { + + auto [oldest_st, next] = try_find_computed_accumulator(pos); + + if (oldest_st->accumulator.computed[Perspective]) + { + if (next == nullptr) + return; + + // Now update the accumulators listed in states_to_update[], where the last element is a sentinel. + // Currently we update 2 accumulators. + // 1. for the current position + // 2. the next accumulator after the computed one + // The heuristic may change in the future. + StateInfo *states_to_update[3] = + { next, next == pos.state() ? nullptr : pos.state(), nullptr }; + + update_accumulator_incremental(pos, oldest_st, states_to_update); + } + else + { + update_accumulator_refresh(pos); + } + } + alignas(CacheLineSize) BiasType biases[HalfDimensions]; alignas(CacheLineSize) WeightType weights[HalfDimensions * InputDimensions]; alignas(CacheLineSize) PSQTWeightType psqtWeights[InputDimensions * PSQTBuckets]; diff --git a/src/pawns.cpp b/src/pawns.cpp index fdcfa022..0ccafd9e 100644 --- a/src/pawns.cpp +++ b/src/pawns.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/pawns.h b/src/pawns.h index af0370fc..d20e7c2e 100644 --- a/src/pawns.h +++ b/src/pawns.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -61,7 +61,7 @@ struct Entry { int blockedCount; }; -typedef HashTable Table; +using Table = HashTable; Entry* probe(const Position& pos); diff --git a/src/position.cpp b/src/position.cpp index 5befcaf2..2a9d798f 100644 --- a/src/position.cpp +++ b/src/position.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,6 +22,7 @@ #include // For std::memset, std::memcmp #include #include +#include #include "bitboard.h" #include "misc.h" @@ -46,7 +47,7 @@ namespace Zobrist { namespace { -const string PieceToChar(" PNBRQK pnbrqk"); +constexpr std::string_view PieceToChar(" PNBRQK pnbrqk"); constexpr Piece Pieces[] = { W_PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING, B_PAWN, B_KNIGHT, B_BISHOP, B_ROOK, B_QUEEN, B_KING }; @@ -96,7 +97,7 @@ std::ostream& operator<<(std::ostream& os, const Position& pos) { // Marcel van Kervinck's cuckoo algorithm for fast detection of "upcoming repetition" // situations. Description of the algorithm in the following paper: -// https://marcelk.net/2013-04-06/paper/upcoming-rep-v2.pdf +// http://web.archive.org/web/20201107002606/https://marcelk.net/2013-04-06/paper/upcoming-rep-v2.pdf // First and second hash functions for indexing the cuckoo tables inline int H1(Key h) { return h & 0x1fff; } @@ -281,7 +282,7 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th chess960 = isChess960; thisThread = th; - set_state(st); + set_state(); assert(pos_is_ok()); @@ -312,60 +313,59 @@ void Position::set_castling_right(Color c, Square rfrom) { /// Position::set_check_info() sets king attacks to detect if a move gives check -void Position::set_check_info(StateInfo* si) const { +void Position::set_check_info() const { - si->blockersForKing[WHITE] = slider_blockers(pieces(BLACK), square(WHITE), si->pinners[BLACK]); - si->blockersForKing[BLACK] = slider_blockers(pieces(WHITE), square(BLACK), si->pinners[WHITE]); + st->blockersForKing[WHITE] = slider_blockers(pieces(BLACK), square(WHITE), st->pinners[BLACK]); + st->blockersForKing[BLACK] = slider_blockers(pieces(WHITE), square(BLACK), st->pinners[WHITE]); Square ksq = square(~sideToMove); - si->checkSquares[PAWN] = pawn_attacks_bb(~sideToMove, ksq); - si->checkSquares[KNIGHT] = attacks_bb(ksq); - si->checkSquares[BISHOP] = attacks_bb(ksq, pieces()); - si->checkSquares[ROOK] = attacks_bb(ksq, pieces()); - si->checkSquares[QUEEN] = si->checkSquares[BISHOP] | si->checkSquares[ROOK]; - si->checkSquares[KING] = 0; + st->checkSquares[PAWN] = pawn_attacks_bb(~sideToMove, ksq); + st->checkSquares[KNIGHT] = attacks_bb(ksq); + st->checkSquares[BISHOP] = attacks_bb(ksq, pieces()); + st->checkSquares[ROOK] = attacks_bb(ksq, pieces()); + st->checkSquares[QUEEN] = st->checkSquares[BISHOP] | st->checkSquares[ROOK]; + st->checkSquares[KING] = 0; } /// Position::set_state() computes the hash keys of the position, and other /// data that once computed is updated incrementally as moves are made. -/// The function is only used when a new position is set up, and to verify -/// the correctness of the StateInfo data when running in debug mode. +/// The function is only used when a new position is set up -void Position::set_state(StateInfo* si) const { +void Position::set_state() const { - si->key = si->materialKey = 0; - si->pawnKey = Zobrist::noPawns; - si->nonPawnMaterial[WHITE] = si->nonPawnMaterial[BLACK] = VALUE_ZERO; - si->checkersBB = attackers_to(square(sideToMove)) & pieces(~sideToMove); + st->key = st->materialKey = 0; + st->pawnKey = Zobrist::noPawns; + st->nonPawnMaterial[WHITE] = st->nonPawnMaterial[BLACK] = VALUE_ZERO; + st->checkersBB = attackers_to(square(sideToMove)) & pieces(~sideToMove); - set_check_info(si); + set_check_info(); for (Bitboard b = pieces(); b; ) { Square s = pop_lsb(b); Piece pc = piece_on(s); - si->key ^= Zobrist::psq[pc][s]; + st->key ^= Zobrist::psq[pc][s]; if (type_of(pc) == PAWN) - si->pawnKey ^= Zobrist::psq[pc][s]; + st->pawnKey ^= Zobrist::psq[pc][s]; else if (type_of(pc) != KING) - si->nonPawnMaterial[color_of(pc)] += PieceValue[MG][pc]; + st->nonPawnMaterial[color_of(pc)] += PieceValue[MG][pc]; } - if (si->epSquare != SQ_NONE) - si->key ^= Zobrist::enpassant[file_of(si->epSquare)]; + if (st->epSquare != SQ_NONE) + st->key ^= Zobrist::enpassant[file_of(st->epSquare)]; if (sideToMove == BLACK) - si->key ^= Zobrist::side; + st->key ^= Zobrist::side; - si->key ^= Zobrist::castling[si->castlingRights]; + st->key ^= Zobrist::castling[st->castlingRights]; for (Piece pc : Pieces) for (int cnt = 0; cnt < pieceCount[pc]; ++cnt) - si->materialKey ^= Zobrist::psq[pc][cnt]; + st->materialKey ^= Zobrist::psq[pc][cnt]; } @@ -568,8 +568,7 @@ bool Position::pseudo_legal(const Move m) const { : MoveList(*this).contains(m); // Is not a promotion, so promotion piece must be empty - if (promotion_type(m) - KNIGHT != NO_PIECE_TYPE) - return false; + assert(promotion_type(m) - KNIGHT == NO_PIECE_TYPE); // If the 'from' square is not occupied by a piece belonging to the side to // move, the move is obviously not legal. @@ -765,9 +764,6 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { // Update board and piece lists remove_piece(capsq); - if (type_of(m) == EN_PASSANT) - board[capsq] = NO_PIECE; - // Update material hash key and prefetch access to materialTable k ^= Zobrist::psq[captured][capsq]; st->materialKey ^= Zobrist::psq[captured][pieceCount[captured]]; @@ -868,7 +864,7 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { sideToMove = ~sideToMove; // Update king attacks used for fast check detection - set_check_info(st); + set_check_info(); // Calculate the repetition info. It is the ply distance from the previous // occurrence of the same position, negative in the 3-fold case, or zero @@ -1020,7 +1016,7 @@ void Position::do_null_move(StateInfo& newSt) { sideToMove = ~sideToMove; - set_check_info(st); + set_check_info(); st->repetition = 0; @@ -1065,7 +1061,7 @@ Key Position::key_after(Move m) const { /// SEE value of move is greater or equal to the given threshold. We'll use an /// algorithm similar to alpha-beta pruning with a null window. -bool Position::see_ge(Move m, Value threshold) const { +bool Position::see_ge(Move m, Bitboard& occupied, Value threshold) const { assert(is_ok(m)); @@ -1084,7 +1080,7 @@ bool Position::see_ge(Move m, Value threshold) const { return true; assert(color_of(piece_on(from)) == sideToMove); - Bitboard occupied = pieces() ^ from ^ to; + occupied = pieces() ^ from ^ to; // xoring to is important for pinned piece logic Color stm = sideToMove; Bitboard attackers = attackers_to(to, occupied); Bitboard stmAttackers, bb; @@ -1115,45 +1111,44 @@ bool Position::see_ge(Move m, Value threshold) const { // the bitboard 'attackers' any X-ray attackers behind it. if ((bb = stmAttackers & pieces(PAWN))) { + occupied ^= least_significant_square_bb(bb); if ((swap = PawnValueMg - swap) < res) break; - occupied ^= least_significant_square_bb(bb); attackers |= attacks_bb(to, occupied) & pieces(BISHOP, QUEEN); } else if ((bb = stmAttackers & pieces(KNIGHT))) { + occupied ^= least_significant_square_bb(bb); if ((swap = KnightValueMg - swap) < res) break; - - occupied ^= least_significant_square_bb(bb); } else if ((bb = stmAttackers & pieces(BISHOP))) { + occupied ^= least_significant_square_bb(bb); if ((swap = BishopValueMg - swap) < res) break; - occupied ^= least_significant_square_bb(bb); attackers |= attacks_bb(to, occupied) & pieces(BISHOP, QUEEN); } else if ((bb = stmAttackers & pieces(ROOK))) { + occupied ^= least_significant_square_bb(bb); if ((swap = RookValueMg - swap) < res) break; - occupied ^= least_significant_square_bb(bb); attackers |= attacks_bb(to, occupied) & pieces(ROOK, QUEEN); } else if ((bb = stmAttackers & pieces(QUEEN))) { + occupied ^= least_significant_square_bb(bb); if ((swap = QueenValueMg - swap) < res) break; - occupied ^= least_significant_square_bb(bb); attackers |= (attacks_bb(to, occupied) & pieces(BISHOP, QUEEN)) | (attacks_bb(to, occupied) & pieces(ROOK , QUEEN)); } @@ -1167,6 +1162,11 @@ bool Position::see_ge(Move m, Value threshold) const { return bool(res); } +bool Position::see_ge(Move m, Value threshold) const { + Bitboard occupied; + return see_ge(m, occupied, threshold); +} + /// Position::is_draw() tests whether the position is drawn by 50-move rule /// or by repetition. It does not detect stalemates. @@ -1323,12 +1323,6 @@ bool Position::pos_is_ok() const { if (p1 != p2 && (pieces(p1) & pieces(p2))) assert(0 && "pos_is_ok: Bitboards"); - StateInfo si = *st; - ASSERT_ALIGNED(&si, Eval::NNUE::CacheLineSize); - - set_state(&si); - if (std::memcmp(&si, st, sizeof(StateInfo))) - assert(0 && "pos_is_ok: State"); for (Piece pc : Pieces) if ( pieceCount[pc] != popcount(pieces(color_of(pc), type_of(pc))) diff --git a/src/position.h b/src/position.h index 078ff5b7..2e6014db 100644 --- a/src/position.h +++ b/src/position.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -68,7 +68,7 @@ struct StateInfo { /// start position to the position just before the search starts). Needed by /// 'draw by repetition' detection. Use a std::deque because pointers to /// elements are not invalidated upon list resizing. -typedef std::unique_ptr> StateListPtr; +using StateListPtr = std::unique_ptr>; /// Position class stores information regarding the board representation as @@ -92,10 +92,9 @@ public: // Position representation Bitboard pieces(PieceType pt) const; - Bitboard pieces(PieceType pt1, PieceType pt2) const; + template Bitboard pieces(PieceType pt, PieceTypes... pts) const; Bitboard pieces(Color c) const; - Bitboard pieces(Color c, PieceType pt) const; - Bitboard pieces(Color c, PieceType pt1, PieceType pt2) const; + template Bitboard pieces(Color c, PieceTypes... pts) const; Piece piece_on(Square s) const; Square ep_square() const; bool empty(Square s) const; @@ -126,6 +125,7 @@ public: bool legal(Move m) const; bool pseudo_legal(const Move m) const; bool capture(Move m) const; + bool capture_stage(Move m) const; bool gives_check(Move m) const; Piece moved_piece(Move m) const; Piece captured_piece() const; @@ -144,6 +144,7 @@ public: // Static Exchange Evaluation bool see_ge(Move m, Value threshold = VALUE_ZERO) const; + bool see_ge(Move m, Bitboard& occupied, Value threshold = VALUE_ZERO) const; // Accessing hash keys Key key() const; @@ -178,8 +179,8 @@ public: private: // Initialization helpers (used while setting up a position) void set_castling_right(Color c, Square rfrom); - void set_state(StateInfo* si) const; - void set_check_info(StateInfo* si) const; + void set_state() const; + void set_check_info() const; // Other helpers void move_piece(Square from, Square to); @@ -204,7 +205,7 @@ private: bool chess960; }; -extern std::ostream& operator<<(std::ostream& os, const Position& pos); +std::ostream& operator<<(std::ostream& os, const Position& pos); inline Color Position::side_to_move() const { return sideToMove; @@ -227,20 +228,18 @@ inline Bitboard Position::pieces(PieceType pt = ALL_PIECES) const { return byTypeBB[pt]; } -inline Bitboard Position::pieces(PieceType pt1, PieceType pt2) const { - return pieces(pt1) | pieces(pt2); +template +inline Bitboard Position::pieces(PieceType pt, PieceTypes... pts) const { + return pieces(pt) | pieces(pts...); } inline Bitboard Position::pieces(Color c) const { return byColorBB[c]; } -inline Bitboard Position::pieces(Color c, PieceType pt) const { - return pieces(c) & pieces(pt); -} - -inline Bitboard Position::pieces(Color c, PieceType pt1, PieceType pt2) const { - return pieces(c) & (pieces(pt1) | pieces(pt2)); +template +inline Bitboard Position::pieces(Color c, PieceTypes... pts) const { + return pieces(c) & pieces(pts...); } template inline int Position::count(Color c) const { @@ -383,8 +382,16 @@ inline bool Position::is_chess960() const { inline bool Position::capture(Move m) const { assert(is_ok(m)); - // Castling is encoded as "king captures rook" - return (!empty(to_sq(m)) && type_of(m) != CASTLING) || type_of(m) == EN_PASSANT; + return (!empty(to_sq(m)) && type_of(m) != CASTLING) + || type_of(m) == EN_PASSANT; +} + +// returns true if a move is generated from the capture stage +// having also queen promotions covered, i.e. consistency with the capture stage move generation +// is needed to avoid the generation of duplicate moves. +inline bool Position::capture_stage(Move m) const { + assert(is_ok(m)); + return capture(m) || promotion_type(m) == QUEEN; } inline Piece Position::captured_piece() const { diff --git a/src/psqt.cpp b/src/psqt.cpp index ca5664c2..d3ebb20d 100644 --- a/src/psqt.cpp +++ b/src/psqt.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/psqt.h b/src/psqt.h index 4ee0e379..9630f446 100644 --- a/src/psqt.h +++ b/src/psqt.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -30,7 +30,7 @@ namespace Stockfish::PSQT extern Score psq[PIECE_NB][SQUARE_NB]; // Fill psqt array from a set of internally linked parameters -extern void init(); +void init(); } // namespace Stockfish::PSQT diff --git a/src/search.cpp b/src/search.cpp index 1365e471..d8633686 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -35,6 +35,7 @@ #include "tt.h" #include "uci.h" #include "syzygy/tbprobe.h" +#include "nnue/evaluate_nnue.h" namespace Stockfish { @@ -64,7 +65,7 @@ namespace { // Futility margin Value futility_margin(Depth d, bool improving) { - return Value(165 * (d - improving)); + return Value(140 * (d - improving)); } // Reductions lookup table, initialized at startup @@ -72,7 +73,7 @@ namespace { Depth reduction(bool i, Depth d, int mn, Value delta, Value rootDelta) { int r = Reductions[d] * Reductions[mn]; - return (r + 1642 - int(delta) * 1024 / int(rootDelta)) / 1024 + (!i && r > 916); + return (r + 1372 - int(delta) * 1073 / int(rootDelta)) / 1024 + (!i && r > 936); } constexpr int futility_move_count(bool improving, Depth depth) { @@ -82,7 +83,7 @@ namespace { // History and stats update bonus, based on depth int stat_bonus(Depth d) { - return std::min((12 * d + 282) * d - 349 , 1594); + return std::min(336 * d - 547, 1561); } // Add a small random component to draw evaluations to avoid 3-fold blindness @@ -97,7 +98,10 @@ namespace { struct Skill { Skill(int skill_level, int uci_elo) { if (uci_elo) - level = std::clamp(std::pow((uci_elo - 1346.6) / 143.4, 1 / 0.806), 0.0, 20.0); + { + double e = double(uci_elo - 1320) / (3190 - 1320); + level = std::clamp((((37.2473 * e - 40.8525) * e + 22.2943) * e - 0.311438), 0.0, 19.0); + } else level = double(skill_level); } @@ -159,7 +163,7 @@ namespace { void Search::init() { for (int i = 1; i < MAX_MOVES; ++i) - Reductions[i] = int((20.26 + std::log(Threads.size()) / 2) * std::log(i)); + Reductions[i] = int((20.57 + std::log(Threads.size()) / 2) * std::log(i)); } @@ -243,9 +247,6 @@ void MainThread::search() { && rootMoves[0].pv[0] != MOVE_NONE) bestThread = Threads.get_best_thread(); - for (Thread* th : Threads) - th->previousDepth = bestThread->completedDepth; - // Prepare PVLine and ponder move std::string PVLine = UCI::pv(bestThread->rootPos, bestThread->completedDepth); bestPreviousScore = bestThread->rootMoves[0].score; @@ -306,16 +307,18 @@ void Thread::search() { int iterIdx = 0; std::memset(ss-7, 0, 10 * sizeof(Stack)); - for (int i = 7; i > 0; i--) + for (int i = 7; i > 0; --i) + { (ss-i)->continuationHistory = &this->continuationHistory[0][0][NO_PIECE][0]; // Use as a sentinel + (ss-i)->staticEval = VALUE_NONE; + } for (int i = 0; i <= MAX_PLY + 2; ++i) (ss+i)->ply = i; ss->pv = pv; - bestValue = delta = alpha = -VALUE_INFINITE; - beta = VALUE_INFINITE; + bestValue = -VALUE_INFINITE; if (mainThread) { @@ -337,10 +340,6 @@ void Thread::search() { multiPV = std::min(multiPV, rootMoves.size()); - complexityAverage.set(155, 1); - - optimism[us] = optimism[~us] = VALUE_ZERO; - int searchAgainCounter = 0; // Iterative deepening loop until requested to stop or the target depth is reached @@ -362,7 +361,7 @@ void Thread::search() { pvLast = 0; if (!Threads.increaseDepth) - searchAgainCounter++; + searchAgainCounter++; // MultiPV loop. We perform a full root search for each PV line for (pvIdx = 0; pvIdx < multiPV && !Threads.stop; ++pvIdx) @@ -379,18 +378,15 @@ void Thread::search() { selDepth = 0; // Reset aspiration window starting size - if (rootDepth >= 4) - { - Value prev = rootMoves[pvIdx].averageScore; - delta = Value(10) + int(prev) * prev / 15620; - alpha = std::max(prev - delta,-VALUE_INFINITE); - beta = std::min(prev + delta, VALUE_INFINITE); + Value prev = rootMoves[pvIdx].averageScore; + delta = Value(10) + int(prev) * prev / 15799; + alpha = std::max(prev - delta,-VALUE_INFINITE); + beta = std::min(prev + delta, VALUE_INFINITE); - // Adjust optimism based on root move's previousScore - int opt = 118 * prev / (std::abs(prev) + 169); - optimism[ us] = Value(opt); - optimism[~us] = -optimism[us]; - } + // Adjust optimism based on root move's previousScore + int opt = 109 * prev / (std::abs(prev) + 141); + optimism[ us] = Value(opt); + optimism[~us] = -optimism[us]; // Start with a small aspiration window and, in the case of a fail // high/low, re-search with a bigger window until we don't fail @@ -448,7 +444,7 @@ void Thread::search() { else break; - delta += delta / 4 + 2; + delta += delta / 3; assert(alpha >= -VALUE_INFINITE && beta <= VALUE_INFINITE); } @@ -467,9 +463,10 @@ void Thread::search() { if (!Threads.stop) completedDepth = rootDepth; - if (rootMoves[0].pv[0] != lastBestMove) { - lastBestMove = rootMoves[0].pv[0]; - lastBestMoveDepth = rootDepth; + if (rootMoves[0].pv[0] != lastBestMove) + { + lastBestMove = rootMoves[0].pv[0]; + lastBestMoveDepth = rootDepth; } // Have we found a "mate in x"? @@ -497,18 +494,16 @@ void Thread::search() { && !Threads.stop && !mainThread->stopOnPonderhit) { - double fallingEval = (71 + 12 * (mainThread->bestPreviousAverageScore - bestValue) - + 6 * (mainThread->iterValue[iterIdx] - bestValue)) / 656.7; + double fallingEval = (69 + 13 * (mainThread->bestPreviousAverageScore - bestValue) + + 6 * (mainThread->iterValue[iterIdx] - bestValue)) / 619.6; fallingEval = std::clamp(fallingEval, 0.5, 1.5); // If the bestMove is stable over several iterations, reduce time accordingly - timeReduction = lastBestMoveDepth + 9 < completedDepth ? 1.37 : 0.65; - double reduction = (1.4 + mainThread->previousTimeReduction) / (2.15 * timeReduction); - double bestMoveInstability = 1 + 1.7 * totBestMoveChanges / Threads.size(); - int complexity = mainThread->complexityAverage.value(); - double complexPosition = std::min(1.0 + (complexity - 261) / 1738.7, 1.5); + timeReduction = lastBestMoveDepth + 8 < completedDepth ? 1.57 : 0.65; + double reduction = (1.4 + mainThread->previousTimeReduction) / (2.08 * timeReduction); + double bestMoveInstability = 1 + 1.8 * totBestMoveChanges / Threads.size(); - double totalTime = Time.optimum() * fallingEval * reduction * bestMoveInstability * complexPosition; + double totalTime = Time.optimum() * fallingEval * reduction * bestMoveInstability; // Cap used time in case of a single legal move for a better viewer experience in tournaments // yielding correct scores and sufficiently fast moves. @@ -525,12 +520,11 @@ void Thread::search() { else Threads.stop = true; } - else if ( Threads.increaseDepth - && !mainThread->ponder - && Time.elapsed() > totalTime * 0.53) - Threads.increaseDepth = false; + else if ( !mainThread->ponder + && Time.elapsed() > totalTime * 0.50) + Threads.increaseDepth = false; else - Threads.increaseDepth = true; + Threads.increaseDepth = true; } mainThread->iterValue[iterIdx] = bestValue; @@ -558,7 +552,6 @@ namespace { constexpr bool PvNode = nodeType != NonPV; constexpr bool rootNode = nodeType == Root; - const Depth maxNextDepth = rootNode ? depth : depth + 1; // Check if we have an upcoming move which draws by repetition, or // if the opponent had an alternative move earlier to this position. @@ -593,7 +586,7 @@ namespace { bool givesCheck, improving, priorCapture, singularQuietLMR; bool capture, moveCountPruning, ttCapture; Piece movedPiece; - int moveCount, captureCount, quietCount, improvement, complexity; + int moveCount, captureCount, quietCount, improvement; // Step 1. Initialize node Thread* thisThread = pos.this_thread(); @@ -637,52 +630,45 @@ namespace { assert(0 <= ss->ply && ss->ply < MAX_PLY); - (ss+1)->ttPv = false; (ss+1)->excludedMove = bestMove = MOVE_NONE; (ss+2)->killers[0] = (ss+2)->killers[1] = MOVE_NONE; (ss+2)->cutoffCnt = 0; ss->doubleExtensions = (ss-1)->doubleExtensions; - Square prevSq = to_sq((ss-1)->currentMove); + Square prevSq = is_ok((ss-1)->currentMove) ? to_sq((ss-1)->currentMove) : SQ_NONE; + ss->statScore = 0; - // Initialize statScore to zero for the grandchildren of the current position. - // So statScore is shared between all grandchildren and only the first grandchild - // starts with statScore = 0. Later grandchildren start with the last calculated - // statScore of the previous grandchild. This influences the reduction rules in - // LMR which are based on the statScore of parent position. - if (!rootNode) - (ss+2)->statScore = 0; - - // Step 4. Transposition table lookup. We don't want the score of a partial - // search to overwrite a previous full search TT value, so we use a different - // position key in case of an excluded move. + // Step 4. Transposition table lookup. excludedMove = ss->excludedMove; - posKey = excludedMove == MOVE_NONE ? pos.key() : pos.key() ^ make_key(excludedMove); + posKey = pos.key(); tte = TT.probe(posKey, ss->ttHit); ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; ttMove = rootNode ? thisThread->rootMoves[thisThread->pvIdx].pv[0] : ss->ttHit ? tte->move() : MOVE_NONE; - ttCapture = ttMove && pos.capture(ttMove); + ttCapture = ttMove && pos.capture_stage(ttMove); + + // At this point, if excluded, skip straight to step 6, static eval. However, + // to save indentation, we list the condition in all code between here and there. if (!excludedMove) ss->ttPv = PvNode || (ss->ttHit && tte->is_pv()); // At non-PV nodes we check for an early TT cutoff if ( !PvNode - && ss->ttHit + && !excludedMove && tte->depth() > depth - (tte->bound() == BOUND_EXACT) - && ttValue != VALUE_NONE // Possible in case of TT access race + && ttValue != VALUE_NONE // Possible in case of TT access race or if !ttHit && (tte->bound() & (ttValue >= beta ? BOUND_LOWER : BOUND_UPPER))) { - // If ttMove is quiet, update move sorting heuristics on TT hit (~1 Elo) + // If ttMove is quiet, update move sorting heuristics on TT hit (~2 Elo) if (ttMove) { if (ttValue >= beta) { - // Bonus for a quiet ttMove that fails high (~3 Elo) + // Bonus for a quiet ttMove that fails high (~2 Elo) if (!ttCapture) update_quiet_stats(pos, ss, ttMove, stat_bonus(depth)); - // Extra penalty for early quiet moves of the previous ply (~0 Elo) - if ((ss-1)->moveCount <= 2 && !priorCapture) + // Extra penalty for early quiet moves of the previous ply (~0 Elo on STC, ~2 Elo on LTC) + if (prevSq != SQ_NONE && (ss-1)->moveCount <= 2 && !priorCapture) update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, -stat_bonus(depth + 1)); } // Penalty for a quiet ttMove that fails low (~1 Elo) @@ -701,7 +687,7 @@ namespace { } // Step 5. Tablebases probe - if (!rootNode && TB::Cardinality) + if (!rootNode && !excludedMove && TB::Cardinality) { int piecesCount = pos.count(); @@ -762,40 +748,40 @@ namespace { ss->staticEval = eval = VALUE_NONE; improving = false; improvement = 0; - complexity = 0; goto moves_loop; } + else if (excludedMove) + { + // Providing the hint that this node's accumulator will be used often brings significant Elo gain (13 Elo) + Eval::NNUE::hint_common_parent_position(pos); + eval = ss->staticEval; + } else if (ss->ttHit) { // Never assume anything about values stored in TT ss->staticEval = eval = tte->eval(); if (eval == VALUE_NONE) - ss->staticEval = eval = evaluate(pos, &complexity); - else // Fall back to (semi)classical complexity for TT hits, the NNUE complexity is lost - complexity = abs(ss->staticEval - pos.psq_eg_stm()); + ss->staticEval = eval = evaluate(pos); + else if (PvNode) + Eval::NNUE::hint_common_parent_position(pos); - // ttValue can be used as a better position evaluation (~4 Elo) + // ttValue can be used as a better position evaluation (~7 Elo) if ( ttValue != VALUE_NONE && (tte->bound() & (ttValue > eval ? BOUND_LOWER : BOUND_UPPER))) eval = ttValue; } else { - ss->staticEval = eval = evaluate(pos, &complexity); - + ss->staticEval = eval = evaluate(pos); // Save static evaluation into transposition table - if (!excludedMove) - Cluster::save(thisThread, tte, - posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, - eval); + Cluster::save(thisThread, tte, + posKey, VALUE_NONE, ss->ttPv, BOUND_NONE, DEPTH_NONE, MOVE_NONE, eval); } - thisThread->complexityAverage.update(complexity); - - // Use static evaluation difference to improve quiet move ordering (~3 Elo) + // Use static evaluation difference to improve quiet move ordering (~4 Elo) if (is_ok((ss-1)->currentMove) && !(ss-1)->inCheck && !priorCapture) { - int bonus = std::clamp(-19 * int((ss-1)->staticEval + ss->staticEval), -1914, 1914); + int bonus = std::clamp(-18 * int((ss-1)->staticEval + ss->staticEval), -1817, 1817); thisThread->mainHistory[~us][from_to((ss-1)->currentMove)] << bonus; } @@ -805,43 +791,43 @@ namespace { // margin and the improving flag are used in various pruning heuristics. improvement = (ss-2)->staticEval != VALUE_NONE ? ss->staticEval - (ss-2)->staticEval : (ss-4)->staticEval != VALUE_NONE ? ss->staticEval - (ss-4)->staticEval - : 168; + : 173; improving = improvement > 0; - // Step 7. Razoring. + // Step 7. Razoring (~1 Elo). // If eval is really low check with qsearch if it can exceed alpha, if it can't, // return a fail low. - if (eval < alpha - 369 - 254 * depth * depth) + if (eval < alpha - 456 - 252 * depth * depth) { value = qsearch(pos, ss, alpha - 1, alpha); if (value < alpha) return value; } - // Step 8. Futility pruning: child node (~25 Elo). + // Step 8. Futility pruning: child node (~40 Elo). // The depth condition is important for mate finding. if ( !ss->ttPv - && depth < 8 - && eval - futility_margin(depth, improving) - (ss-1)->statScore / 303 >= beta + && depth < 9 + && eval - futility_margin(depth, improving) - (ss-1)->statScore / 306 >= beta && eval >= beta - && eval < 28031) // larger than VALUE_KNOWN_WIN, but smaller than TB wins + && eval < 24923) // larger than VALUE_KNOWN_WIN, but smaller than TB wins return eval; - // Step 9. Null move search with verification search (~22 Elo) + // Step 9. Null move search with verification search (~35 Elo) if ( !PvNode && (ss-1)->currentMove != MOVE_NULL - && (ss-1)->statScore < 17139 + && (ss-1)->statScore < 17329 && eval >= beta && eval >= ss->staticEval - && ss->staticEval >= beta - 20 * depth - improvement / 13 + 233 + complexity / 25 + && ss->staticEval >= beta - 21 * depth - improvement / 13 + 258 && !excludedMove && pos.non_pawn_material(us) - && (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor)) + && (ss->ply >= thisThread->nmpMinPly)) { assert(eval - beta >= 0); - // Null move dynamic reduction based on depth, eval and complexity of position - Depth R = std::min(int(eval - beta) / 168, 7) + depth / 3 + 4 - (complexity > 861); + // Null move dynamic reduction based on depth and eval + Depth R = std::min(int(eval - beta) / 173, 6) + depth / 3 + 4; ss->currentMove = MOVE_NULL; ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0]; @@ -855,18 +841,16 @@ namespace { if (nullValue >= beta) { // Do not return unproven mate or TB scores - if (nullValue >= VALUE_TB_WIN_IN_MAX_PLY) - nullValue = beta; + nullValue = std::min(nullValue, VALUE_TB_WIN_IN_MAX_PLY-1); - if (thisThread->nmpMinPly || (abs(beta) < VALUE_KNOWN_WIN && depth < 14)) + if (thisThread->nmpMinPly || depth < 14) return nullValue; assert(!thisThread->nmpMinPly); // Recursive verification is not allowed // Do verification search at high depths, with null move pruning disabled - // for us, until ply exceeds nmpMinPly. + // until ply exceeds nmpMinPly. thisThread->nmpMinPly = ss->ply + 3 * (depth-R) / 4; - thisThread->nmpColor = us; Value v = search(pos, ss, beta-1, beta, depth-R, false); @@ -877,20 +861,34 @@ namespace { } } - probCutBeta = beta + 191 - 54 * improving; + // Step 10. If the position doesn't have a ttMove, decrease depth by 2 + // (or by 4 if the TT entry for the current position was hit and the stored depth is greater than or equal to the current depth). + // Use qsearch if depth is equal or below zero (~9 Elo) + if ( PvNode + && !ttMove) + depth -= 2 + 2 * (ss->ttHit && tte->depth() >= depth); - // Step 10. ProbCut (~4 Elo) - // If we have a good enough capture and a reduced search returns a value + if (depth <= 0) + return qsearch(pos, ss, alpha, beta); + + if ( cutNode + && depth >= 8 + && !ttMove) + depth -= 2; + + probCutBeta = beta + 168 - 61 * improving; + + // Step 11. ProbCut (~10 Elo) + // If we have a good enough capture (or queen promotion) and a reduced search returns a value // much above beta, we can (almost) safely prune the previous move. if ( !PvNode - && depth > 4 + && depth > 3 && abs(beta) < VALUE_TB_WIN_IN_MAX_PLY // if value from transposition table is lower than probCutBeta, don't attempt probCut // there and in further interactions with transposition table cutoff depth is set to depth - 3 // because probCut search has depth set to depth - 4 but we also do a move before it // so effective depth is equal to depth - 3 - && !( ss->ttHit - && tte->depth() >= depth - 3 + && !( tte->depth() >= depth - 3 && ttValue != VALUE_NONE && ttValue < probCutBeta)) { @@ -901,7 +899,7 @@ namespace { while ((move = mp.next_move()) != MOVE_NONE) if (move != excludedMove && pos.legal(move)) { - assert(pos.capture(move) || promotion_type(move) == QUEEN); + assert(pos.capture_stage(move)); ss->currentMove = move; ss->continuationHistory = &thisThread->continuationHistory[ss->inCheck] @@ -928,44 +926,29 @@ namespace { return value; } } + + Eval::NNUE::hint_common_parent_position(pos); } - // Step 11. If the position is not in TT, decrease depth by 3. - // Use qsearch if depth is equal or below zero (~4 Elo) - if ( PvNode - && !ttMove) - depth -= 3; - - if (depth <= 0) - return qsearch(pos, ss, alpha, beta); - - if ( cutNode - && depth >= 9 - && !ttMove) - depth -= 2; - moves_loop: // When in check, search starts here - // Step 12. A small Probcut idea, when we are in check (~0 Elo) - probCutBeta = beta + 417; + // Step 12. A small Probcut idea, when we are in check (~4 Elo) + probCutBeta = beta + 413; if ( ss->inCheck && !PvNode - && depth >= 2 && ttCapture && (tte->bound() & BOUND_LOWER) - && tte->depth() >= depth - 3 + && tte->depth() >= depth - 4 && ttValue >= probCutBeta && abs(ttValue) <= VALUE_KNOWN_WIN - && abs(beta) <= VALUE_KNOWN_WIN - ) + && abs(beta) <= VALUE_KNOWN_WIN) return probCutBeta; - const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory, nullptr , (ss-4)->continuationHistory, nullptr , (ss-6)->continuationHistory }; - Move countermove = thisThread->counterMoves[pos.piece_on(prevSq)][prevSq]; + Move countermove = prevSq != SQ_NONE ? thisThread->counterMoves[pos.piece_on(prevSq)][prevSq] : MOVE_NONE; MovePicker mp(pos, ttMove, depth, &thisThread->mainHistory, &captureHistory, @@ -1014,7 +997,7 @@ moves_loop: // When in check, search starts here (ss+1)->pv = nullptr; extension = 0; - capture = pos.capture(move); + capture = pos.capture_stage(move); movedPiece = pos.moved_piece(move); givesCheck = pos.gives_check(move); @@ -1023,32 +1006,52 @@ moves_loop: // When in check, search starts here Value delta = beta - alpha; - // Step 14. Pruning at shallow depth (~98 Elo). Depth conditions are important for mate finding. + Depth r = reduction(improving, depth, moveCount, delta, thisThread->rootDelta); + + // Step 14. Pruning at shallow depth (~120 Elo). Depth conditions are important for mate finding. if ( !rootNode && pos.non_pawn_material(us) && bestValue > VALUE_TB_LOSS_IN_MAX_PLY) { - // Skip quiet moves if movecount exceeds our FutilityMoveCount threshold (~7 Elo) + // Skip quiet moves if movecount exceeds our FutilityMoveCount threshold (~8 Elo) moveCountPruning = moveCount >= futility_move_count(improving, depth); // Reduced depth of the next LMR search - int lmrDepth = std::max(newDepth - reduction(improving, depth, moveCount, delta, thisThread->rootDelta), 0); + int lmrDepth = newDepth - r; if ( capture || givesCheck) { - // Futility pruning for captures (~0 Elo) + // Futility pruning for captures (~2 Elo) if ( !givesCheck - && !PvNode && lmrDepth < 7 && !ss->inCheck - && ss->staticEval + 180 + 201 * lmrDepth + PieceValue[EG][pos.piece_on(to_sq(move))] - + captureHistory[movedPiece][to_sq(move)][type_of(pos.piece_on(to_sq(move)))] / 6 < alpha) + && ss->staticEval + 197 + 248 * lmrDepth + PieceValue[EG][pos.piece_on(to_sq(move))] + + captureHistory[movedPiece][to_sq(move)][type_of(pos.piece_on(to_sq(move)))] / 7 < alpha) continue; - // SEE based pruning (~9 Elo) - if (!pos.see_ge(move, Value(-222) * depth)) - continue; + Bitboard occupied; + // SEE based pruning (~11 Elo) + if (!pos.see_ge(move, occupied, Value(-205) * depth)) + { + if (depth < 2 - capture) + continue; + // Don't prune the move if opponent Queen/Rook is under discovered attack after the exchanges + // Don't prune the move if opponent King is under discovered attack after or during the exchanges + Bitboard leftEnemies = (pos.pieces(~us, KING, QUEEN, ROOK)) & occupied; + Bitboard attacks = 0; + occupied |= to_sq(move); + while (leftEnemies && !attacks) + { + Square sq = pop_lsb(leftEnemies); + attacks |= pos.attackers_to(sq, occupied) & pos.pieces(us) & occupied; + // don't consider pieces which were already threatened/hanging before SEE exchanges + if (attacks && (sq != pos.square(~us) && (pos.attackers_to(sq, pos.pieces()) & pos.pieces(us)))) + attacks = 0; + } + if (!attacks) + continue; + } } else { @@ -1057,35 +1060,43 @@ moves_loop: // When in check, search starts here + (*contHist[3])[movedPiece][to_sq(move)]; // Continuation history based pruning (~2 Elo) - if ( lmrDepth < 5 - && history < -3875 * (depth - 1)) + if ( lmrDepth < 6 + && history < -3832 * depth) continue; history += 2 * thisThread->mainHistory[us][from_to(move)]; - // Futility pruning: parent node (~9 Elo) + lmrDepth += history / 7011; + lmrDepth = std::max(lmrDepth, -2); + + // Futility pruning: parent node (~13 Elo) if ( !ss->inCheck - && lmrDepth < 13 - && ss->staticEval + 106 + 145 * lmrDepth + history / 52 <= alpha) + && lmrDepth < 12 + && ss->staticEval + 112 + 138 * lmrDepth <= alpha) continue; - // Prune moves with negative SEE (~3 Elo) - if (!pos.see_ge(move, Value(-24 * lmrDepth * lmrDepth - 15 * lmrDepth))) + lmrDepth = std::max(lmrDepth, 0); + + // Prune moves with negative SEE (~4 Elo) + if (!pos.see_ge(move, Value(-27 * lmrDepth * lmrDepth - 16 * lmrDepth))) continue; } } - // Step 15. Extensions (~66 Elo) + // Step 15. Extensions (~100 Elo) // We take care to not overdo to avoid search getting stuck. if (ss->ply < thisThread->rootDepth * 2) { - // Singular extension search (~58 Elo). If all moves but one fail low on a + // Singular extension search (~94 Elo). If all moves but one fail low on a // search of (alpha-s, beta-s), and just one fails high on (alpha, beta), // then that move is singular and should be extended. To verify this we do // a reduced search on all the other moves but the ttMove and if the // result is lower than ttValue minus a margin, then we will extend the ttMove. + // Depth margin and singularBeta margin are known for having non-linear scaling. + // Their values are optimized to time controls of 180+1.8 and longer + // so changing them requires tests at this type of time controls. if ( !rootNode - && depth >= 4 - (thisThread->previousDepth > 24) + 2 * (PvNode && tte->is_pv()) + && depth >= 4 - (thisThread->completedDepth > 22) + 2 * (PvNode && tte->is_pv()) && move == ttMove && !excludedMove // Avoid recursive singular search /* && ttValue != VALUE_NONE Already implicit in the next condition */ @@ -1093,7 +1104,7 @@ moves_loop: // When in check, search starts here && (tte->bound() & BOUND_LOWER) && tte->depth() >= depth - 3) { - Value singularBeta = ttValue - (3 + (ss->ttPv && !PvNode)) * depth; + Value singularBeta = ttValue - (82 + 65 * (ss->ttPv && !PvNode)) * depth / 64; Depth singularDepth = (depth - 1) / 2; ss->excludedMove = move; @@ -1107,9 +1118,12 @@ moves_loop: // When in check, search starts here // Avoid search explosion by limiting the number of double extensions if ( !PvNode - && value < singularBeta - 25 - && ss->doubleExtensions <= 9) + && value < singularBeta - 21 + && ss->doubleExtensions <= 11) + { extension = 2; + depth += depth < 13; + } } // Multi-cut pruning @@ -1120,26 +1134,29 @@ moves_loop: // When in check, search starts here else if (singularBeta >= beta) return singularBeta; - // If the eval of ttMove is greater than beta, we reduce it (negative extension) + // If the eval of ttMove is greater than beta, we reduce it (negative extension) (~7 Elo) else if (ttValue >= beta) - extension = -2; + extension = -2 - !PvNode; - // If the eval of ttMove is less than alpha and value, we reduce it (negative extension) - else if (ttValue <= alpha && ttValue <= value) + // If the eval of ttMove is less than value, we reduce it (negative extension) (~1 Elo) + else if (ttValue <= value) + extension = -1; + + // If the eval of ttMove is less than alpha, we reduce it (negative extension) (~1 Elo) + else if (ttValue <= alpha) extension = -1; } // Check extensions (~1 Elo) else if ( givesCheck - && depth > 9 - && abs(ss->staticEval) > 82) + && depth > 9) extension = 1; - // Quiet ttMove extensions (~0 Elo) + // Quiet ttMove extensions (~1 Elo) else if ( PvNode && move == ttMove && move == ss->killers[0] - && (*contHist[0])[movedPiece][to_sq(move)] >= 5177) + && (*contHist[0])[movedPiece][to_sq(move)] >= 5168) extension = 1; } @@ -1160,7 +1177,50 @@ moves_loop: // When in check, search starts here // Step 16. Make the move pos.do_move(move, st, givesCheck); - // Step 17. Late moves reduction / extension (LMR, ~98 Elo) + // Decrease reduction if position is or has been on the PV + // and node is not likely to fail low. (~3 Elo) + // Decrease further on cutNodes. (~1 Elo) + if ( ss->ttPv + && !likelyFailLow) + r -= cutNode && tte->depth() >= depth + 3 ? 3 : 2; + + // Decrease reduction if opponent's move count is high (~1 Elo) + if ((ss-1)->moveCount > 8) + r--; + + // Increase reduction for cut nodes (~3 Elo) + if (cutNode) + r += 2; + + // Increase reduction if ttMove is a capture (~3 Elo) + if (ttCapture) + r++; + + // Decrease reduction for PvNodes based on depth (~2 Elo) + if (PvNode) + r -= 1 + 12 / (3 + depth); + + // Decrease reduction if ttMove has been singularly extended (~1 Elo) + if (singularQuietLMR) + r--; + + // Increase reduction if next ply has a lot of fail high (~5 Elo) + if ((ss+1)->cutoffCnt > 3) + r++; + + else if (move == ttMove) + r--; + + ss->statScore = 2 * thisThread->mainHistory[us][from_to(move)] + + (*contHist[0])[movedPiece][to_sq(move)] + + (*contHist[1])[movedPiece][to_sq(move)] + + (*contHist[3])[movedPiece][to_sq(move)] + - 4006; + + // Decrease/increase reduction for moves with a good/bad history (~25 Elo) + r -= ss->statScore / (11124 + 4740 * (depth > 5 && depth < 22)); + + // Step 17. Late moves reduction / extension (LMR, ~117 Elo) // We use various heuristics for the sons of a node after the first son has // been searched. In general we would like to reduce them, but there are many // cases where we extend a son if it has good chances to be "interesting". @@ -1170,52 +1230,6 @@ moves_loop: // When in check, search starts here || !capture || (cutNode && (ss-1)->moveCount > 1))) { - Depth r = reduction(improving, depth, moveCount, delta, thisThread->rootDelta); - - // Decrease reduction if position is or has been on the PV - // and node is not likely to fail low. (~3 Elo) - if ( ss->ttPv - && !likelyFailLow) - r -= 2; - - // Decrease reduction if opponent's move count is high (~1 Elo) - if ((ss-1)->moveCount > 7) - r--; - - // Increase reduction for cut nodes (~3 Elo) - if (cutNode) - r += 2; - - // Increase reduction if ttMove is a capture (~3 Elo) - if (ttCapture) - r++; - - // Decrease reduction for PvNodes based on depth - if (PvNode) - r -= 1 + 11 / (3 + depth); - - // Decrease reduction if ttMove has been singularly extended (~1 Elo) - if (singularQuietLMR) - r--; - - // Decrease reduction if we move a threatened piece (~1 Elo) - if ( depth > 9 - && (mp.threatenedPieces & from_sq(move))) - r--; - - // Increase reduction if next ply has a lot of fail high - if ((ss+1)->cutoffCnt > 3) - r++; - - ss->statScore = 2 * thisThread->mainHistory[us][from_to(move)] - + (*contHist[0])[movedPiece][to_sq(move)] - + (*contHist[1])[movedPiece][to_sq(move)] - + (*contHist[3])[movedPiece][to_sq(move)] - - 4433; - - // Decrease/increase reduction for moves with a good/bad history (~30 Elo) - r -= ss->statScore / (13628 + 4000 * (depth > 7 && depth < 19)); - // In general we want to cap the LMR depth search at newDepth, but when // reduction is negative, we allow this move a limited search extension // beyond the first move depth. This may lead to hidden double extensions. @@ -1228,28 +1242,33 @@ moves_loop: // When in check, search starts here { // Adjust full depth search based on LMR results - if result // was good enough search deeper, if it was bad enough search shallower - const bool doDeeperSearch = value > (alpha + 64 + 11 * (newDepth - d)); + const bool doDeeperSearch = value > (bestValue + 64 + 11 * (newDepth - d)); + const bool doEvenDeeperSearch = value > alpha + 711 && ss->doubleExtensions <= 6; const bool doShallowerSearch = value < bestValue + newDepth; - newDepth += doDeeperSearch - doShallowerSearch; + ss->doubleExtensions = ss->doubleExtensions + doEvenDeeperSearch; + + newDepth += doDeeperSearch - doShallowerSearch + doEvenDeeperSearch; if (newDepth > d) value = -search(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode); - int bonus = value > alpha ? stat_bonus(newDepth) - : -stat_bonus(newDepth); - - if (capture) - bonus /= 6; + int bonus = value <= alpha ? -stat_bonus(newDepth) + : value >= beta ? stat_bonus(newDepth) + : 0; update_continuation_histories(ss, movedPiece, to_sq(move), bonus); } } - // Step 18. Full depth search when LMR is skipped + // Step 18. Full depth search when LMR is skipped. If expected reduction is high, reduce its depth by 1. else if (!PvNode || moveCount > 1) { - value = -search(pos, ss+1, -(alpha+1), -alpha, newDepth, !cutNode); + // Increase reduction for cut nodes and not ttMove (~1 Elo) + if (!ttMove && cutNode) + r += 2; + + value = -search(pos, ss+1, -(alpha+1), -alpha, newDepth - (r > 3), !cutNode); } // For PV nodes only, do a full PV search on the first move or after a fail @@ -1260,8 +1279,7 @@ moves_loop: // When in check, search starts here (ss+1)->pv = pv; (ss+1)->pv[0] = MOVE_NONE; - value = -search(pos, ss+1, -beta, -alpha, - std::min(maxNextDepth, newDepth), false); + value = -search(pos, ss+1, -beta, -alpha, newDepth, false); } // Step 19. Undo move @@ -1286,10 +1304,21 @@ moves_loop: // When in check, search starts here // PV move or new best move? if (moveCount == 1 || value > alpha) { - rm.score = value; + rm.score = rm.uciScore = value; rm.selDepth = thisThread->selDepth; - rm.scoreLowerbound = value >= beta; - rm.scoreUpperbound = value <= alpha; + rm.scoreLowerbound = rm.scoreUpperbound = false; + + if (value >= beta) + { + rm.scoreLowerbound = true; + rm.uciScore = beta; + } + else if (value <= alpha) + { + rm.scoreUpperbound = true; + rm.uciScore = alpha; + } + rm.pv.resize(1); assert((ss+1)->pv); @@ -1322,24 +1351,23 @@ moves_loop: // When in check, search starts here if (PvNode && !rootNode) // Update pv even in fail-high case update_pv(ss->pv, move, (ss+1)->pv); - if (PvNode && value < beta) // Update alpha! Always alpha < beta + if (value >= beta) { - alpha = value; - - // Reduce other moves if we have found at least one score improvement - if ( depth > 1 - && depth < 6 - && beta < VALUE_KNOWN_WIN - && alpha > -VALUE_KNOWN_WIN) - depth -= 1; - - assert(depth > 0); + ss->cutoffCnt += 1 + !ttMove; + assert(value >= beta); // Fail high + break; } else { - ss->cutoffCnt++; - assert(value >= beta); // Fail high - break; + // Reduce other moves if we have found at least one score improvement (~1 Elo) + // Reduce more for depth > 3 and depth < 12 (~1 Elo) + if ( depth > 1 + && beta < 14362 + && value > -12393) + depth -= depth > 3 && depth < 12 ? 2 : 1; + + assert(depth > 0); + alpha = value; // Update alpha! Always alpha < beta } } } @@ -1382,23 +1410,17 @@ moves_loop: // When in check, search starts here quietsSearched, quietCount, capturesSearched, captureCount, depth); // Bonus for prior countermove that caused the fail low - else if ( (depth >= 5 || PvNode) - && !priorCapture) + else if (!priorCapture && prevSq != SQ_NONE) { - //Assign extra bonus if current node is PvNode or cutNode - //or fail low was really bad - bool extraBonus = PvNode - || cutNode - || bestValue < alpha - 62 * depth; - - update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, stat_bonus(depth) * (1 + extraBonus)); + int bonus = (depth > 5) + (PvNode || cutNode) + (bestValue < alpha - 113 * depth) + ((ss-1)->moveCount > 12); + update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, stat_bonus(depth) * bonus); } if (PvNode) bestValue = std::min(bestValue, maxValue); // If no good move is found and the previous position was ttPv, then the previous - // opponent move is probably good and the new position is added to the search tree. + // opponent move is probably good and the new position is added to the search tree. (~7 Elo) if (bestValue <= alpha) ss->ttPv = ss->ttPv || ((ss-1)->ttPv && depth > 3); @@ -1418,7 +1440,7 @@ moves_loop: // When in check, search starts here // qsearch() is the quiescence search function, which is called by the main search // function with zero depth, or recursively with further decreasing depth per call. - // (~155 elo) + // (~155 Elo) template Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth) { @@ -1441,6 +1463,7 @@ moves_loop: // When in check, search starts here bool pvHit, givesCheck, capture; int moveCount; + // Step 1. Initialize node if (PvNode) { (ss+1)->pv = pv; @@ -1452,7 +1475,7 @@ moves_loop: // When in check, search starts here ss->inCheck = pos.checkers(); moveCount = 0; - // Check for an immediate draw or maximum ply reached + // Step 2. Check for an immediate draw or maximum ply reached if ( pos.is_draw(ss->ply) || ss->ply >= MAX_PLY) return (ss->ply >= MAX_PLY && !ss->inCheck) ? evaluate(pos) : VALUE_DRAW; @@ -1463,27 +1486,25 @@ moves_loop: // When in check, search starts here // TT entry depth that we are going to use. Note that in qsearch we use // only two types of depth in TT: DEPTH_QS_CHECKS or DEPTH_QS_NO_CHECKS. ttDepth = ss->inCheck || depth >= DEPTH_QS_CHECKS ? DEPTH_QS_CHECKS - : DEPTH_QS_NO_CHECKS; - // Transposition table lookup + : DEPTH_QS_NO_CHECKS; + + // Step 3. Transposition table lookup posKey = pos.key(); tte = TT.probe(posKey, ss->ttHit); ttValue = ss->ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; ttMove = ss->ttHit ? tte->move() : MOVE_NONE; pvHit = ss->ttHit && tte->is_pv(); + // At non-PV nodes we check for an early TT cutoff if ( !PvNode - && ss->ttHit && tte->depth() >= ttDepth - && ttValue != VALUE_NONE // Only in case of TT access race + && ttValue != VALUE_NONE // Only in case of TT access race or if !ttHit && (tte->bound() & (ttValue >= beta ? BOUND_LOWER : BOUND_UPPER))) return ttValue; - // Evaluate the position statically + // Step 4. Static evaluation of the position if (ss->inCheck) - { - ss->staticEval = VALUE_NONE; bestValue = futilityBase = -VALUE_INFINITE; - } else { if (ss->ttHit) @@ -1492,16 +1513,15 @@ moves_loop: // When in check, search starts here if ((ss->staticEval = bestValue = tte->eval()) == VALUE_NONE) ss->staticEval = bestValue = evaluate(pos); - // ttValue can be used as a better position evaluation (~7 Elo) + // ttValue can be used as a better position evaluation (~13 Elo) if ( ttValue != VALUE_NONE && (tte->bound() & (ttValue > bestValue ? BOUND_LOWER : BOUND_UPPER))) bestValue = ttValue; } else // In case of null move search use previous static eval with a different sign - ss->staticEval = bestValue = - (ss-1)->currentMove != MOVE_NULL ? evaluate(pos) - : -(ss-1)->staticEval; + ss->staticEval = bestValue = (ss-1)->currentMove != MOVE_NULL ? evaluate(pos) + : -(ss-1)->staticEval; // Stand pat. Return immediately if static value is at least beta if (bestValue >= beta) @@ -1518,7 +1538,7 @@ moves_loop: // When in check, search starts here if (PvNode && bestValue > alpha) alpha = bestValue; - futilityBase = bestValue + 153; + futilityBase = bestValue + 200; } const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory, @@ -1529,7 +1549,7 @@ moves_loop: // When in check, search starts here // to search the moves. Because the depth is <= 0 here, only captures, // queen promotions, and other checks (only if depth >= DEPTH_QS_CHECKS) // will be generated. - Square prevSq = to_sq((ss-1)->currentMove); + Square prevSq = is_ok((ss-1)->currentMove) ? to_sq((ss-1)->currentMove) : SQ_NONE; MovePicker mp(pos, ttMove, depth, &thisThread->mainHistory, &thisThread->captureHistory, contHist, @@ -1537,102 +1557,105 @@ moves_loop: // When in check, search starts here int quietCheckEvasions = 0; - // Loop through the moves until no moves remain or a beta cutoff occurs + // Step 5. Loop through all pseudo-legal moves until no moves remain + // or a beta cutoff occurs. while ((move = mp.next_move()) != MOVE_NONE) { - assert(is_ok(move)); + assert(is_ok(move)); - // Check for legality - if (!pos.legal(move)) - continue; + // Check for legality + if (!pos.legal(move)) + continue; - givesCheck = pos.gives_check(move); - capture = pos.capture(move); + givesCheck = pos.gives_check(move); + capture = pos.capture_stage(move); - moveCount++; + moveCount++; - // Futility pruning and moveCount pruning (~5 Elo) - if ( bestValue > VALUE_TB_LOSS_IN_MAX_PLY - && !givesCheck - && to_sq(move) != prevSq - && futilityBase > -VALUE_KNOWN_WIN - && type_of(move) != PROMOTION) - { + // Step 6. Pruning. + if (bestValue > VALUE_TB_LOSS_IN_MAX_PLY) + { + // Futility pruning and moveCount pruning (~10 Elo) + if ( !givesCheck + && to_sq(move) != prevSq + && futilityBase > -VALUE_KNOWN_WIN + && type_of(move) != PROMOTION) + { + if (moveCount > 2) + continue; - if (moveCount > 2) - continue; + futilityValue = futilityBase + PieceValue[EG][pos.piece_on(to_sq(move))]; - futilityValue = futilityBase + PieceValue[EG][pos.piece_on(to_sq(move))]; + if (futilityValue <= alpha) + { + bestValue = std::max(bestValue, futilityValue); + continue; + } - if (futilityValue <= alpha) - { - bestValue = std::max(bestValue, futilityValue); - continue; - } + if (futilityBase <= alpha && !pos.see_ge(move, VALUE_ZERO + 1)) + { + bestValue = std::max(bestValue, futilityBase); + continue; + } + } - if (futilityBase <= alpha && !pos.see_ge(move, VALUE_ZERO + 1)) - { - bestValue = std::max(bestValue, futilityBase); - continue; - } - } + // We prune after the second quiet check evasion move, where being 'in check' is + // implicitly checked through the counter, and being a 'quiet move' apart from + // being a tt move is assumed after an increment because captures are pushed ahead. + if (quietCheckEvasions > 1) + break; - // Do not search moves with negative SEE values (~5 Elo) - if ( bestValue > VALUE_TB_LOSS_IN_MAX_PLY - && !pos.see_ge(move)) - continue; + // Continuation history based pruning (~3 Elo) + if ( !capture + && (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < 0 + && (*contHist[1])[pos.moved_piece(move)][to_sq(move)] < 0) + continue; - // Speculative prefetch as early as possible - prefetch(TT.first_entry(pos.key_after(move))); + // Do not search moves with bad enough SEE values (~5 Elo) + if (!pos.see_ge(move, Value(-95))) + continue; + } - ss->currentMove = move; - ss->continuationHistory = &thisThread->continuationHistory[ss->inCheck] - [capture] - [pos.moved_piece(move)] - [to_sq(move)]; + // Speculative prefetch as early as possible + prefetch(TT.first_entry(pos.key_after(move))); - // Continuation history based pruning (~2 Elo) - if ( !capture - && bestValue > VALUE_TB_LOSS_IN_MAX_PLY - && (*contHist[0])[pos.moved_piece(move)][to_sq(move)] < 0 - && (*contHist[1])[pos.moved_piece(move)][to_sq(move)] < 0) - continue; + // Update the current move + ss->currentMove = move; + ss->continuationHistory = &thisThread->continuationHistory[ss->inCheck] + [capture] + [pos.moved_piece(move)] + [to_sq(move)]; - // We prune after 2nd quiet check evasion where being 'in check' is implicitly checked through the counter - // and being a 'quiet' apart from being a tt move is assumed after an increment because captures are pushed ahead. - if ( bestValue > VALUE_TB_LOSS_IN_MAX_PLY - && quietCheckEvasions > 1) - break; + quietCheckEvasions += !capture && ss->inCheck; - quietCheckEvasions += !capture && ss->inCheck; + // Step 7. Make and search the move + pos.do_move(move, st, givesCheck); + value = -qsearch(pos, ss+1, -beta, -alpha, depth - 1); + pos.undo_move(move); - // Make and search the move - pos.do_move(move, st, givesCheck); - value = -qsearch(pos, ss+1, -beta, -alpha, depth - 1); - pos.undo_move(move); + assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); - assert(value > -VALUE_INFINITE && value < VALUE_INFINITE); + // Step 8. Check for a new best move + if (value > bestValue) + { + bestValue = value; - // Check for a new best move - if (value > bestValue) - { - bestValue = value; + if (value > alpha) + { + bestMove = move; - if (value > alpha) - { - bestMove = move; + if (PvNode) // Update pv even in fail-high case + update_pv(ss->pv, move, (ss+1)->pv); - if (PvNode) // Update pv even in fail-high case - update_pv(ss->pv, move, (ss+1)->pv); - - if (PvNode && value < beta) // Update alpha here! - alpha = value; - else - break; // Fail high - } - } + if (PvNode && value < beta) // Update alpha here! + alpha = value; + else + break; // Fail high + } + } } + // Step 9. Check for mate // All legal moves have been searched. A special case: if we're in check // and no legal moves were found, it is checkmate. if (ss->inCheck && bestValue == -VALUE_INFINITE) @@ -1717,12 +1740,13 @@ moves_loop: // When in check, search starts here Thread* thisThread = pos.this_thread(); CapturePieceToHistory& captureHistory = thisThread->captureHistory; Piece moved_piece = pos.moved_piece(bestMove); - PieceType captured = type_of(pos.piece_on(to_sq(bestMove))); + PieceType captured; + int bonus1 = stat_bonus(depth + 1); - if (!pos.capture(bestMove)) + if (!pos.capture_stage(bestMove)) { - int bonus2 = bestValue > beta + 137 ? bonus1 // larger bonus + int bonus2 = bestValue > beta + 145 ? bonus1 // larger bonus : stat_bonus(depth); // smaller bonus // Increase stats for the best move in case it was a quiet move @@ -1736,12 +1760,16 @@ moves_loop: // When in check, search starts here } } else + { // Increase stats for the best move in case it was a capture move + captured = type_of(pos.piece_on(to_sq(bestMove))); captureHistory[moved_piece][to_sq(bestMove)][captured] << bonus1; + } // Extra penalty for a quiet early move that was not a TT move or // main killer move in previous ply when it gets refuted. - if ( ((ss-1)->moveCount == 1 + (ss-1)->ttHit || ((ss-1)->currentMove == (ss-1)->killers[0])) + if ( prevSq != SQ_NONE + && ((ss-1)->moveCount == 1 + (ss-1)->ttHit || ((ss-1)->currentMove == (ss-1)->killers[0])) && !pos.captured_piece()) update_continuation_histories(ss-1, pos.piece_on(prevSq), prevSq, -bonus1); @@ -1888,7 +1916,7 @@ string UCI::pv(const Position& pos, Depth depth) { continue; Depth d = updated ? depth : std::max(1, depth - 1); - Value v = updated ? rootMoves[i].score : rootMoves[i].previousScore; + Value v = updated ? rootMoves[i].uciScore : rootMoves[i].previousScore; if (v == -VALUE_INFINITE) v = VALUE_ZERO; diff --git a/src/search.h b/src/search.h index 3c4a0be3..16f92ffa 100644 --- a/src/search.h +++ b/src/search.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -72,6 +72,7 @@ struct RootMove { Value score = -VALUE_INFINITE; Value previousScore = -VALUE_INFINITE; Value averageScore = -VALUE_INFINITE; + Value uciScore = -VALUE_INFINITE; bool scoreLowerbound = false; bool scoreUpperbound = false; int selDepth = 0; @@ -80,7 +81,7 @@ struct RootMove { std::vector pv; }; -typedef std::vector RootMoves; +using RootMoves = std::vector; /// LimitsType struct stores information sent by GUI about available time to diff --git a/src/syzygy/tbprobe.cpp b/src/syzygy/tbprobe.cpp index ccfc1038..4a0912f0 100644 --- a/src/syzygy/tbprobe.cpp +++ b/src/syzygy/tbprobe.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,9 +24,10 @@ #include #include #include -#include -#include #include +#include +#include +#include #include "../bitboard.h" #include "../cluster.h" @@ -71,7 +72,7 @@ enum TBFlag { STM = 1, Mapped = 2, WinPlies = 4, LossPlies = 8, Wide = 16, Singl inline WDLScore operator-(WDLScore d) { return WDLScore(-int(d)); } inline Square operator^(Square s, int i) { return Square(int(s) ^ i); } -const std::string PieceToChar = " PNBRQK pnbrqk"; +constexpr std::string_view PieceToChar = " PNBRQK pnbrqk"; int MapPawns[SQUARE_NB]; int MapB1H1H7[SQUARE_NB]; @@ -142,7 +143,7 @@ struct SparseEntry { static_assert(sizeof(SparseEntry) == 6, "SparseEntry must be 6 bytes"); -typedef uint16_t Sym; // Huffman symbol +using Sym = uint16_t; // Huffman symbol struct LR { enum Side { Left, Right }; @@ -200,13 +201,10 @@ public: } } - // Memory map the file and check it. File should be already open and will be - // closed after mapping. + // Memory map the file and check it. uint8_t* map(void** baseAddress, uint64_t* mapping, TBType type) { - - assert(is_open()); - - close(); // Need to re-open to get native file descriptor + if (is_open()) + close(); // Need to re-open to get native file descriptor #ifndef _WIN32 struct stat statbuf; @@ -237,7 +235,7 @@ public: } #else // Note FILE_FLAG_RANDOM_ACCESS is only a hint to Windows and as such may get ignored. - HANDLE fd = CreateFile(fname.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, + HANDLE fd = CreateFileA(fname.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr, OPEN_EXISTING, FILE_FLAG_RANDOM_ACCESS, nullptr); if (fd == INVALID_HANDLE_VALUE) @@ -330,7 +328,7 @@ struct PairsData { // first access, when the corresponding file is memory mapped. template struct TBTable { - typedef typename std::conditional::type Ret; + using Ret = typename std::conditional::type; static constexpr int Sides = Type == WDL ? 2 : 1; @@ -1516,7 +1514,7 @@ int Tablebases::probe_dtz(Position& pos, ProbeState* result) { // A return value false indicates that not all probes were successful. bool Tablebases::root_probe(Position& pos, Search::RootMoves& rootMoves) { - ProbeState result; + ProbeState result = OK; StateInfo st; // Obtain 50-move counter for the root position @@ -1595,7 +1593,7 @@ bool Tablebases::root_probe_wdl(Position& pos, Search::RootMoves& rootMoves) { static const int WDL_to_rank[] = { -MAX_DTZ, -MAX_DTZ + 101, 0, MAX_DTZ - 101, MAX_DTZ }; - ProbeState result; + ProbeState result = OK; StateInfo st; WDLScore wdl; diff --git a/src/syzygy/tbprobe.h b/src/syzygy/tbprobe.h index 179c7572..159c6865 100644 --- a/src/syzygy/tbprobe.h +++ b/src/syzygy/tbprobe.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/thread.cpp b/src/thread.cpp index 5d981360..8ad73518 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -60,7 +60,6 @@ void Thread::clear() { counterMoves.fill(MOVE_NONE); mainHistory.fill(0); captureHistory.fill(0); - previousDepth = 0; for (bool inCheck : { false, true }) for (StatsType c : { NoCaptures, Captures }) @@ -73,9 +72,9 @@ void Thread::clear() { /// Thread::start_searching() wakes up the thread that will start the search void Thread::start_searching() { - - std::lock_guard lk(mutex); + mutex.lock(); searching = true; + mutex.unlock(); // Unlock before notifying saves a few CPU-cycles cv.notify_one(); // Wake up the thread in idle_loop() } @@ -125,20 +124,20 @@ void Thread::idle_loop() { void ThreadPool::set(size_t requested) { - if (size() > 0) // destroy any existing thread(s) + if (threads.size() > 0) // destroy any existing thread(s) { main()->wait_for_search_finished(); - while (size() > 0) - delete back(), pop_back(); + while (threads.size() > 0) + delete threads.back(), threads.pop_back(); } if (requested > 0) // create new thread(s) { - push_back(new MainThread(0)); + threads.push_back(new MainThread(0)); - while (size() < requested) - push_back(new Thread(size())); + while (threads.size() < requested) + threads.push_back(new Thread(threads.size())); clear(); // Reallocate the hash with the new threadpool size @@ -157,7 +156,7 @@ void ThreadPool::set(size_t requested) { void ThreadPool::clear() { - for (Thread* th : *this) + for (Thread* th : threads) th->clear(); main()->callsCnt = 0; @@ -190,7 +189,7 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states, Tablebases::rank_root_moves(pos, rootMoves); // After ownership transfer 'states' becomes empty, so if we stop the search - // and call 'go' again without setting a new position states.get() == NULL. + // and call 'go' again without setting a new position states.get() == nullptr. assert(states.get() || setupStates.get()); if (states.get()) @@ -201,7 +200,7 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states, // be deduced from a fen string, so set() clears them and they are set from // setupStates->back() later. The rootState is per thread, earlier states are shared // since they are read-only. - for (Thread* th : *this) + for (Thread* th : threads) { th->nodes = th->tbHits = th->TTsaves = th->nmpMinPly = th->bestMoveChanges = 0; th->rootDepth = th->completedDepth = 0; @@ -217,12 +216,12 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states, Thread* ThreadPool::get_best_thread() const { - Thread* bestThread = front(); + Thread* bestThread = threads.front(); std::map votes; Value minScore = VALUE_NONE; // Find minimum score of all threads - for (Thread* th: *this) + for (Thread* th: threads) minScore = std::min(minScore, th->rootMoves[0].score); // Vote according to score and depth, and select the best thread @@ -230,10 +229,10 @@ Thread* ThreadPool::get_best_thread() const { return (th->rootMoves[0].score - minScore + 14) * int(th->completedDepth); }; - for (Thread* th : *this) + for (Thread* th : threads) votes[th->rootMoves[0].pv[0]] += thread_value(th); - for (Thread* th : *this) + for (Thread* th : threads) if (abs(bestThread->rootMoves[0].score) >= VALUE_TB_WIN_IN_MAX_PLY) { // Make sure we pick the shortest mate / TB conversion or stave off mate the longest @@ -244,7 +243,8 @@ Thread* ThreadPool::get_best_thread() const { || ( th->rootMoves[0].score > VALUE_TB_LOSS_IN_MAX_PLY && ( votes[th->rootMoves[0].pv[0]] > votes[bestThread->rootMoves[0].pv[0]] || ( votes[th->rootMoves[0].pv[0]] == votes[bestThread->rootMoves[0].pv[0]] - && thread_value(th) > thread_value(bestThread))))) + && thread_value(th) * int(th->rootMoves[0].pv.size() > 2) + > thread_value(bestThread) * int(bestThread->rootMoves[0].pv.size() > 2))))) bestThread = th; return bestThread; @@ -255,8 +255,8 @@ Thread* ThreadPool::get_best_thread() const { void ThreadPool::start_searching() { - for (Thread* th : *this) - if (th != front()) + for (Thread* th : threads) + if (th != threads.front()) th->start_searching(); } @@ -265,8 +265,8 @@ void ThreadPool::start_searching() { void ThreadPool::wait_for_search_finished() const { - for (Thread* th : *this) - if (th != front()) + for (Thread* th : threads) + if (th != threads.front()) th->wait_for_search_finished(); } diff --git a/src/thread.h b/src/thread.h index aed82591..84abe59d 100644 --- a/src/thread.h +++ b/src/thread.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -61,16 +61,14 @@ public: Pawns::Table pawnsTable; Material::Table materialTable; size_t pvIdx, pvLast; - RunningAverage complexityAverage; std::atomic nodes, tbHits, TTsaves, bestMoveChanges; int selDepth, nmpMinPly; - Color nmpColor; Value bestValue, optimism[COLOR_NB]; Position rootPos; StateInfo rootState; Search::RootMoves rootMoves; - Depth rootDepth, completedDepth, previousDepth; + Depth rootDepth, completedDepth; Value rootDelta; CounterMoveHistory counterMoves; ButterflyHistory mainHistory; @@ -108,13 +106,13 @@ struct MainThread : public Thread { /// parking and, most importantly, launching a thread. All the access to threads /// is done through this class. -struct ThreadPool : public std::vector { +struct ThreadPool { void start_thinking(Position&, StateListPtr&, const Search::LimitsType&, bool = false); void clear(); void set(size_t); - MainThread* main() const { return static_cast(front()); } + MainThread* main() const { return static_cast(threads.front()); } uint64_t nodes_searched() const { return accumulate(&Thread::nodes); } uint64_t tb_hits() const { return accumulate(&Thread::tbHits); } uint64_t TT_saves() const { return accumulate(&Thread::TTsaves); } @@ -124,13 +122,21 @@ struct ThreadPool : public std::vector { std::atomic_bool stop, increaseDepth; + auto cbegin() const noexcept { return threads.cbegin(); } + auto begin() noexcept { return threads.begin(); } + auto end() noexcept { return threads.end(); } + auto cend() const noexcept { return threads.cend(); } + auto size() const noexcept { return threads.size(); } + auto empty() const noexcept { return threads.empty(); } + private: StateListPtr setupStates; + std::vector threads; uint64_t accumulate(std::atomic Thread::* member) const { uint64_t sum = 0; - for (Thread* th : *this) + for (Thread* th : threads) sum += (th->*member).load(std::memory_order_relaxed); return sum; } diff --git a/src/thread_win32_osx.h b/src/thread_win32_osx.h index 77d1c3c7..330a8341 100644 --- a/src/thread_win32_osx.h +++ b/src/thread_win32_osx.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -41,7 +41,7 @@ void* start_routine(void* ptr) P* p = reinterpret_cast(ptr); (p->first->*(p->second))(); // Call member function pointer delete p; - return NULL; + return nullptr; } class NativeThread { @@ -56,7 +56,7 @@ public: pthread_attr_setstacksize(attr, TH_STACK_SIZE); pthread_create(&thread, attr, start_routine, new P(obj, fun)); } - void join() { pthread_join(thread, NULL); } + void join() { pthread_join(thread, nullptr); } }; } // namespace Stockfish @@ -65,7 +65,7 @@ public: namespace Stockfish { -typedef std::thread NativeThread; +using NativeThread = std::thread; } // namespace Stockfish diff --git a/src/timeman.cpp b/src/timeman.cpp index cab0d767..061de018 100644 --- a/src/timeman.cpp +++ b/src/timeman.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -36,6 +36,12 @@ TimeManagement Time; // Our global time management object void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) { + // if we have no time, no need to initialize TM, except for the start time, + // which is used by movetime. + startTime = limits.startTime; + if (limits.time[us] == 0) + return; + TimePoint moveOverhead = TimePoint(Options["Move Overhead"]); TimePoint slowMover = TimePoint(Options["Slow Mover"]); TimePoint npmsec = TimePoint(Options["nodestime"]); @@ -59,8 +65,6 @@ void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) { limits.npmsec = npmsec; } - startTime = limits.startTime; - // Maximum move horizon of 50 moves int mtg = limits.movestogo ? std::min(limits.movestogo, 50) : 50; diff --git a/src/timeman.h b/src/timeman.h index a67385c6..bf21463d 100644 --- a/src/timeman.h +++ b/src/timeman.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/tt.cpp b/src/tt.cpp index c7118aea..3339c993 100644 --- a/src/tt.cpp +++ b/src/tt.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -87,7 +87,7 @@ void TranspositionTable::clear() { std::vector threads; - for (size_t idx = 0; idx < Options["Threads"]; ++idx) + for (size_t idx = 0; idx < size_t(Options["Threads"]); ++idx) { threads.emplace_back([this, idx]() { @@ -98,7 +98,7 @@ void TranspositionTable::clear() { // Each thread will zero its part of the hash table const size_t stride = size_t(clusterCount / Options["Threads"]), start = size_t(stride * idx), - len = idx != Options["Threads"] - 1 ? + len = idx != size_t(Options["Threads"]) - 1 ? stride : clusterCount - start; std::memset(&table[start], 0, len * sizeof(Cluster)); diff --git a/src/tt.h b/src/tt.h index b6e0799b..f5c789c4 100644 --- a/src/tt.h +++ b/src/tt.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/tune.cpp b/src/tune.cpp index a885845f..41f6664d 100644 --- a/src/tune.cpp +++ b/src/tune.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/tune.h b/src/tune.h index 75ab484a..440d950a 100644 --- a/src/tune.h +++ b/src/tune.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -26,8 +26,8 @@ namespace Stockfish { -typedef std::pair Range; // Option's min-max values -typedef Range (RangeFun) (int); +using Range = std::pair; // Option's min-max values +using RangeFun = Range (int); // Default Range function, to calculate Option's min-max values inline Range default_range(int v) { @@ -75,7 +75,7 @@ struct SetRange { class Tune { - typedef void (PostUpdate) (); // Post-update function + using PostUpdate = void (); // Post-update function Tune() { read_results(); } Tune(const Tune&) = delete; diff --git a/src/types.h b/src/types.h index c2087c6c..06b0a059 100644 --- a/src/types.h +++ b/src/types.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -103,8 +103,8 @@ constexpr bool Is64Bit = true; constexpr bool Is64Bit = false; #endif -typedef uint64_t Key; -typedef uint64_t Bitboard; +using Key = uint64_t; +using Bitboard = uint64_t; constexpr int MAX_MOVES = 256; constexpr int MAX_PLY = 246; @@ -186,6 +186,9 @@ enum Value : int { VALUE_MATE_IN_MAX_PLY = VALUE_MATE - MAX_PLY, VALUE_MATED_IN_MAX_PLY = -VALUE_MATE_IN_MAX_PLY, + // In the code, we make the assumption that these values + // are such that non_pawn_material() can be used to uniquely + // identify the material on the board. PawnValueMg = 126, PawnValueEg = 208, KnightValueMg = 781, KnightValueEg = 854, BishopValueMg = 825, BishopValueEg = 915, @@ -215,7 +218,7 @@ constexpr Value PieceValue[PHASE_NB][PIECE_NB] = { VALUE_ZERO, PawnValueEg, KnightValueEg, BishopValueEg, RookValueEg, QueenValueEg, VALUE_ZERO, VALUE_ZERO } }; -typedef int Depth; +using Depth = int; enum : int { DEPTH_QS_CHECKS = 0, @@ -413,6 +416,10 @@ inline Color color_of(Piece pc) { return Color(pc >> 3); } +constexpr bool is_ok(Move m) { + return m != MOVE_NONE && m != MOVE_NULL; +} + constexpr bool is_ok(Square s) { return s >= SQ_A1 && s <= SQ_H8; } @@ -442,10 +449,12 @@ constexpr Direction pawn_push(Color c) { } constexpr Square from_sq(Move m) { + assert(is_ok(m)); return Square((m >> 6) & 0x3F); } constexpr Square to_sq(Move m) { + assert(is_ok(m)); return Square(m & 0x3F); } @@ -470,10 +479,6 @@ constexpr Move make(Square from, Square to, PieceType pt = KNIGHT) { return Move(T + ((pt - KNIGHT) << 12) + (from << 6) + to); } -constexpr bool is_ok(Move m) { - return from_sq(m) != to_sq(m); // Catch MOVE_NULL and MOVE_NONE -} - /// Based on a congruential pseudo random number generator constexpr Key make_key(uint64_t seed) { return seed * 6364136223846793005ULL + 1442695040888963407ULL; diff --git a/src/uci.cpp b/src/uci.cpp index 690214ad..bd475763 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,6 +22,7 @@ #include #include +#include "benchmark.h" #include "evaluate.h" #include "cluster.h" #include "movegen.h" @@ -32,13 +33,12 @@ #include "tt.h" #include "uci.h" #include "syzygy/tbprobe.h" +#include "nnue/evaluate_nnue.h" using namespace std; namespace Stockfish { -extern vector setup_bench(const Position&, istream&); - namespace { // FEN string for the initial position in standard chess @@ -162,7 +162,7 @@ namespace { uint64_t num, nodes = 0, cnt = 1; vector list = setup_bench(pos, args); - num = count_if(list.begin(), list.end(), [](string s) { return s.find("go ") == 0 || s.find("eval") == 0; }); + num = count_if(list.begin(), list.end(), [](const string& s) { return s.find("go ") == 0 || s.find("eval") == 0; }); TimePoint elapsed = now(); @@ -211,8 +211,8 @@ namespace { // The coefficients of a third-order polynomial fit is based on the fishtest data // for two parameters that need to transform eval to the argument of a logistic // function. - constexpr double as[] = { -0.58270499, 2.68512549, 15.24638015, 344.49745382}; - constexpr double bs[] = { -2.65734562, 15.96509799, -20.69040836, 73.61029937 }; + constexpr double as[] = { 0.38036525, -2.82015070, 23.17882135, 307.36768407}; + constexpr double bs[] = { -2.29434733, 13.27689788, -14.26828904, 63.45318330 }; // Enforce that NormalizeToPawnValue corresponds to a 50% win rate at ply 64 static_assert(UCI::NormalizeToPawnValue == int(as[0] + as[1] + as[2] + as[3])); @@ -323,8 +323,13 @@ string UCI::value(Value v) { stringstream ss; - if (abs(v) < VALUE_MATE_IN_MAX_PLY) + if (abs(v) < VALUE_TB_WIN_IN_MAX_PLY) ss << "cp " << v * 100 / NormalizeToPawnValue; + else if (abs(v) < VALUE_MATE_IN_MAX_PLY) + { + const int ply = VALUE_MATE_IN_MAX_PLY - 1 - std::abs(v); // recompute ss->ply + ss << "cp " << (v > 0 ? 20000 - ply : -20000 + ply); + } else ss << "mate " << (v > 0 ? VALUE_MATE - v + 1 : -VALUE_MATE - v) / 2; @@ -362,15 +367,15 @@ std::string UCI::square(Square s) { string UCI::move(Move m, bool chess960) { - Square from = from_sq(m); - Square to = to_sq(m); - if (m == MOVE_NONE) return "(none)"; if (m == MOVE_NULL) return "0000"; + Square from = from_sq(m); + Square to = to_sq(m); + if (type_of(m) == CASTLING && !chess960) to = make_square(to > from ? FILE_G : FILE_C, rank_of(from)); diff --git a/src/uci.h b/src/uci.h index 3b5a6764..8f1be00c 100644 --- a/src/uci.h +++ b/src/uci.h @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -35,7 +35,7 @@ namespace UCI { // the win_rate_model() such that Stockfish outputs an advantage of // "100 centipawns" for a position if the engine has a 50% probability to win // from this position in selfplay at fishtest LTC time control. -const int NormalizeToPawnValue = 361; +const int NormalizeToPawnValue = 328; class Option; @@ -45,12 +45,12 @@ struct CaseInsensitiveLess { }; /// The options container is defined as a std::map -typedef std::map OptionsMap; +using OptionsMap = std::map; /// The Option class implements each option as specified by the UCI protocol class Option { - typedef void (*OnChange)(const Option&); + using OnChange = void (*)(const Option&); public: Option(OnChange = nullptr); @@ -61,7 +61,7 @@ public: Option& operator=(const std::string&); void operator<<(const Option&); - operator double() const; + operator int() const; operator std::string() const; bool operator==(const char*) const; diff --git a/src/ucioption.cpp b/src/ucioption.cpp index 9fb48345..f6342e5c 100644 --- a/src/ucioption.cpp +++ b/src/ucioption.cpp @@ -1,6 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2022 The Stockfish developers (see AUTHORS file) + Copyright (C) 2004-2023 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -38,13 +38,13 @@ UCI::OptionsMap Options; // Global object namespace UCI { /// 'On change' actions, triggered by an option's value change -void on_clear_hash(const Option&) { Search::clear(); } -void on_hash_size(const Option& o) { TT.resize(size_t(o)); } -void on_logger(const Option& o) { start_logger(o); } -void on_threads(const Option& o) { Threads.set(size_t(o)); } -void on_tb_path(const Option& o) { Tablebases::init(o); } -void on_use_NNUE(const Option& ) { Eval::NNUE::init(); } -void on_eval_file(const Option& ) { Eval::NNUE::init(); } +static void on_clear_hash(const Option&) { Search::clear(); } +static void on_hash_size(const Option& o) { TT.resize(size_t(o)); } +static void on_logger(const Option& o) { start_logger(o); } +static void on_threads(const Option& o) { Threads.set(size_t(o)); } +static void on_tb_path(const Option& o) { Tablebases::init(o); } +static void on_use_NNUE(const Option&) { Eval::NNUE::init(); } +static void on_eval_file(const Option&) { Eval::NNUE::init(); } /// Our case insensitive less() function as required by UCI protocol bool CaseInsensitiveLess::operator() (const string& s1, const string& s2) const { @@ -73,7 +73,7 @@ void init(OptionsMap& o) { o["UCI_Chess960"] << Option(false); o["UCI_AnalyseMode"] << Option(false); o["UCI_LimitStrength"] << Option(false); - o["UCI_Elo"] << Option(1350, 1350, 2850); + o["UCI_Elo"] << Option(1320, 1320, 3190); o["UCI_ShowWDL"] << Option(false); o["SyzygyPath"] << Option("", on_tb_path); o["SyzygyProbeDepth"] << Option(1, 1, 100); @@ -128,9 +128,9 @@ Option::Option(double v, int minv, int maxv, OnChange f) : type("spin"), min(min Option::Option(const char* v, const char* cur, OnChange f) : type("combo"), min(0), max(0), on_change(f) { defaultValue = v; currentValue = cur; } -Option::operator double() const { +Option::operator int() const { assert(type == "check" || type == "spin"); - return (type == "spin" ? stof(currentValue) : currentValue == "true"); + return (type == "spin" ? std::stoi(currentValue) : currentValue == "true"); } Option::operator std::string() const { diff --git a/tests/instrumented.sh b/tests/instrumented.sh index e9455eab..1b37c7a8 100755 --- a/tests/instrumented.sh +++ b/tests/instrumented.sh @@ -70,7 +70,8 @@ for args in "eval" \ "go depth 10" \ "go movetime 1000" \ "go wtime 8000 btime 8000 winc 500 binc 500" \ - "bench 128 $threads 8 default depth" + "bench 128 $threads 8 default depth" \ + "export_net verify.nnue" do echo "$prefix $exeprefix ./stockfish $args $postfix" @@ -78,6 +79,11 @@ do done +# verify the generated net equals the base net +network=`./stockfish uci | grep 'option name EvalFile type string default' | awk '{print $NF}'` +echo "Comparing $network to the written verify.nnue" +diff $network verify.nnue + # more general testing, following an uci protocol exchange cat << EOF > game.exp set timeout 240