FutilityValue formula tweak

Passed STC: LLR: 2.95 (-2.94,2.94) <0.00,2.00> Total: 29600 W: 7979 L: 7662 D: 13959 Ptnml(0-2): 138, 3446, 7324, 3745, 147 https://tests.stockfishchess.org/tests/view/67ac7dff52879dfd14d7e7da Passed LTC: LLR: 2.95 (-2.94,2.94) <0.50,2.50> Total: 49662 W: 12850 L: 12502 D: 24310 Ptnml(0-2): 41, 5354, 13689, 5710, 37 https://tests.stockfishchess.org/tests/view/67acc1b252879dfd14d7e81d closes https://github.com/official-stockfish/Stockfish/pull/5879 Bench: 2581469
Decrease lmr depth if static eval decreases a lot
2026-05-20 07:27:46 +00:00 · 2025-02-13 20:18:35 +01:00 · 2025-02-13 20:17:26 +01:00 · 2025-02-13 19:52:06 +01:00 · 2025-02-13 19:49:54 +01:00 · 2025-02-13 19:46:53 +01:00
112 changed files with 18619 additions and 12919 deletions
@@ -0,0 +1,44 @@
+AccessModifierOffset: -1
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: Consecutive
+AlignConsecutiveDeclarations: Consecutive
+AlignEscapedNewlines: DontAlign
+AlignOperands: AlignAfterOperator
+AlignTrailingComments: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortEnumsOnASingleLine: false
+AllowShortIfStatementsOnASingleLine: false
+AlwaysBreakTemplateDeclarations: Yes
+BasedOnStyle: WebKit
+BitFieldColonSpacing: After
+BinPackParameters: false
+BreakBeforeBinaryOperators: NonAssignment
+BreakBeforeBraces: Custom
+BraceWrapping:
+  AfterFunction: false 
+  AfterClass: false
+  AfterControlStatement: true
+  BeforeElse: true
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializers: AfterColon
+BreakStringLiterals: false
+ColumnLimit: 100
+ContinuationIndentWidth: 2
+Cpp11BracedListStyle: true
+IndentGotoLabels: false
+IndentPPDirectives: BeforeHash
+IndentWidth: 4
+MaxEmptyLinesToKeep: 2
+NamespaceIndentation: None
+PackConstructorInitializers: Never
+ReflowComments: false
+SortIncludes: false
+SortUsingDeclarations: false
+SpaceAfterCStyleCast: true
+SpaceAfterTemplateKeyword: false
+SpaceBeforeCaseColon: true
+SpaceBeforeCpp11BracedList: false
+SpaceBeforeInheritanceColon: false
+SpaceInEmptyBlock: false
+SpacesBeforeTrailingComments: 2
@@ -0,0 +1,7 @@
+# .git-blame-ignore-revs
+# Ignore commit which added clang-format
+2d0237db3f0e596fb06e3ffbadba84dcc4e018f6
+
+# Post commit formatting fixes
+0fca5605fa2e5e7240fde5e1aae50952b2612231
+08ed4c90db31959521b7ef3186c026edd1e90307
@@ -0,0 +1,65 @@
+name: Report issue
+description: Create a report to help us fix issues with the engine
+body:
+- type: textarea
+  attributes:
+    label: Describe the issue
+    description: A clear and concise description of what you're experiencing.
+  validations:
+    required: true
+
+- type: textarea
+  attributes:
+    label: Expected behavior
+    description: A clear and concise description of what you expected to happen.
+  validations:
+    required: true
+
+- type: textarea
+  attributes:
+    label: Steps to reproduce
+    description: |
+      Steps to reproduce the behavior.
+      You can also use this section to paste the command line output.
+    placeholder: |
+      ```
+      position startpos moves g2g4 e7e5 f2f3
+      go mate 1
+      info string NNUE evaluation using nn-6877cd24400e.nnue enabled
+      info depth 1 seldepth 1 multipv 1 score mate 1 nodes 33 nps 11000 tbhits 0 time 3 pv d8h4
+      bestmove d8h4
+      ```
+  validations:
+    required: true
+
+- type: textarea
+  attributes:
+    label: Anything else?
+    description: |
+      Anything that will give us more context about the issue you are encountering.
+      You can also use this section to propose ideas on how to solve the issue. 
+  validations:
+    required: false
+
+- type: dropdown
+  attributes:
+    label: Operating system
+    options:
+      - All
+      - Windows
+      - Linux
+      - MacOS
+      - Android
+      - Other or N/A
+  validations:
+    required: true
+
+- type: input
+  attributes:
+    label: Stockfish version
+    description: |
+      This can be found by running the engine.
+      You can also use the commit ID.
+    placeholder: Stockfish 15 / e6e324e
+  validations:
+    required: true
@@ -0,0 +1,8 @@
+blank_issues_enabled: false
+contact_links:
+  - name: Discord server
+    url: https://discord.gg/GWDRS3kU6R
+    about: Feel free to ask for support or have a chat with us on our Discord server!
+  - name: Discussions, Q&A, ideas, show us something...
+    url: https://github.com/official-stockfish/Stockfish/discussions/new
+    about: Do you have an idea for Stockfish? Do you want to show something that you made? Please open a discussion about it!
@@ -0,0 +1,51 @@
+{
+  "config": [
+    {
+      "name": "Android NDK aarch64",
+      "os": "ubuntu-22.04",
+      "simple_name": "android",
+      "compiler": "aarch64-linux-android21-clang++",
+      "emu": "qemu-aarch64",
+      "comp": "ndk",
+      "shell": "bash",
+      "archive_ext": "tar"
+    },
+    {
+      "name": "Android NDK arm",
+      "os": "ubuntu-22.04",
+      "simple_name": "android",
+      "compiler": "armv7a-linux-androideabi21-clang++",
+      "emu": "qemu-arm",
+      "comp": "ndk",
+      "shell": "bash",
+      "archive_ext": "tar"
+    }
+  ],
+  "binaries": ["armv8-dotprod", "armv8", "armv7", "armv7-neon"],
+  "exclude": [
+    {
+      "binaries": "armv8-dotprod",
+      "config": {
+        "compiler": "armv7a-linux-androideabi21-clang++"
+      }
+    },
+    {
+      "binaries": "armv8",
+      "config": {
+        "compiler": "armv7a-linux-androideabi21-clang++"
+      }
+    },
+    {
+      "binaries": "armv7",
+      "config": {
+        "compiler": "aarch64-linux-android21-clang++"
+      }
+    },
+    {
+      "binaries": "armv7-neon",
+      "config": {
+        "compiler": "aarch64-linux-android21-clang++"
+      }
+    }
+  ]
+}
@@ -0,0 +1,22 @@
+[
+    # Mappings for libcxx's internal headers
+    { include: [ "<__fwd/fstream.h>", private, "<iosfwd>", public ] },
+    { include: [ "<__fwd/ios.h>", private, "<iosfwd>", public ] },
+    { include: [ "<__fwd/istream.h>", private, "<iosfwd>", public ] },
+    { include: [ "<__fwd/ostream.h>", private, "<iosfwd>", public ] },
+    { include: [ "<__fwd/sstream.h>", private, "<iosfwd>", public ] },
+    { include: [ "<__fwd/streambuf.h>", private, "<iosfwd>", public ] },
+    { include: [ "<__fwd/string_view.h>", private, "<string_view>", public ] },
+    { include: [ "<__system_error/errc.h>", private, "<system_error>", public ] },
+
+    # Mappings for includes between public headers
+    { include: [ "<ios>", public, "<iostream>", public ] },
+    { include: [ "<streambuf>", public, "<iostream>", public ] },
+    { include: [ "<istream>", public, "<iostream>", public ] },
+    { include: [ "<ostream>", public, "<iostream>", public ] },
+    { include: [ "<iosfwd>", public, "<iostream>", public ] },
+
+    # Missing mappings in include-what-you-use's libcxx.imp
+    { include: ["@<__condition_variable/.*>", private, "<condition_variable>", public ] },
+    { include: ["@<__mutex/.*>", private, "<mutex>", public ] },
+]
@@ -0,0 +1,160 @@
+{
+  "config": [
+    {
+      "name": "Ubuntu 22.04 GCC",
+      "os": "ubuntu-22.04",
+      "simple_name": "ubuntu",
+      "compiler": "g++",
+      "comp": "gcc",
+      "shell": "bash",
+      "archive_ext": "tar",
+      "sde": "/home/runner/work/Stockfish/Stockfish/.output/sde-temp-files/sde-external-9.27.0-2023-09-13-lin/sde -future --"
+    },
+    {
+      "name": "MacOS 13 Apple Clang",
+      "os": "macos-13",
+      "simple_name": "macos",
+      "compiler": "clang++",
+      "comp": "clang",
+      "shell": "bash",
+      "archive_ext": "tar"
+    },
+    {
+      "name": "MacOS 14 Apple Clang M1",
+      "os": "macos-14",
+      "simple_name": "macos-m1",
+      "compiler": "clang++",
+      "comp": "clang",
+      "shell": "bash",
+      "archive_ext": "tar"
+    },
+    {
+      "name": "Windows 2022 Mingw-w64 GCC x86_64",
+      "os": "windows-2022",
+      "simple_name": "windows",
+      "compiler": "g++",
+      "comp": "mingw",
+      "msys_sys": "mingw64",
+      "msys_env": "x86_64-gcc",
+      "shell": "msys2 {0}",
+      "ext": ".exe",
+      "sde": "/d/a/Stockfish/Stockfish/.output/sde-temp-files/sde-external-9.27.0-2023-09-13-win/sde.exe -future --",
+      "archive_ext": "zip"
+    }
+  ],
+  "binaries": [
+    "x86-64",
+    "x86-64-sse41-popcnt",
+    "x86-64-avx2",
+    "x86-64-bmi2",
+    "x86-64-avxvnni",
+    "x86-64-avx512",
+    "x86-64-vnni256",
+    "x86-64-vnni512",
+    "apple-silicon"
+  ],
+  "exclude": [
+    {
+      "binaries": "x86-64",
+      "config": {
+        "os": "macos-14"
+      }
+    },
+    {
+      "binaries": "x86-64-sse41-popcnt",
+      "config": {
+        "os": "macos-14"
+      }
+    },
+    {
+      "binaries": "x86-64-avx2",
+      "config": {
+        "os": "macos-14"
+      }
+    },
+    {
+      "binaries": "x86-64-bmi2",
+      "config": {
+        "os": "macos-14"
+      }
+    },
+    {
+      "binaries": "x86-64-avxvnni",
+      "config": {
+        "os": "macos-14"
+      }
+    },
+    {
+      "binaries": "x86-64-avxvnni",
+      "config": {
+        "os": "macos-14"
+      }
+    },
+    {
+      "binaries": "x86-64-avx512",
+      "config": {
+        "os": "macos-14"
+      }
+    },
+    {
+      "binaries": "x86-64-vnni256",
+      "config": {
+        "os": "macos-14"
+      }
+    },
+    {
+      "binaries": "x86-64-vnni512",
+      "config": {
+        "os": "macos-14"
+      }
+    },
+    {
+      "binaries": "x86-64-avxvnni",
+      "config": {
+        "ubuntu-22.04": null
+      }
+    },
+    {
+      "binaries": "x86-64-avxvnni",
+      "config": {
+        "os": "macos-13"
+      }
+    },
+    {
+      "binaries": "x86-64-avx512",
+      "config": {
+        "os": "macos-13"
+      }
+    },
+    {
+      "binaries": "x86-64-vnni256",
+      "config": {
+        "os": "macos-13"
+      }
+    },
+    {
+      "binaries": "x86-64-vnni512",
+      "config": {
+        "os": "macos-13"
+      }
+    },
+    {
+      "binaries": "apple-silicon",
+      "config": {
+        "os": "windows-2022"
+      }
+    },
+    {
+      "binaries": "apple-silicon",
+      "config": {
+        "os": "macos-13"
+      }
+    },
+    {
+      "binaries": "apple-silicon",
+      "config": {
+        "os": "ubuntu-22.04"
+      }
+    }
+  ]
+}
@@ -0,0 +1,98 @@
+name: Compilation
+on:
+  workflow_call:
+    inputs:
+      matrix:
+        type: string
+        required: true
+jobs:
+  Compilation:
+    name: ${{ matrix.config.name }} ${{ matrix.binaries }}
+    runs-on: ${{ matrix.config.os }}
+    env:
+      COMPCXX: ${{ matrix.config.compiler }}
+      COMP: ${{ matrix.config.comp }}
+      EMU: ${{ matrix.config.emu }}
+      EXT: ${{ matrix.config.ext }}
+      BINARY: ${{ matrix.binaries }}
+    strategy:
+      fail-fast: false
+      matrix: ${{ fromJson(inputs.matrix) }}
+    defaults:
+      run:
+        working-directory: src
+        shell: ${{ matrix.config.shell }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          persist-credentials: false
+
+      - name: Download required linux packages
+        if: runner.os == 'Linux'
+        run: |
+          sudo apt update
+          sudo apt install qemu-user
+
+      - name: Install NDK
+        if: runner.os == 'Linux'
+        run: |
+          if [ $COMP == ndk ]; then
+            NDKV="21.4.7075529"
+            ANDROID_ROOT=/usr/local/lib/android
+            ANDROID_SDK_ROOT=$ANDROID_ROOT/sdk
+            SDKMANAGER=$ANDROID_SDK_ROOT/cmdline-tools/latest/bin/sdkmanager
+            echo "y" | $SDKMANAGER "ndk;$NDKV"
+            ANDROID_NDK_ROOT=$ANDROID_SDK_ROOT/ndk/$NDKV
+            ANDROID_NDK_BIN=$ANDROID_NDK_ROOT/toolchains/llvm/prebuilt/linux-x86_64/bin
+            echo "ANDROID_NDK_BIN=$ANDROID_NDK_BIN" >> $GITHUB_ENV
+          fi
+
+      - name: Extract the bench number from the commit history
+        run: |
+          for hash in $(git rev-list -100 HEAD); do
+            benchref=$(git show -s $hash | tac | grep -m 1 -o -x '[[:space:]]*\b[Bb]ench[ :]\+[1-9][0-9]\{5,7\}\b[[:space:]]*' | sed 's/[^0-9]//g') && break || true
+          done
+          [[ -n "$benchref" ]] && echo "benchref=$benchref" >> $GITHUB_ENV && echo "From commit: $hash" && echo "Reference bench: $benchref" || echo "No bench found"
+
+      - name: Download the used network from the fishtest framework
+        run: make net
+
+      - name: Check compiler
+        run: |
+          if [ $COMP == ndk ]; then
+            export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH
+          fi
+          $COMPCXX -v
+
+      - name: Test help target
+        run: make help
+
+      - name: Check git
+        run: git --version
+
+      # Compile profile guided builds
+
+      - name: Compile ${{ matrix.binaries }} build
+        run: |
+          if [ $COMP == ndk ]; then
+            export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH
+            export LDFLAGS="-static -Wno-unused-command-line-argument"
+          fi
+          make clean
+          make -j4 profile-build ARCH=$BINARY COMP=$COMP WINE_PATH=$EMU
+          make strip ARCH=$BINARY COMP=$COMP
+          WINE_PATH=$EMU ../tests/signature.sh $benchref
+          mv ./stockfish$EXT ../stockfish-android-$BINARY$EXT
+
+      - name: Remove non src files
+        run: git clean -fx
+
+      - name: Upload artifact for (pre)-release
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ matrix.config.simple_name }} ${{ matrix.binaries }}
+          path: |
+            .
+            !.git
+            !.output
@@ -0,0 +1,57 @@
+# This workflow will run clang-format and comment on the PR.
+# For security reasons, it is crucial that this workflow
+# neither executes any shell script nor runs make.
+# Read this before editing: https://securitylab.github.com/research/github-actions-preventing-pwn-requests/
+
+name: Clang-Format
+on:
+  pull_request_target:
+    branches:
+      - "master"
+    paths:
+      - "**.cpp"
+      - "**.h"
+
+permissions:
+  pull-requests: write
+
+jobs:
+  Clang-Format:
+    name: Clang-Format
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.pull_request.head.sha }}
+
+      - name: Run clang-format style check
+        uses: jidicula/clang-format-action@f62da5e3d3a2d88ff364771d9d938773a618ab5e # @v4.11.0
+        id: clang-format
+        continue-on-error: true
+        with:
+          clang-format-version: "18"
+          exclude-regex: "incbin"
+
+      - name: Comment on PR
+        if: steps.clang-format.outcome == 'failure'
+        uses: thollander/actions-comment-pull-request@fabd468d3a1a0b97feee5f6b9e499eab0dd903f6 # @v2.5.0
+        with:
+          message: |
+            clang-format 18 needs to be run on this PR.
+            If you do not have clang-format installed, the maintainer will run it when merging.
+            For the exact version please see https://packages.ubuntu.com/noble/clang-format-18.
+
+            _(execution **${{ github.run_id }}** / attempt **${{ github.run_attempt }}**)_
+          comment_tag: execution
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Comment on PR
+        if: steps.clang-format.outcome != 'failure'
+        uses: thollander/actions-comment-pull-request@fabd468d3a1a0b97feee5f6b9e499eab0dd903f6 # @v2.5.0
+        with:
+          message: |
+            _(execution **${{ github.run_id }}** / attempt **${{ github.run_attempt }}**)_
+          create_if_not_exists: false
+          comment_tag: execution
+          mode: delete
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -0,0 +1,55 @@
+name: "CodeQL"
+
+on:
+  push:
+    branches: ["master"]
+  pull_request:
+    # The branches below must be a subset of the branches above
+    branches: ["master"]
+  schedule:
+    - cron: "17 18 * * 1"
+
+jobs:
+  analyze:
+    name: Analyze
+    runs-on: ubuntu-latest
+    permissions:
+      actions: read
+      contents: read
+      security-events: write
+
+    strategy:
+      fail-fast: false
+      matrix:
+        language: ["cpp"]
+        # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
+        # Use only 'java' to analyze code written in Java, Kotlin, or both
+        # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both
+        # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          persist-credentials: false
+
+      # Initializes the CodeQL tools for scanning.
+      - name: Initialize CodeQL
+        uses: github/codeql-action/init@v3
+        with:
+          languages: ${{ matrix.language }}
+          # If you wish to specify custom queries, you can do so here or in a config file.
+          # By default, queries listed here will override any specified in a config file.
+          # Prefix the list here with "+" to use these queries and those in the config file.
+
+          # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
+          # queries: security-extended,security-and-quality
+
+      - name: Build
+        working-directory: src
+        run: make -j build ARCH=x86-64-modern
+
+      - name: Perform CodeQL Analysis
+        uses: github/codeql-action/analyze@v3
+        with:
+          category: "/language:${{matrix.language}}"
@@ -0,0 +1,94 @@
+name: Compilation
+on:
+  workflow_call:
+    inputs:
+      matrix:
+        type: string
+        required: true
+jobs:
+  Compilation:
+    name: ${{ matrix.config.name }} ${{ matrix.binaries }}
+    runs-on: ${{ matrix.config.os }}
+    env:
+      COMPCXX: ${{ matrix.config.compiler }}
+      COMP: ${{ matrix.config.comp }}
+      EXT: ${{ matrix.config.ext }}
+      NAME: ${{ matrix.config.simple_name }}
+      BINARY: ${{ matrix.binaries }}
+      SDE: ${{ matrix.config.sde }}
+    strategy:
+      fail-fast: false
+      matrix: ${{ fromJson(inputs.matrix) }}
+    defaults:
+      run:
+        working-directory: src
+        shell: ${{ matrix.config.shell }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
+
+      - name: Install fixed GCC on Linux
+        if: runner.os == 'Linux'
+        uses: egor-tensin/setup-gcc@eaa888eb19115a521fa72b65cd94fe1f25bbcaac # @v1.3
+        with:
+          version: 11
+
+      - name: Setup msys and install required packages
+        if: runner.os == 'Windows'
+        uses: msys2/setup-msys2@v2
+        with:
+          msystem: ${{ matrix.config.msys_sys }}
+          install: mingw-w64-${{ matrix.config.msys_env }} make git zip
+
+      - name: Download SDE package
+        if: runner.os == 'Linux' || runner.os == 'Windows'
+        uses: petarpetrovt/setup-sde@91a1a03434384e064706634125a15f7446d2aafb # @v2.3
+        with:
+          environmentVariableName: SDE_DIR
+          sdeVersion: 9.27.0
+
+      - name: Download the used network from the fishtest framework
+        run: make net
+
+      - name: Check compiler
+        run: $COMPCXX -v
+
+      - name: Test help target
+        run: make help
+
+      - name: Check git
+        run: git --version
+
+      - name: Check compiler
+        run: $COMPCXX -v
+
+      - name: Show g++ cpu info
+        if: runner.os != 'macOS'
+        run: g++ -Q -march=native --help=target
+
+      - name: Show clang++ cpu info
+        if: runner.os == 'macOS'
+        run: clang++ -E - -march=native -###
+
+      # x86-64 with newer extensions tests
+
+      - name: Compile ${{ matrix.binaries }} build
+        run: |
+          make clean
+          make -j4 profile-build ARCH=$BINARY COMP=$COMP WINE_PATH="$SDE"
+          make strip ARCH=$BINARY COMP=$COMP
+          WINE_PATH="$SDE" ../tests/signature.sh $benchref
+          mv ./stockfish$EXT ../stockfish-$NAME-$BINARY$EXT
+
+      - name: Remove non src files
+        run: git clean -fx
+
+      - name: Upload artifact for (pre)-release
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ matrix.config.simple_name }} ${{ matrix.binaries }}
+          path: |
+             .
+             !.git
+             !.output
@@ -0,0 +1,43 @@
+# This workflow will play games with a debug-enabled build of SF from the PR
+
+name: Games
+on:
+  workflow_call:
+jobs:
+  Matetrack:
+    name: Games
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Checkout SF repo 
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.pull_request.head.sha }}
+          path: Stockfish
+          persist-credentials: false
+
+      - name: build debug enabled version of SF
+        working-directory: Stockfish/src
+        run: make -j build debug=yes
+
+      - name: Checkout fastchess repo
+        uses: actions/checkout@v4
+        with:
+          repository: Disservin/fastchess
+          path: fastchess
+          ref: 894616028492ae6114835195f14a899f6fa237d3
+          persist-credentials: false
+
+      - name: fastchess build
+        working-directory: fastchess
+        run: make -j
+
+      - name: Run games
+        working-directory: fastchess
+        run: |
+          ./fastchess -rounds 4 -games 2 -repeat -concurrency 4 -openings file=app/tests/data/openings.epd format=epd order=random -srand $RANDOM\
+               -engine name=sf1 cmd=/home/runner/work/Stockfish/Stockfish/Stockfish/src/stockfish\
+               -engine name=sf2 cmd=/home/runner/work/Stockfish/Stockfish/Stockfish/src/stockfish\
+               -ratinginterval 1 -report penta=true -each proto=uci tc=4+0.04 -log file=fast.log | tee fast.out
+          cat fast.log
+          ! grep "Assertion" fast.log > /dev/null
+          ! grep "disconnect" fast.out > /dev/null
@@ -0,0 +1,49 @@
+name: IWYU
+on:
+  workflow_call:
+jobs:
+  Analyzers:
+    name: Check includes
+    runs-on: ubuntu-22.04
+    defaults:
+      run:
+        working-directory: Stockfish/src
+        shell: bash
+    steps:
+      - name: Checkout Stockfish
+        uses: actions/checkout@v4
+        with:
+          path: Stockfish
+          persist-credentials: false
+
+      - name: Checkout include-what-you-use
+        uses: actions/checkout@v4
+        with:
+          repository: include-what-you-use/include-what-you-use
+          ref: f25caa280dc3277c4086ec345ad279a2463fea0f
+          path: include-what-you-use
+          persist-credentials: false
+
+      - name: Download required linux packages
+        run: |
+          sudo add-apt-repository 'deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main'
+          wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
+          sudo apt update
+          sudo apt install -y libclang-17-dev clang-17 libc++-17-dev
+
+      - name: Set up include-what-you-use
+        run: |
+          mkdir build && cd build
+          cmake -G "Unix Makefiles" -DCMAKE_PREFIX_PATH="/usr/lib/llvm-17" ..
+          sudo make install
+        working-directory: include-what-you-use
+
+      - name: Check include-what-you-use
+        run: include-what-you-use --version
+
+      - name: Check includes
+        run: >
+          make analyze
+          COMP=clang
+          CXX=include-what-you-use
+          CXXFLAGS="-stdlib=libc++ -Xiwyu --comment_style=long -Xiwyu --mapping='${{ github.workspace }}/Stockfish/.github/ci/libcxx17.imp' -Xiwyu --error"
@@ -0,0 +1,71 @@
+# This workflow will run matetrack on the PR
+
+name: Matetrack
+on:
+  workflow_call:
+jobs:
+  Matetrack:
+    name: Matetrack
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Checkout SF repo 
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.pull_request.head.sha }}
+          path: Stockfish
+          persist-credentials: false
+
+      - name: build SF
+        working-directory: Stockfish/src
+        run: make -j profile-build
+
+      - name: Checkout matetrack repo
+        uses: actions/checkout@v4
+        with:
+          repository: vondele/matetrack
+          path: matetrack
+          ref: 4f8a80860ed8f3607f05a9195df8b40203bdc360
+          persist-credentials: false
+
+      - name: matetrack install deps
+        working-directory: matetrack
+        run: pip install -r requirements.txt
+
+      - name: cache syzygy
+        id: cache-syzygy
+        uses: actions/cache@v4
+        with:
+           path: |
+              matetrack/3-4-5-wdl/
+              matetrack/3-4-5-dtz/
+           key: key-syzygy
+
+      - name: download syzygy 3-4-5 if needed
+        working-directory: matetrack
+        if: steps.cache-syzygy.outputs.cache-hit != 'true'
+        run: |
+          wget --no-verbose -r -nH --cut-dirs=2 --no-parent --reject="index.html*" -e robots=off https://tablebase.lichess.ovh/tables/standard/3-4-5-wdl/
+          wget --no-verbose -r -nH --cut-dirs=2 --no-parent --reject="index.html*" -e robots=off https://tablebase.lichess.ovh/tables/standard/3-4-5-dtz/
+
+      - name: Run matetrack
+        working-directory: matetrack
+        run: |
+          python matecheck.py --syzygyPath 3-4-5-wdl/:3-4-5-dtz/ --engine /home/runner/work/Stockfish/Stockfish/Stockfish/src/stockfish --epdFile mates2000.epd --nodes 100000 | tee matecheckout.out
+          ! grep "issues were detected" matecheckout.out > /dev/null
+
+      - name: Run matetrack with --syzygy50MoveRule false
+        working-directory: matetrack
+        run: |
+          grep 5men cursed.epd > cursed5.epd
+          python matecheck.py --syzygyPath 3-4-5-wdl/:3-4-5-dtz/ --engine /home/runner/work/Stockfish/Stockfish/Stockfish/src/stockfish --epdFile cursed5.epd --nodes 100000 --syzygy50MoveRule false | tee matecheckcursed.out
+          ! grep "issues were detected" matecheckcursed.out > /dev/null
+
+      - name: Verify mate and TB win count for matecheckcursed.out
+        working-directory: matetrack
+        run: |
+          mates=$(grep "Found mates:" matecheckcursed.out | awk '{print $3}')
+          tbwins=$(grep "Found TB wins:" matecheckcursed.out | awk '{print $4}')
+          if [ $(($mates + $tbwins)) -ne 32 ]; then
+            echo "Sum of mates and TB wins is not 32 in matecheckcursed.out" >&2
+            exit 1
+          fi
@@ -0,0 +1,87 @@
+name: Sanitizers
+on:
+  workflow_call:
+jobs:
+  Test-under-sanitizers:
+    name: ${{ matrix.sanitizers.name }}
+    runs-on: ${{ matrix.config.os }}
+    env:
+      COMPCXX: ${{ matrix.config.compiler }}
+      COMP: ${{ matrix.config.comp }}
+      CXXFLAGS: "-Werror"
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+          - name: Ubuntu 22.04 GCC
+            os: ubuntu-22.04
+            compiler: g++
+            comp: gcc
+            shell: bash
+        sanitizers:
+          - name: Run with thread sanitizer
+            make_option: sanitize=thread
+            cxx_extra_flags: ""
+            instrumented_option: sanitizer-thread
+          - name: Run with UB sanitizer
+            make_option: sanitize=undefined
+            cxx_extra_flags: ""
+            instrumented_option: sanitizer-undefined
+          - name: Run under valgrind
+            make_option: ""
+            cxx_extra_flags: ""
+            instrumented_option: valgrind
+          - name: Run under valgrind-thread
+            make_option: ""
+            cxx_extra_flags: ""
+            instrumented_option: valgrind-thread
+          - name: Run non-instrumented
+            make_option: ""
+            cxx_extra_flags: ""
+            instrumented_option: none
+          - name: Run with glibcxx assertions
+            make_option: ""
+            cxx_extra_flags: -D_GLIBCXX_ASSERTIONS
+            instrumented_option: none
+    defaults:
+      run:
+        working-directory: src
+        shell: ${{ matrix.config.shell }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
+
+      - name: Download required linux packages
+        run: |
+          sudo apt update
+          sudo apt install expect valgrind g++-multilib
+
+      - name: Download the used network from the fishtest framework
+        run: make net
+
+      - name: Check compiler
+        run: $COMPCXX -v
+
+      - name: Test help target
+        run: make help
+
+      - name: Check git
+        run: git --version
+
+      # Since Linux Kernel 6.5 we are getting false positives from the CI,
+      # lower the ASLR entropy (vm.mmap_rnd_bits), which works as a temporary workaround.
+      # https://github.com/google/sanitizers/issues/1716
+      # https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2056762
+
+      - name: Lower ASLR entropy
+        run: sudo sysctl -w vm.mmap_rnd_bits=28
+
+      # Sanitizers
+
+      - name: ${{ matrix.sanitizers.name }}
+        run: |
+          export CXXFLAGS="-O1 -fno-inline ${{ matrix.sanitizers.cxx_extra_flags }}"
+          make clean
+          make -j4 ARCH=x86-64-sse41-popcnt ${{ matrix.sanitizers.make_option }} debug=yes optimize=no build > /dev/null
+          python3 ../tests/instrumented.py --${{ matrix.sanitizers.instrumented_option }} ./stockfish
@@ -0,0 +1,122 @@
+name: Stockfish
+on:
+  push:
+    tags:
+      - "*"
+    branches:
+      - master
+      - tools
+      - github_ci
+  pull_request:
+    branches:
+      - master
+      - tools
+jobs:
+  Prerelease:
+    if: github.repository == 'official-stockfish/Stockfish' && (github.ref == 'refs/heads/master' || (startsWith(github.ref_name, 'sf_') && github.ref_type == 'tag'))
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write # For deleting/creating a prerelease
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
+
+      # returns null if no pre-release exists
+      - name: Get Commit SHA of Latest Pre-release
+        run: |
+          # Install required packages
+          sudo apt-get update
+          sudo apt-get install -y curl jq
+
+          echo "COMMIT_SHA_TAG=$(jq -r 'map(select(.prerelease)) | first | .tag_name' <<< $(curl -s https://api.github.com/repos/${{ github.repository_owner }}/Stockfish/releases))" >> $GITHUB_ENV
+
+      # delete the previous pre-release and its tag
+      - run: gh release delete ${{ env.COMMIT_SHA_TAG }} --cleanup-tag
+        if: env.COMMIT_SHA_TAG != 'null'
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      # Make sure that an old ci that still runs on master doesn't recreate a prerelease
+      - name: Check Pullable Commits
+        id: check_commits
+        run: |
+          git fetch
+          CHANGES=$(git rev-list HEAD..origin/master --count)
+          echo "CHANGES=$CHANGES" >> $GITHUB_ENV
+
+      - name: Get last commit SHA
+        id: last_commit
+        run: echo "COMMIT_SHA=$(git rev-parse HEAD | cut -c 1-8)" >> $GITHUB_ENV
+
+      - name: Get commit date
+        id: commit_date
+        run: echo "COMMIT_DATE=$(git show -s --date=format:'%Y%m%d' --format=%cd HEAD)" >> $GITHUB_ENV
+
+      # Create a new pre-release; the upload_binaries.yml workflow will upload
+      # the binaries to this pre-release.
+      - name: Create Prerelease
+        if: github.ref_name == 'master' && env.CHANGES == '0'
+        uses: softprops/action-gh-release@4634c16e79c963813287e889244c50009e7f0981
+        with:
+          name: Stockfish dev-${{ env.COMMIT_DATE }}-${{ env.COMMIT_SHA }}
+          tag_name: stockfish-dev-${{ env.COMMIT_DATE }}-${{ env.COMMIT_SHA }}
+          prerelease: true
+
+  Matrix:
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+      arm_matrix: ${{ steps.set-arm-matrix.outputs.arm_matrix }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
+      - id: set-matrix
+        run: |
+          TASKS=$(echo $(cat .github/ci/matrix.json) )
+          echo "MATRIX=$TASKS" >> $GITHUB_OUTPUT
+      - id: set-arm-matrix
+        run: |
+          TASKS_ARM=$(echo $(cat .github/ci/arm_matrix.json) )
+          echo "ARM_MATRIX=$TASKS_ARM" >> $GITHUB_OUTPUT
+  Compilation:
+    needs: [Matrix]
+    uses: ./.github/workflows/compilation.yml
+    with:
+      matrix: ${{ needs.Matrix.outputs.matrix }}
+  ARMCompilation:
+    needs: [Matrix]
+    uses: ./.github/workflows/arm_compilation.yml
+    with:
+      matrix: ${{ needs.Matrix.outputs.arm_matrix }}
+  IWYU:
+    uses: ./.github/workflows/iwyu.yml
+  Sanitizers:
+    uses: ./.github/workflows/sanitizers.yml
+  Tests:
+    uses: ./.github/workflows/tests.yml
+  Matetrack:
+    uses: ./.github/workflows/matetrack.yml
+  Games:
+    uses: ./.github/workflows/games.yml
+  Binaries:
+    if: github.repository == 'official-stockfish/Stockfish'
+    needs: [Matrix, Prerelease, Compilation]
+    uses: ./.github/workflows/upload_binaries.yml
+    with:
+      matrix: ${{ needs.Matrix.outputs.matrix }}
+    permissions:
+      contents: write # For deleting/creating a (pre)release
+    secrets:
+      token: ${{ secrets.GITHUB_TOKEN }}
+  ARM_Binaries:
+    if: github.repository == 'official-stockfish/Stockfish'
+    needs: [Matrix, Prerelease, ARMCompilation]
+    uses: ./.github/workflows/upload_binaries.yml
+    with:
+      matrix: ${{ needs.Matrix.outputs.arm_matrix }}
+    permissions:
+      contents: write # For deleting/creating a (pre)release
+    secrets:
+      token: ${{ secrets.GITHUB_TOKEN }}
@@ -0,0 +1,366 @@
+name: Tests
+on:
+  workflow_call:
+jobs:
+  Test-Targets:
+    name: ${{ matrix.config.name }}
+    runs-on: ${{ matrix.config.os }}
+    env:
+      COMPCXX: ${{ matrix.config.compiler }}
+      COMP: ${{ matrix.config.comp }}
+      CXXFLAGS: "-Werror"
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+          - name: Ubuntu 22.04 GCC
+            os: ubuntu-22.04
+            compiler: g++
+            comp: gcc
+            run_32bit_tests: true
+            run_64bit_tests: true
+            shell: bash
+          - name: Ubuntu 22.04 Clang
+            os: ubuntu-22.04
+            compiler: clang++
+            comp: clang
+            run_32bit_tests: true
+            run_64bit_tests: true
+            shell: bash
+          - name: Android NDK aarch64
+            os: ubuntu-22.04
+            compiler: aarch64-linux-android21-clang++
+            comp: ndk
+            run_armv8_tests: true
+            shell: bash
+          - name: Android NDK arm
+            os: ubuntu-22.04
+            compiler: armv7a-linux-androideabi21-clang++
+            comp: ndk
+            run_armv7_tests: true
+            shell: bash
+          - name: Linux GCC riscv64
+            os: ubuntu-22.04
+            compiler: g++
+            comp: gcc
+            run_riscv64_tests: true
+            base_image: "riscv64/alpine:edge"
+            platform: linux/riscv64
+            shell: bash
+          - name: Linux GCC ppc64
+            os: ubuntu-22.04
+            compiler: g++
+            comp: gcc
+            run_ppc64_tests: true
+            base_image: "ppc64le/alpine:latest"
+            platform: linux/ppc64le
+            shell: bash
+          - name: MacOS 13 Apple Clang
+            os: macos-13
+            compiler: clang++
+            comp: clang
+            run_64bit_tests: true
+            shell: bash
+          - name: MacOS 14 Apple Clang M1
+            os: macos-14
+            compiler: clang++
+            comp: clang
+            run_64bit_tests: false
+            run_m1_tests: true
+            shell: bash
+          - name: MacOS 13 GCC 11
+            os: macos-13
+            compiler: g++-11
+            comp: gcc
+            run_64bit_tests: true
+            shell: bash
+          - name: Windows 2022 Mingw-w64 GCC x86_64
+            os: windows-2022
+            compiler: g++
+            comp: mingw
+            run_64bit_tests: true
+            msys_sys: mingw64
+            msys_env: x86_64-gcc
+            shell: msys2 {0}
+          - name: Windows 2022 Mingw-w64 GCC i686
+            os: windows-2022
+            compiler: g++
+            comp: mingw
+            run_32bit_tests: true
+            msys_sys: mingw32
+            msys_env: i686-gcc
+            shell: msys2 {0}
+          - name: Windows 2022 Mingw-w64 Clang x86_64
+            os: windows-2022
+            compiler: clang++
+            comp: clang
+            run_64bit_tests: true
+            msys_sys: clang64
+            msys_env: clang-x86_64-clang
+            shell: msys2 {0}
+    defaults:
+      run:
+        working-directory: src
+        shell: ${{ matrix.config.shell }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          persist-credentials: false
+
+      - name: Download required linux packages
+        if: runner.os == 'Linux'
+        run: |
+          sudo apt update
+          sudo apt install expect valgrind g++-multilib qemu-user-static
+
+      - name: Install NDK
+        if: runner.os == 'Linux'
+        run: |
+          if [ $COMP == ndk ]; then
+            NDKV="21.4.7075529"
+            ANDROID_ROOT=/usr/local/lib/android
+            ANDROID_SDK_ROOT=$ANDROID_ROOT/sdk
+            SDKMANAGER=$ANDROID_SDK_ROOT/cmdline-tools/latest/bin/sdkmanager
+            echo "y" | $SDKMANAGER "ndk;$NDKV"
+            ANDROID_NDK_ROOT=$ANDROID_SDK_ROOT/ndk/$NDKV
+            ANDROID_NDK_BIN=$ANDROID_NDK_ROOT/toolchains/llvm/prebuilt/linux-x86_64/bin
+            echo "ANDROID_NDK_BIN=$ANDROID_NDK_BIN" >> $GITHUB_ENV
+          fi
+
+      - name: Set up QEMU
+        if: matrix.config.base_image
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        if: matrix.config.base_image
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build Docker container
+        if: matrix.config.base_image
+        run: |
+          docker buildx build --platform ${{ matrix.config.platform }} --load -t sf_builder - << EOF
+          FROM ${{ matrix.config.base_image }}
+          WORKDIR /app
+          RUN apk update && apk add make g++
+          CMD ["sh", "src/script.sh"]
+          EOF
+
+      - name: Download required macOS packages
+        if: runner.os == 'macOS'
+        run: brew install coreutils gcc@11
+
+      - name: Setup msys and install required packages
+        if: runner.os == 'Windows'
+        uses: msys2/setup-msys2@v2
+        with:
+          msystem: ${{ matrix.config.msys_sys }}
+          install: mingw-w64-${{ matrix.config.msys_env }} make git expect
+
+      - name: Download the used network from the fishtest framework
+        run: make net
+
+      - name: Extract the bench number from the commit history
+        run: |
+          for hash in $(git rev-list -100 HEAD); do
+            benchref=$(git show -s $hash | tac | grep -m 1 -o -x '[[:space:]]*\b[Bb]ench[ :]\+[1-9][0-9]\{5,7\}\b[[:space:]]*' | sed 's/[^0-9]//g') && break || true
+          done
+          [[ -n "$benchref" ]] && echo "benchref=$benchref" >> $GITHUB_ENV && echo "From commit: $hash" && echo "Reference bench: $benchref" || echo "No bench found"
+
+      - name: Check compiler
+        run: |
+          if [ -z "${{ matrix.config.base_image }}" ]; then
+            if [ $COMP == ndk ]; then
+              export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH
+            fi
+            $COMPCXX -v
+          else
+            echo "$COMPCXX -v" > script.sh
+            docker run --rm --platform ${{ matrix.config.platform }} -v ${{ github.workspace }}:/app sf_builder
+          fi
+
+      - name: Test help target
+        run: make help
+
+      - name: Check git
+        run: git --version
+
+      # x86-32 tests
+
+      - name: Test debug x86-32 build
+        if: matrix.config.run_32bit_tests
+        run: |
+          export CXXFLAGS="-Werror -D_GLIBCXX_DEBUG"
+          make clean
+          make -j4 ARCH=x86-32 optimize=no debug=yes build
+          ../tests/signature.sh $benchref
+
+      - name: Test x86-32 build
+        if: matrix.config.run_32bit_tests
+        run: |
+          make clean
+          make -j4 ARCH=x86-32 build
+          ../tests/signature.sh $benchref
+
+      - name: Test x86-32-sse41-popcnt build
+        if: matrix.config.run_32bit_tests
+        run: |
+          make clean
+          make -j4 ARCH=x86-32-sse41-popcnt build
+          ../tests/signature.sh $benchref
+
+      - name: Test x86-32-sse2 build
+        if: matrix.config.run_32bit_tests
+        run: |
+          make clean
+          make -j4 ARCH=x86-32-sse2 build
+          ../tests/signature.sh $benchref
+
+      - name: Test general-32 build
+        if: matrix.config.run_32bit_tests
+        run: |
+          make clean
+          make -j4 ARCH=general-32 build
+          ../tests/signature.sh $benchref
+
+      # x86-64 tests
+
+      - name: Test debug x86-64-avx2 build
+        if: matrix.config.run_64bit_tests
+        run: |
+          export CXXFLAGS="-Werror -D_GLIBCXX_DEBUG"
+          make clean
+          make -j4 ARCH=x86-64-avx2 optimize=no debug=yes build
+          ../tests/signature.sh $benchref
+
+      - name: Test x86-64-bmi2 build
+        if: matrix.config.run_64bit_tests
+        run: |
+          make clean
+          make -j4 ARCH=x86-64-bmi2 build
+          ../tests/signature.sh $benchref
+
+      - name: Test x86-64-avx2 build
+        if: matrix.config.run_64bit_tests
+        run: |
+          make clean
+          make -j4 ARCH=x86-64-avx2 build
+          ../tests/signature.sh $benchref
+
+      # Test a deprecated arch
+      - name: Test x86-64-modern build
+        if: matrix.config.run_64bit_tests
+        run: |
+          make clean
+          make -j4 ARCH=x86-64-modern build
+          ../tests/signature.sh $benchref
+
+      - name: Test x86-64-sse41-popcnt build
+        if: matrix.config.run_64bit_tests
+        run: |
+          make clean
+          make -j4 ARCH=x86-64-sse41-popcnt build
+          ../tests/signature.sh $benchref
+
+      - name: Test x86-64-ssse3 build
+        if: matrix.config.run_64bit_tests
+        run: |
+          make clean
+          make -j4 ARCH=x86-64-ssse3 build
+          ../tests/signature.sh $benchref
+
+      - name: Test x86-64-sse3-popcnt build
+        if: matrix.config.run_64bit_tests
+        run: |
+          make clean
+          make -j4 ARCH=x86-64-sse3-popcnt build
+          ../tests/signature.sh $benchref
+
+      - name: Test x86-64 build
+        if: matrix.config.run_64bit_tests
+        run: |
+          make clean
+          make -j4 ARCH=x86-64 build
+          ../tests/signature.sh $benchref
+
+      - name: Test general-64 build
+        if: matrix.config.run_64bit_tests
+        run: |
+          make clean
+          make -j4 ARCH=general-64 build
+          ../tests/signature.sh $benchref
+
+      - name: Test apple-silicon build
+        if: matrix.config.run_m1_tests
+        run: |
+          make clean
+          make -j4 ARCH=apple-silicon build
+          ../tests/signature.sh $benchref
+
+      # armv8 tests
+
+      - name: Test armv8 build
+        if: matrix.config.run_armv8_tests
+        run: |
+          export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH
+          export LDFLAGS="-static -Wno-unused-command-line-argument"
+          make clean
+          make -j4 ARCH=armv8 build
+          ../tests/signature.sh $benchref
+
+      - name: Test armv8-dotprod build
+        if: matrix.config.run_armv8_tests
+        run: |
+          export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH
+          export LDFLAGS="-static -Wno-unused-command-line-argument"
+          make clean
+          make -j4 ARCH=armv8-dotprod build
+          ../tests/signature.sh $benchref
+
+      # armv7 tests
+
+      - name: Test armv7 build
+        if: matrix.config.run_armv7_tests
+        run: |
+          export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH
+          export LDFLAGS="-static -Wno-unused-command-line-argument"
+          make clean
+          make -j4 ARCH=armv7 build
+          ../tests/signature.sh $benchref
+
+      - name: Test armv7-neon build
+        if: matrix.config.run_armv7_tests
+        run: |
+          export PATH=${{ env.ANDROID_NDK_BIN }}:$PATH
+          export LDFLAGS="-static -Wno-unused-command-line-argument"
+          make clean
+          make -j4 ARCH=armv7-neon build
+          ../tests/signature.sh $benchref
+
+      # riscv64 tests
+
+      - name: Test riscv64 build
+        if: matrix.config.run_riscv64_tests
+        run: |
+          echo "cd src && export LDFLAGS='-static' && make clean && make -j4 ARCH=riscv64 build" > script.sh
+          docker run --rm --platform ${{ matrix.config.platform }} -v ${{ github.workspace }}:/app sf_builder
+          ../tests/signature.sh $benchref
+
+      # ppc64 tests
+
+      - name: Test ppc64 build
+        if: matrix.config.run_ppc64_tests
+        run: |
+          echo "cd src && export LDFLAGS='-static' && make clean && make -j4 ARCH=ppc-64 build" > script.sh
+          docker run --rm --platform ${{ matrix.config.platform }} -v ${{ github.workspace }}:/app sf_builder
+          ../tests/signature.sh $benchref
+
+      # Other tests
+
+      - name: Check perft and search reproducibility
+        if: matrix.config.run_64bit_tests
+        run: |
+          make clean
+          make -j4 ARCH=x86-64-avx2 build
+          ../tests/perft.sh
+          ../tests/reprosearch.sh
@@ -0,0 +1,114 @@
+name: Upload Binaries
+on:
+  workflow_call:
+    inputs:
+      matrix:
+        type: string
+        required: true
+    secrets:
+      token:
+        required: true
+
+jobs:
+  Artifacts:
+    name: ${{ matrix.config.name }} ${{ matrix.binaries }}
+    runs-on: ${{ matrix.config.os }}
+    env:
+      COMPCXX: ${{ matrix.config.compiler }}
+      COMP: ${{ matrix.config.comp }}
+      EXT: ${{ matrix.config.ext }}
+      NAME: ${{ matrix.config.simple_name }}
+      BINARY: ${{ matrix.binaries }}
+      SDE: ${{ matrix.config.sde }}
+    strategy:
+      fail-fast: false
+      matrix: ${{ fromJson(inputs.matrix) }}
+    defaults:
+      run:
+        shell: ${{ matrix.config.shell }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          persist-credentials: false
+
+      - name: Download artifact from compilation
+        uses: actions/download-artifact@v4
+        with:
+          name: ${{ matrix.config.simple_name }} ${{ matrix.binaries }}
+          path: ${{ matrix.config.simple_name }} ${{ matrix.binaries }}
+
+      - name: Setup msys and install required packages
+        if: runner.os == 'Windows'
+        uses: msys2/setup-msys2@v2
+        with:
+          msystem: ${{ matrix.config.msys_sys }}
+          install: mingw-w64-${{ matrix.config.msys_env }} make git zip
+
+      - name: Create Package
+        run: |
+          mkdir stockfish
+
+      - name: Download wiki
+        run: |
+          git clone https://github.com/official-stockfish/Stockfish.wiki.git wiki
+          rm -rf wiki/.git
+          mv wiki stockfish/
+
+      - name: Copy files
+        run: |
+          mv "${{ matrix.config.simple_name }} ${{ matrix.binaries }}" stockfish-workflow
+          cd stockfish-workflow
+          cp -r src ../stockfish/
+          cp -r scripts ../stockfish/
+          cp stockfish-$NAME-$BINARY$EXT ../stockfish/
+          cp "Top CPU Contributors.txt" ../stockfish/
+          cp Copying.txt ../stockfish/
+          cp AUTHORS ../stockfish/
+          cp CITATION.cff ../stockfish/
+          cp README.md ../stockfish/
+          cp CONTRIBUTING.md ../stockfish/
+
+      - name: Create tar
+        if: runner.os != 'Windows'
+        run: |
+          chmod +x ./stockfish/stockfish-$NAME-$BINARY$EXT
+          tar -cvf stockfish-$NAME-$BINARY.tar stockfish
+
+      - name: Create zip
+        if: runner.os == 'Windows'
+        run: |
+          zip -r stockfish-$NAME-$BINARY.zip stockfish
+
+      - name: Release
+        if: startsWith(github.ref_name, 'sf_') && github.ref_type == 'tag'
+        uses: softprops/action-gh-release@4634c16e79c963813287e889244c50009e7f0981
+        with:
+          files: stockfish-${{ matrix.config.simple_name }}-${{ matrix.binaries }}.${{ matrix.config.archive_ext }}
+          token: ${{ secrets.token }}
+
+      - name: Get last commit sha
+        id: last_commit
+        run: echo "COMMIT_SHA=$(git rev-parse HEAD | cut -c 1-8)" >> $GITHUB_ENV
+
+      - name: Get commit date
+        id: commit_date
+        run: echo "COMMIT_DATE=$(git show -s --date=format:'%Y%m%d' --format=%cd HEAD)" >> $GITHUB_ENV
+
+      # Make sure that an outdated CI run that is still in progress on master doesn't recreate a prerelease
+      - name: Check Pullable Commits
+        id: check_commits
+        run: |
+          git fetch
+          CHANGES=$(git rev-list HEAD..origin/master --count)
+          echo "CHANGES=$CHANGES" >> $GITHUB_ENV
+
+      - name: Prerelease
+        if: github.ref_name == 'master' && env.CHANGES == '0'
+        continue-on-error: true
+        uses: softprops/action-gh-release@4634c16e79c963813287e889244c50009e7f0981
+        with:
+          name: Stockfish dev-${{ env.COMMIT_DATE }}-${{ env.COMMIT_SHA }}
+          tag_name: stockfish-dev-${{ env.COMMIT_DATE }}-${{ env.COMMIT_SHA }}
+          prerelease: true
+          files: stockfish-${{ matrix.config.simple_name }}-${{ matrix.binaries }}.${{ matrix.config.archive_ext }}
+          token: ${{ secrets.token }}
@@ -10,3 +10,8 @@ src/-lstdc++.res
 # Neural network for the NNUE evaluation
 **/*.nnue

+# Files generated by the instrumented tests
+tsan.supp
+__pycache__/
+tests/syzygy
+tests/bench_tmp.epd
@@ -1,101 +0,0 @@
-language: cpp
-dist: bionic
-
-matrix:
-  include:
-    - os: linux
-      compiler: gcc
-      addons:
-        apt:
-          packages: ['g++-8', 'g++-8-multilib', 'g++-multilib', 'valgrind', 'expect', 'curl']
-      env:
-        - COMPILER=g++-8
-        - COMP=gcc
-
-    - os: linux
-      compiler: clang
-      addons:
-        apt:
-          packages: ['clang-10', 'llvm-10-dev', 'g++-multilib', 'valgrind', 'expect', 'curl']
-      env:
-        - COMPILER=clang++-10
-        - COMP=clang
-
-    - os: osx
-      osx_image: xcode12
-      compiler: gcc
-      env:
-        - COMPILER=g++
-        - COMP=gcc
-
-    - os: osx
-      osx_image: xcode12
-      compiler: clang
-      env:
-        - COMPILER=clang++
-        - COMP=clang
-
-branches:
-  only:
-   - master
-
-before_script:
-  - cd src
-
-script:
-  # Download net
-  - make net
-
-  # Obtain bench reference from git log
-  - git log HEAD | grep "\b[Bb]ench[ :]\+[0-9]\{7\}" | head -n 1 | sed "s/[^0-9]*\([0-9]*\).*/\1/g" > git_sig
-  - export benchref=$(cat git_sig)
-  - echo "Reference bench:" $benchref
-
-  # Compiler version string
-  - $COMPILER -v
-
-  # test help target
-  - make help
-
-  # Verify bench number against various builds
-  - export CXXFLAGS="-Werror -D_GLIBCXX_DEBUG"
-  - make clean && make -j2 ARCH=x86-64-modern optimize=no debug=yes build && ../tests/signature.sh $benchref
-  - export CXXFLAGS="-Werror"
-  - make clean && make -j2 ARCH=x86-64-modern build && ../tests/signature.sh $benchref
-  - make clean && make -j2 ARCH=x86-64-ssse3 build && ../tests/signature.sh $benchref
-  - make clean && make -j2 ARCH=x86-64-sse3-popcnt build && ../tests/signature.sh $benchref
-  - make clean && make -j2 ARCH=x86-64 build && ../tests/signature.sh $benchref
-  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=general-64 build && ../tests/signature.sh $benchref; fi
-  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 optimize=no debug=yes build && ../tests/signature.sh $benchref; fi
-  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32-sse41-popcnt build && ../tests/signature.sh $benchref; fi
-  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32-sse2 build && ../tests/signature.sh $benchref; fi
-  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 build && ../tests/signature.sh $benchref; fi
-  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=general-32 build && ../tests/signature.sh $benchref; fi
-  # workaround: exclude a custom version of llvm+clang, which doesn't find llvm-profdata on ubuntu
-  - if [[ "$TRAVIS_OS_NAME" != "linux" || "$COMP" == "gcc" ]]; then make clean && make -j2 ARCH=x86-64-modern profile-build && ../tests/signature.sh $benchref; fi
-
-  # compile only for some more advanced architectures (might not run in travis)
-  - make clean && make -j2 ARCH=x86-64-avx2 build
-  - make clean && make -j2 ARCH=x86-64-bmi2 build
-  - make clean && make -j2 ARCH=x86-64-avx512 build
-  - make clean && make -j2 ARCH=x86-64-vnni512 build
-  - make clean && make -j2 ARCH=x86-64-vnni256 build
-
-  #
-  # Check perft and reproducible search
-  - make clean && make -j2 ARCH=x86-64-modern build
-  - ../tests/perft.sh
-  - ../tests/reprosearch.sh
-
-  #
-  # Valgrind
-  #
-  - export CXXFLAGS="-O1 -fno-inline"
-  - if [ -x "$(command -v valgrind )" ]; then make clean && make -j2 ARCH=x86-64-modern debug=yes optimize=no build > /dev/null && ../tests/instrumented.sh --valgrind; fi
-  - if [ -x "$(command -v valgrind )" ]; then ../tests/instrumented.sh --valgrind-thread; fi
-
-  #
-  # Sanitizer
-  #
-  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64-modern sanitize=undefined optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-undefined; fi
-  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64-modern sanitize=thread    optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-thread; fi
@@ -1,19 +1,18 @@
-# List of authors for Stockfish, as of August 4, 2020
-
-# Founders of the Stockfish project and fishtest infrastructure
+# Founders of the Stockfish project and Fishtest infrastructure
 Tord Romstad (romstad)
 Marco Costalba (mcostalba)
 Joona Kiiski (zamar)
 Gary Linscott (glinscott)

-# Authors and inventors of NNUE, training, NNUE port
+# Authors and inventors of NNUE, training, and NNUE port
 Yu Nasu (ynasu87)
 Motohiro Isozaki (yaneurao)
 Hisayori Noda (nodchip)

-# all other authors of the code in alphabetical order
+# All other authors of Stockfish code (in alphabetical order)
 Aditya (absimaldata)
 Adrian Petrescu (apetresc)
+Ahmed Kerimov (wcdbmv)
 Ajith Chandy Jose (ajithcj)
 Alain Savard (Rocky640)
 Alayan Feh (Alayan-stk-2)
@@ -21,28 +20,46 @@ Alexander Kure
 Alexander Pagel (Lolligerhans)
 Alfredo Menezes (lonfom169)
 Ali AlZhrani (Cooffe)
+Andreas Jan van der Meulen (Andyson007)
+Andreas Matthies (Matthies)
+Andrei Vetrov (proukornew)
 Andrew Grant (AndyGrant)
 Andrey Neporada (nepal)
 Andy Duplain
+Antoine Champion (antoinechampion)
 Aram Tumanian (atumanian)
 Arjun Temurnikar
+Artem Solopiy (EntityFX)
 Auguste Pop
+Balazs Szilagyi
 Balint Pfliegel
+Ben Chaney (Chaneybenjamini)
 Ben Koshy (BKSpurgeon)
 Bill Henry (VoyagerOne)
 Bojun Guo (noobpwnftw, Nooby)
+borg323
+Boštjan Mejak (PedanticHacker)
 braich
 Brian Sheppard (SapphireBrand, briansheppard-toast)
 Bruno de Melo Costa (BM123499)
+Bruno Pellanda (pellanda)
 Bryan Cross (crossbr)
 candirufish
+Carlos Esparza Sánchez (ces42)
 Chess13234
+Chris Bao (sscg13)
 Chris Cain (ceebo)
+Ciekce
+clefrks
+Clemens L. (rn5f107s2)
+Cody Ho (aesrentai)
 Dale Weiler (graphitemaster)
-Dan Schmidt (dfannius)
 Daniel Axtens (daxtens)
 Daniel Dugovic (ddugovic)
+Daniel Monroe (Ergodice)
+Dan Schmidt (dfannius)
 Dariusz Orzechowski (dorzechowski)
+David (dav1312)
 David Zar
 Daylen Yang (daylen)
 Deshawn Mohan-Smith (GoldenRare)
@@ -50,59 +67,76 @@ Dieter Dobbelaere (ddobbelaere)
 DiscanX
 Dominik Schlösser (domschl)
 double-beep
+Douglas Matos Gomes (dsmsgms)
+Dubslow
 Eduardo Cáceres (eduherminio)
 Eelco de Groot (KingDefender)
+Ehsan Rashid (erashid)
 Elvin Liu (solarlight2)
 erbsenzaehler
 Ernesto Gatti
-Linmiao Xu (linrock)
+evqsx
 Fabian Beuke (madnight)
 Fabian Fichter (ianfab)
 Fanael Linithien (Fanael)
 fanon
-Fauzi Akram Dabat (FauziAkram)
+Fauzi Akram Dabat (fauzi2)
 Felix Wittmann
 gamander
+Gabriele Lombardo (gabe)
+Gahtan Nahdi
 Gary Heckman (gheckman)
 George Sobala (gsobala)
 gguliash
+Giacomo Lorenzetti (G-Lorenz)
 Gian-Carlo Pascutto (gcp)
+Goh CJ (cj5716)
 Gontran Lemaire (gonlem)
 Goodkov Vasiliy Aleksandrovich (goodkov)
 Gregor Cramer
 GuardianRM
-Günther Demetz (pb00067, pb00068)
 Guy Vreuls (gvreuls)
+Günther Demetz (pb00067, pb00068)
 Henri Wiechers
 Hiraoka Takuya (HiraokaTakuya)
 homoSapiensSapiens
 Hongzhi Cheng
 Ivan Ivec (IIvec)
 Jacques B. (Timshel)
+Jake Senne (w1wwwwww)
 Jan Ondruš (hxim)
-Jared Kish (Kurtbusch)
+Jared Kish (Kurtbusch, kurt22i)
 Jarrod Torriero (DU-jdto)
-Jean Gauthier (OuaisBla)
+Jasper Shovelton (Beanie496)
 Jean-Francois Romang (jromang)
+Jean Gauthier (OuaisBla)
 Jekaa
 Jerry Donald Watson (jerrydonaldwatson)
 jjoshua2
-Jonathan Calovski (Mysseno)
 Jonathan Buladas Dumale (SFisGOD)
+Jonathan Calovski (Mysseno)
+Jonathan McDermid (jonathanmcdermid)
 Joost VandeVondele (vondele)
-Jörg Oster (joergoster)
 Joseph Ellis (jhellis3)
 Joseph R. Prostko
+Jörg Oster (joergoster)
+Julian Willemer (NightlyKing)
 jundery
 Justin Blanchard (UncombedCoconut)
 Kelly Wilson
 Ken Takusagawa
+Kenneth Lee (kennethlee33)
+Kian E (KJE-98)
 kinderchocolate
 Kiran Panditrao (Krgp)
 Kojirion
+Krisztián Peőcz
 Krystian Kuzniarek (kuzkry)
 Leonardo Ljubičić (ICCF World Champion)
 Leonid Pechenik (lp--)
+Li Ying (yl25946)
+Liam Keegan (lkeegan)
+Linmiao Xu (linrock)
 Linus Arver (listx)
 loco-loco
 Lub van den Berg (ElbertoOne)
@@ -113,9 +147,12 @@ Maciej Żenczykowski (zenczykowski)
 Malcolm Campbell (xoto10)
 Mark Tenzer (31m059)
 marotear
+Mathias Parnaudeau (mparnaudeau)
 Matt Ginsberg (mattginsberg)
 Matthew Lai (matthewlai)
 Matthew Sullivan (Matt14916)
+Max A. (Disservin)
+Maxim Masiutin (maximmasiutin)
 Maxim Molchanov (Maxim)
 Michael An (man)
 Michael Byrne (MichaelB7)
@@ -125,38 +162,49 @@ Michael Whiteley (protonspring)
 Michel Van den Bergh (vdbergh)
 Miguel Lahoz (miguel-l)
 Mikael Bäckman (mbootsector)
+Mike Babigian (Farseer)
 Mira
 Miroslav Fontán (Hexik)
 Moez Jellouli (MJZ1977)
 Mohammed Li (tthsqe12)
+Muzhen J (XInTheDark)
 Nathan Rugg (nmrugg)
-Nick Pelling (nickpelling)
+Nguyen Pham (nguyenpham)
 Nicklas Persson (NicklasPersson)
+Nick Pelling (nickpelling)
 Niklas Fiekas (niklasf)
 Nikolay Kostov (NikolayIT)
-Nguyen Pham (nguyenpham)
 Norman Schmidt (FireFather)
 notruck
+Nour Berakdar (Nonlinear)
+Ofek Shochat (OfekShochat, ghostway)
 Ondrej Mosnáček (WOnder93)
+Ondřej Mišina (AndrovT)
 Oskar Werkelin Ahlin
+Ömer Faruk Tutkun (OmerFarukTutkun)
 Pablo Vazquez
 Panthee
 Pascal Romaret
 Pasquale Pigazzini (ppigazzini)
 Patrick Jansen (mibere)
-pellanda
+Peter Schneider (pschneider1968)
 Peter Zsifkovits (CoffeeOne)
+PikaCat
 Praveen Kumar Tummala (praveentml)
+Prokop Randáček (ProkopRandacek)
 Rahul Dsilva (silversolver1)
 Ralph Stößer (Ralph Stoesser)
 Raminder Singh
 renouve
-Reuven Peleg
-Richard Lloyd
+Reuven Peleg (R-Peleg)
+Richard Lloyd (Richard-Lloyd)
+Robert Nürnberg (robertnurnberg)
 Rodrigo Exterckötter Tjäder
-Ron Britvich (Britvich)
+Rodrigo Roim (roim)
 Ronald de Man (syzygy1, syzygy)
+Ron Britvich (Britvich)
 rqs
+Rui Coelho (ruicoelhopedro)
 Ryan Schmitt
 Ryan Takker
 Sami Kiminki (skiminki)
@@ -165,28 +213,43 @@ Sergei Antonov (saproj)
 Sergei Ivanov (svivanov72)
 Sergio Vieri (sergiovieri)
 sf-x
+Shahin M. Shahin (peregrine)
 Shane Booth (shane31)
 Shawn Varghese (xXH4CKST3RXx)
+Shawn Xu (xu-shawn)
+Siad Daboul (Topologist)
 Stefan Geschwentner (locutus2)
 Stefano Cardanobile (Stefano80)
+Stefano Di Martino (StefanoD)
 Steinar Gunderson (sesse)
 Stéphane Nicolet (snicolet)
+Stephen Touset (stouset)
+Syine Mineta (MinetaS)
+Taras Vuk (TarasVuk)
 Thanar2
 thaspel
 theo77186
+TierynnB
+Ting-Hsuan Huang (fffelix-huang)
+Tobias Steinmann
+Tomasz Sobczyk (Sopel97)
 Tom Truscott
 Tom Vijlbrief (tomtor)
-Tomasz Sobczyk (Sopel97)
 Torsten Franz (torfranz, tfranzer)
+Torsten Hellwig (Torom)
 Tracey Emery (basepr1me)
 tttak
 Unai Corzo (unaiic)
 Uri Blass (uriblass)
 Vince Negri (cuddlestmonkey)
+Viren
+Wencey Wang
+windfishballad
+xefoci7612
+Xiang Wang (KatyushaScarlet)
 zz4032

-
 # Additionally, we acknowledge the authors and maintainers of fishtest,
-# an amazing and essential framework for the development of Stockfish!
+# an amazing and essential framework for Stockfish development!
 #
-# https://github.com/glinscott/fishtest/blob/master/AUTHORS
+# https://github.com/official-stockfish/fishtest/blob/master/AUTHORS
@@ -0,0 +1,23 @@
+# This CITATION.cff file was generated with cffinit.
+# Visit https://bit.ly/cffinit to generate yours today!
+
+cff-version: 1.2.0
+title: Stockfish
+message: >-
+  Please cite this software using the metadata from this
+  file.
+type: software
+authors:
+  - name: The Stockfish developers (see AUTHORS file)
+repository-code: 'https://github.com/official-stockfish/Stockfish'
+url: 'https://stockfishchess.org/'
+repository-artifact: 'https://stockfishchess.org/download/'
+abstract: Stockfish is a free and strong UCI chess engine.
+keywords:
+  - chess
+  - artificial intelligence (AI)
+  - tree search
+  - alpha-beta search
+  - neural networks (NN)
+  - efficiently updatable neural networks (NNUE)
+license: GPL-3.0
@@ -0,0 +1,96 @@
+# Contributing to Stockfish
+
+Welcome to the Stockfish project! We are excited that you are interested in
+contributing. This document outlines the guidelines and steps to follow when
+making contributions to Stockfish.
+
+## Table of Contents
+
+- [Building Stockfish](#building-stockfish)
+- [Making Contributions](#making-contributions)
+  - [Reporting Issues](#reporting-issues)
+  - [Submitting Pull Requests](#submitting-pull-requests)
+- [Code Style](#code-style)
+- [Community and Communication](#community-and-communication)
+- [License](#license)
+
+## Building Stockfish
+
+In case you do not have a C++ compiler installed, you can follow the
+instructions from our wiki.
+
+- [Ubuntu][ubuntu-compiling-link]
+- [Windows][windows-compiling-link]
+- [macOS][macos-compiling-link]
+
+## Making Contributions
+
+### Reporting Issues
+
+If you find a bug, please open an issue on the
+[issue tracker][issue-tracker-link]. Be sure to include relevant information
+like your operating system, build environment, and a detailed description of the
+problem.
+
+_Please note that Stockfish's development is not focused on adding new features.
+Thus any issue regarding missing features will potentially be closed without
+further discussion._
+
+### Submitting Pull Requests
+
+- Functional changes need to be tested on fishtest. See
+  [Creating my First Test][creating-my-first-test] for more details.
+  The accompanying pull request should include a link to the test results and
+  the new bench.
+
+- Non-functional changes (e.g. refactoring, code style, documentation) do not
+  need to be tested on fishtest, unless they might impact performance.
+
+- Provide a clear and concise description of the changes in the pull request
+  description.
+
+_First-time contributors should add their name to [AUTHORS](./AUTHORS)._
+
+_Stockfish's development is not focused on adding new features. Thus any pull
+request introducing new features will potentially be closed without further
+discussion._
+
+## Code Style
+
+Changes to Stockfish C++ code should respect our coding style defined by
+[.clang-format](.clang-format). You can format your changes by running
+`make format`. This requires clang-format version 18 to be installed on your system.
+
+## Navigate
+
+For experienced Git users who frequently use `git blame`, it is recommended to
+configure the `blame.ignoreRevsFile` setting.
+This setting is useful for excluding noisy formatting commits.
+
+```bash
+git config blame.ignoreRevsFile .git-blame-ignore-revs
+```
+
+## Community and Communication
+
+- Join the [Stockfish discord][discord-link] to discuss ideas, issues, and
+  development.
+- Participate in the [Stockfish GitHub discussions][discussions-link] for
+  broader conversations.
+
+## License
+
+By contributing to Stockfish, you agree that your contributions will be licensed
+under the GNU General Public License v3.0. See [Copying.txt][copying-link] for
+more details.
+
+Thank you for contributing to Stockfish and helping us make it even better!
+
+[copying-link]:           https://github.com/official-stockfish/Stockfish/blob/master/Copying.txt
+[discord-link]:           https://discord.gg/GWDRS3kU6R
+[discussions-link]:       https://github.com/official-stockfish/Stockfish/discussions/new
+[creating-my-first-test]: https://github.com/official-stockfish/fishtest/wiki/Creating-my-first-test#create-your-test
+[issue-tracker-link]:     https://github.com/official-stockfish/Stockfish/issues
+[ubuntu-compiling-link]:  https://github.com/official-stockfish/Stockfish/wiki/Developers#user-content-installing-a-compiler-1
+[windows-compiling-link]: https://github.com/official-stockfish/Stockfish/wiki/Developers#user-content-installing-a-compiler
+[macos-compiling-link]:   https://github.com/official-stockfish/Stockfish/wiki/Developers#user-content-installing-a-compiler-2
@@ -1,300 +1,161 @@
+<div align="center">
+
+  [![Stockfish][stockfish128-logo]][website-link]
+
+  <h3>Stockfish</h3>
+
+  A free and strong UCI chess engine.
+  <br>
+  <strong>[Explore Stockfish docs »][wiki-link]</strong>
+  <br>
+  <br>
+  [Report bug][issue-link]
+  ·
+  [Open a discussion][discussions-link]
+  ·
+  [Discord][discord-link]
+  ·
+  [Blog][website-blog-link]
+
+  [![Build][build-badge]][build-link]
+  [![License][license-badge]][license-link]
+  <br>
+  [![Release][release-badge]][release-link]
+  [![Commits][commits-badge]][commits-link]
+  <br>
+  [![Website][website-badge]][website-link]
+  [![Fishtest][fishtest-badge]][fishtest-link]
+  [![Discord][discord-badge]][discord-link]
+
+</div>
+
 ## Overview

-[![Build Status](https://travis-ci.org/official-stockfish/Stockfish.svg?branch=master)](https://travis-ci.org/official-stockfish/Stockfish)
-[![Build Status](https://ci.appveyor.com/api/projects/status/github/official-stockfish/Stockfish?branch=master&svg=true)](https://ci.appveyor.com/project/mcostalba/stockfish/branch/master)
+[Stockfish][website-link] is a **free and strong UCI chess engine** derived from
+Glaurung 2.1 that analyzes chess positions and computes the optimal moves.

-[Stockfish](https://stockfishchess.org) is a free, powerful UCI chess engine
-derived from Glaurung 2.1. Stockfish is not a complete chess program and requires a
-UCI-compatible graphical user interface (GUI) (e.g. XBoard with PolyGlot, Scid,
-Cute Chess, eboard, Arena, Sigma Chess, Shredder, Chess Partner or Fritz) in order
-to be used comfortably. Read the documentation for your GUI of choice for information
-about how to use Stockfish with it.
-
-The Stockfish engine features two evaluation functions for chess, the classical
-evaluation based on handcrafted terms, and the NNUE evaluation based on efficiently
-updatable neural networks. The classical evaluation runs efficiently on almost all
-CPU architectures, while the NNUE evaluation benefits from the vector
-intrinsics available on most CPUs (sse2, avx2, neon, or similar).
+Stockfish **does not include a graphical user interface** (GUI) that is required
+to display a chessboard and to make it easy to input moves. These GUIs are
+developed independently from Stockfish and are available online. **Read the
+documentation for your GUI** of choice for information about how to use
+Stockfish with it.

+See also the Stockfish [documentation][wiki-usage-link] for further usage help.

 ## Files

 This distribution of Stockfish consists of the following files:

-  * Readme.md, the file you are currently reading.
+  * [README.md][readme-link], the file you are currently reading.

-  * Copying.txt, a text file containing the GNU General Public License version 3.
-  
-  * AUTHORS, a text file with the list of authors for the project
+  * [Copying.txt][license-link], a text file containing the GNU General Public
+    License version 3.

-  * src, a subdirectory containing the full source code, including a Makefile
-    that can be used to compile Stockfish on Unix-like systems.
+  * [AUTHORS][authors-link], a text file with the list of authors for the project.

-  * a file with the .nnue extension, storing the neural network for the NNUE 
+  * [src][src-link], a subdirectory containing the full source code, including a
+    Makefile that can be used to compile Stockfish on Unix-like systems.
+
+  * a file with the .nnue extension, storing the neural network for the NNUE
    evaluation. Binary distributions will have this file embedded.

-## UCI options
+## Contributing

-Currently, Stockfish has the following UCI options:
-
-  * #### Threads
-    The number of CPU threads used for searching a position. For best performance, set
-    this equal to the number of CPU cores available.
-
-  * #### Hash
-    The size of the hash table in MB. It is recommended to set Hash after setting Threads.
-
-  * #### Clear Hash
-    Clear the hash table.
-
-  * #### Ponder
-    Let Stockfish ponder its next move while the opponent is thinking.
-
-  * #### MultiPV
-    Output the N best lines (principal variations, PVs) when searching.
-    Leave at 1 for best performance.
-
-  * #### Use NNUE
-    Toggle between the NNUE and classical evaluation functions. If set to "true",
-    the network parameters must be available to load from file (see also EvalFile),
-    if they are not embedded in the binary.
-
-  * #### EvalFile
-    The name of the file of the NNUE evaluation parameters. Depending on the GUI the
-    filename might have to include the full path to the folder/directory that contains the file.
-    Other locations, such as the directory that contains the binary and the working directory,
-    are also searched.
-
-  * #### UCI_AnalyseMode
-    An option handled by your GUI.
-
-  * #### UCI_Chess960
-    An option handled by your GUI. If true, Stockfish will play Chess960.
-
-  * #### UCI_ShowWDL
-    If enabled, show approximate WDL statistics as part of the engine output.
-    These WDL numbers model expected game outcomes for a given evaluation and
-    game ply for engine self-play at fishtest LTC conditions (60+0.6s per game).
-
-  * #### UCI_LimitStrength
-    Enable weaker play aiming for an Elo rating as set by UCI_Elo. This option overrides Skill Level.
-
-  * #### UCI_Elo
-    If enabled by UCI_LimitStrength, aim for an engine strength of the given Elo.
-    This Elo rating has been calibrated at a time control of 60s+0.6s and anchored to CCRL 40/4.
-
-  * #### Skill Level
-    Lower the Skill Level in order to make Stockfish play weaker (see also UCI_LimitStrength).
-    Internally, MultiPV is enabled, and with a certain probability depending on the Skill Level a
-    weaker move will be played.
-
-  * #### SyzygyPath
-    Path to the folders/directories storing the Syzygy tablebase files. Multiple
-    directories are to be separated by ";" on Windows and by ":" on Unix-based
-    operating systems. Do not use spaces around the ";" or ":".
-
-    Example: `C:\tablebases\wdl345;C:\tablebases\wdl6;D:\tablebases\dtz345;D:\tablebases\dtz6`
-
-    It is recommended to store .rtbw files on an SSD. There is no loss in storing
-    the .rtbz files on a regular HD. It is recommended to verify all md5 checksums
-    of the downloaded tablebase files (`md5sum -c checksum.md5`) as corruption will
-    lead to engine crashes.
-
-  * #### SyzygyProbeDepth
-    Minimum remaining search depth for which a position is probed. Set this option
-    to a higher value to probe less aggressively if you experience too much slowdown
-    (in terms of nps) due to tablebase probing.
-
-  * #### Syzygy50MoveRule
-    Disable to let fifty-move rule draws detected by Syzygy tablebase probes count
-    as wins or losses. This is useful for ICCF correspondence games.
-
-  * #### SyzygyProbeLimit
-    Limit Syzygy tablebase probing to positions with at most this many pieces left
-    (including kings and pawns).
-
-  * #### Contempt
-    A positive value for contempt favors middle game positions and avoids draws,
-    effective for the classical evaluation only.
-
-  * #### Analysis Contempt
-    By default, contempt is set to prefer the side to move. Set this option to "White"
-    or "Black" to analyse with contempt for that side, or "Off" to disable contempt.
-
-  * #### Move Overhead
-    Assume a time delay of x ms due to network and GUI overheads. This is useful to
-    avoid losses on time in those cases.
-
-  * #### Slow Mover
-    Lower values will make Stockfish take less time in games, higher values will
-    make it think longer.
-
-  * #### nodestime
-    Tells the engine to use nodes searched instead of wall time to account for
-    elapsed time. Useful for engine testing.
-
-  * #### Debug Log File
-    Write all communication to and from the engine into a text file.
-
-## A note on classical evaluation versus NNUE evaluation
-
-Both approaches assign a value to a position that is used in alpha-beta (PVS) search
-to find the best move. The classical evaluation computes this value as a function
-of various chess concepts, handcrafted by experts, tested and tuned using fishtest.
-The NNUE evaluation computes this value with a neural network based on basic
-inputs (e.g. piece positions only). The network is optimized and trained
-on the evaluations of millions of positions at moderate search depth.
-
-The NNUE evaluation was first introduced in shogi, and ported to Stockfish afterward.
-It can be evaluated efficiently on CPUs, and exploits the fact that only parts
-of the neural network need to be updated after a typical chess move.
-[The nodchip repository](https://github.com/nodchip/Stockfish) provides additional
-tools to train and develop the NNUE networks. On CPUs supporting modern vector instructions
-(avx2 and similar), the NNUE evaluation results in much stronger playing strength, even
-if the nodes per second computed by the engine is somewhat lower (roughly 80% of nps
-is typical).
-
-Notes:
-
-1) the NNUE evaluation depends on the Stockfish binary and the network parameter
-file (see the EvalFile UCI option). Not every parameter file is compatible with a given
-Stockfish binary, but the default value of the EvalFile UCI option is the name of a network
-that is guaranteed to be compatible with that binary.
-
-2) to use the NNUE evaluation, the additional data file with neural network parameters
-needs to be available. Normally, this file is already embedded in the binary or it 
-can be downloaded. The filename for the default (recommended) net can be found as the default
-value of the `EvalFile` UCI option, with the format `nn-[SHA256 first 12 digits].nnue`
-(for instance, `nn-c157e0a5755b.nnue`). This file can be downloaded from
-```
-https://tests.stockfishchess.org/api/nn/[filename]
-```
-replacing `[filename]` as needed.
-
-## What to expect from the Syzygy tablebases?
-
-If the engine is searching a position that is not in the tablebases (e.g.
-a position with 8 pieces), it will access the tablebases during the search.
-If the engine reports a very large score (typically 153.xx), this means 
-it has found a winning line into a tablebase position.
-
-If the engine is given a position to search that is in the tablebases, it
-will use the tablebases at the beginning of the search to preselect all
-good moves, i.e. all moves that preserve the win or preserve the draw while
-taking into account the 50-move rule.
-It will then perform a search only on those moves. **The engine will not move
-immediately**, unless there is only a single good move. **The engine likely
-will not report a mate score, even if the position is known to be won.**
-
-It is therefore clear that this behaviour is not identical to what one might
-be used to with Nalimov tablebases. There are technical reasons for this
-difference, the main technical reason being that Nalimov tablebases use the
-DTM metric (distance-to-mate), while the Syzygy tablebases use a variation of the
-DTZ metric (distance-to-zero, zero meaning any move that resets the 50-move
-counter). This special metric is one of the reasons that the Syzygy tablebases are
-more compact than Nalimov tablebases, while still storing all information
-needed for optimal play and in addition being able to take into account
-the 50-move rule.
-
-## Large Pages
-
-Stockfish supports large pages on Linux and Windows. Large pages make
-the hash access more efficient, improving the engine speed, especially
-on large hash sizes. Typical increases are 5..10% in terms of nodes per
-second, but speed increases up to 30% have been measured. The support is
-automatic. Stockfish attempts to use large pages when available and
-will fall back to regular memory allocation when this is not the case.
-
-### Support on Linux
-
-Large page support on Linux is obtained by the Linux kernel
-transparent huge pages functionality. Typically, transparent huge pages
-are already enabled, and no configuration is needed.
-
-### Support on Windows
-
-The use of large pages requires "Lock Pages in Memory" privilege. See
-[Enable the Lock Pages in Memory Option (Windows)](https://docs.microsoft.com/en-us/sql/database-engine/configure-windows/enable-the-lock-pages-in-memory-option-windows)
-on how to enable this privilege, then run [RAMMap](https://docs.microsoft.com/en-us/sysinternals/downloads/rammap)
-to double-check that large pages are used. We suggest that you reboot
-your computer after you have enabled large pages, because long Windows
-sessions suffer from memory fragmentation, which may prevent Stockfish
-from getting large pages: a fresh session is better in this regard.
-
-## Compiling Stockfish yourself from the sources
-
-Stockfish has support for 32 or 64-bit CPUs, certain hardware
-instructions, big-endian machines such as Power PC, and other platforms.
-
-On Unix-like systems, it should be easy to compile Stockfish
-directly from the source code with the included Makefile in the folder
-`src`. In general it is recommended to run `make help` to see a list of make
-targets with corresponding descriptions.
-
-```
-    cd src
-    make help
-    make net
-    make build ARCH=x86-64-modern
-```
-
-When not using the Makefile to compile (for instance, with Microsoft MSVC) you
-need to manually set/unset some switches in the compiler command line; see
-file *types.h* for a quick reference.
-
-When reporting an issue or a bug, please tell us which version and
-compiler you used to create your executable. These informations can
-be found by typing the following commands in a console:
-
-```
-    ./stockfish compiler
-```
-
-## Understanding the code base and participating in the project
-
-Stockfish's improvement over the last couple of years has been a great
-community effort. There are a few ways to help contribute to its growth.
+__See [Contributing Guide](CONTRIBUTING.md).__

 ### Donating hardware

-Improving Stockfish requires a massive amount of testing. You can donate
-your hardware resources by installing the [Fishtest Worker](https://github.com/glinscott/fishtest/wiki/Running-the-worker:-overview)
-and view the current tests on [Fishtest](https://tests.stockfishchess.org/tests).
+Improving Stockfish requires a massive amount of testing. You can donate your
+hardware resources by installing the [Fishtest Worker][worker-link], and you can
+view the current tests on [Fishtest][fishtest-link].

 ### Improving the code

-If you want to help improve the code, there are several valuable resources:
-
-* [In this wiki,](https://www.chessprogramming.org) many techniques used in
+In the [chessprogramming wiki][programming-link], many techniques used in
 Stockfish are explained with a lot of background information.
+The [section on Stockfish][programmingsf-link] describes many features
+and techniques used by Stockfish. However, it is generic rather than
+focused on Stockfish's precise implementation.

-* [The section on Stockfish](https://www.chessprogramming.org/Stockfish)
-describes many features and techniques used by Stockfish. However, it is
-generic rather than being focused on Stockfish's precise implementation.
-Nevertheless, a helpful resource.
-
-* The latest source can always be found on [GitHub](https://github.com/official-stockfish/Stockfish).
-Discussions about Stockfish take place these days mainly in the [FishCooking](https://groups.google.com/forum/#!forum/fishcooking)
-group and on the [Stockfish Discord channel](https://discord.gg/nv8gDtt).
-The engine testing is done on [Fishtest](https://tests.stockfishchess.org/tests).
-If you want to help improve Stockfish, please read this [guideline](https://github.com/glinscott/fishtest/wiki/Creating-my-first-test)
+The engine testing is done on [Fishtest][fishtest-link].
+If you want to help improve Stockfish, please read this [guideline][guideline-link]
 first, where the basics of Stockfish development are explained.

+Discussions about Stockfish take place these days mainly in the Stockfish
+[Discord server][discord-link]. This is also the best place to ask questions
+about the codebase and how to improve it.
+
+## Compiling Stockfish
+
+Stockfish has support for 32-bit or 64-bit CPUs, certain hardware instructions,
+big-endian machines such as PowerPC, and other platforms.
+
+On Unix-like systems, it should be easy to compile Stockfish directly from the
+source code with the included Makefile in the folder `src`. In general, it is
+recommended to run `make help` to see a list of make targets with corresponding
+descriptions. An example suitable for most Intel and AMD chips:
+
+```
+cd src
+make -j profile-build
+```
+
+Detailed compilation instructions for all platforms can be found in our
+[documentation][wiki-compile-link]. Our wiki also has information about
+the [UCI commands][wiki-uci-link] supported by Stockfish.

 ## Terms of use

-Stockfish is free, and distributed under the **GNU General Public License version 3**
-(GPL v3). Essentially, this means you are free to do almost exactly
-what you want with the program, including distributing it among your
-friends, making it available for download from your website, selling
-it (either by itself or as part of some bigger software package), or
-using it as the starting point for a software project of your own.
+Stockfish is free and distributed under the
+[**GNU General Public License version 3**][license-link] (GPL v3). Essentially,
+this means you are free to do almost exactly what you want with the program,
+including distributing it among your friends, making it available for download
+from your website, selling it (either by itself or as part of some bigger
+software package), or using it as the starting point for a software project of
+your own.

-The only real limitation is that whenever you distribute Stockfish in
-some way, you MUST always include the full source code, or a pointer
-to where the source code can be found, to generate the exact binary
-you are distributing. If you make any changes to the source code,
-these changes must also be made available under the GPL.
+The only real limitation is that whenever you distribute Stockfish in some way,
+you MUST always include the license and the full source code (or a pointer to
+where the source code can be found) to generate the exact binary you are
+distributing. If you make any changes to the source code, these changes must
+also be made available under GPL v3.

-For full details, read the copy of the GPL v3 found in the file named
-*Copying.txt*.
+## Acknowledgements
+
+Stockfish uses neural networks trained on [data provided by the Leela Chess Zero
+project][lc0-data-link], which is made available under the [Open Database License][odbl-link] (ODbL).
+
+
+[authors-link]:       https://github.com/official-stockfish/Stockfish/blob/master/AUTHORS
+[build-link]:         https://github.com/official-stockfish/Stockfish/actions/workflows/stockfish.yml
+[commits-link]:       https://github.com/official-stockfish/Stockfish/commits/master
+[discord-link]:       https://discord.gg/GWDRS3kU6R
+[issue-link]:         https://github.com/official-stockfish/Stockfish/issues/new?assignees=&labels=&template=BUG-REPORT.yml
+[discussions-link]:   https://github.com/official-stockfish/Stockfish/discussions/new
+[fishtest-link]:      https://tests.stockfishchess.org/tests
+[guideline-link]:     https://github.com/official-stockfish/fishtest/wiki/Creating-my-first-test
+[license-link]:       https://github.com/official-stockfish/Stockfish/blob/master/Copying.txt
+[programming-link]:   https://www.chessprogramming.org/Main_Page
+[programmingsf-link]: https://www.chessprogramming.org/Stockfish
+[readme-link]:        https://github.com/official-stockfish/Stockfish/blob/master/README.md
+[release-link]:       https://github.com/official-stockfish/Stockfish/releases/latest
+[src-link]:           https://github.com/official-stockfish/Stockfish/tree/master/src
+[stockfish128-logo]:  https://stockfishchess.org/images/logo/icon_128x128.png
+[uci-link]:           https://backscattering.de/chess/uci/
+[website-link]:       https://stockfishchess.org
+[website-blog-link]:  https://stockfishchess.org/blog/
+[wiki-link]:          https://github.com/official-stockfish/Stockfish/wiki
+[wiki-compile-link]:  https://github.com/official-stockfish/Stockfish/wiki/Compiling-from-source
+[wiki-uci-link]:      https://github.com/official-stockfish/Stockfish/wiki/UCI-&-Commands
+[wiki-usage-link]:    https://github.com/official-stockfish/Stockfish/wiki/Download-and-usage
+[worker-link]:        https://github.com/official-stockfish/fishtest/wiki/Running-the-worker
+[lc0-data-link]:      https://storage.lczero.org/files/training_data
+[odbl-link]:          https://opendatacommons.org/licenses/odbl/odbl-10.txt
+
+[build-badge]:        https://img.shields.io/github/actions/workflow/status/official-stockfish/Stockfish/stockfish.yml?branch=master&style=for-the-badge&label=stockfish&logo=github
+[commits-badge]:      https://img.shields.io/github/commits-since/official-stockfish/Stockfish/latest?style=for-the-badge
+[discord-badge]:      https://img.shields.io/discord/435943710472011776?style=for-the-badge&label=discord&logo=Discord
+[fishtest-badge]:     https://img.shields.io/website?style=for-the-badge&down_color=red&down_message=Offline&label=Fishtest&up_color=success&up_message=Online&url=https%3A%2F%2Ftests.stockfishchess.org%2Ftests%2Ffinished
+[license-badge]:      https://img.shields.io/github/license/official-stockfish/Stockfish?style=for-the-badge&label=license&color=success
+[release-badge]:      https://img.shields.io/github/v/release/official-stockfish/Stockfish?style=for-the-badge&label=official%20release
+[website-badge]:      https://img.shields.io/website?style=for-the-badge&down_color=red&down_message=Offline&label=website&up_color=success&up_message=Online&url=https%3A%2F%2Fstockfishchess.org
@@ -1,189 +1,301 @@
-Contributors to Fishtest with >10,000 CPU hours, as of Feb 15, 2021.
+Contributors to Fishtest with >10,000 CPU hours, as of 2024-08-31.
 Thank you!

-Username                CPU Hours       Games played
----------------------------------------------------
-noobpwnftw               23930906         1560559941
-dew                       1169948           70333008
-mlang                      957168           61657446
-mibere                     703840           46867607
-tvijlbrief                 517888           33379462
-JojoM                      515404           30334272
-cw                         443276           29385549
-crunchy                    427035           27344275
-grandphish2                425794           26347253
-fastgm                     414133           24519696
-gvreuls                    377843           24708884
-CSU_Dynasty                338718           23030006
-Fisherman                  326795           21820747
-TueRens                    313730           19490246
-ctoks                      298442           20052551
-velislav                   270519           17355456
-bcross                     241064           17196165
-glinscott                  217799           13780820
-nordlandia                 211692           13484886
-bking_US                   198894           11876016
-drabel                     191096           13129722
-leszek                     189170           11446821
-mgrabiak                   187153           12013300
-robal                      181389           11539242
-Thanar                     179852           12365359
-vdv                        175274            9889046
-spams                      157128           10319326
-marrco                     150292            9401741
-sqrt2                      147963            9724586
-CoffeeOne                  137086            5022516
-vdbergh                    137041            8926915
-malala                     136182            8002293
-mhoram                     132780            8398229
-xoto                       124729            8652088
-davar                      122092            7960001
-dsmith                     122059            7570238
-Data                       113305            8220352
-BrunoBanani                112960            7436849
-pemo                       109598            5036441
-Dantist                    106768            6431396
-MaZePallas                 102741            6630419
-ElbertoOne                  99028            7023771
-brabos                      92118            6186135
-linrock                     90903            6708639
-psk                         89957            5984901
-sunu                        88614            6020673
-sterni1971                  86948            5613788
-Vizvezdenec                 83761            5344740
-BRAVONE                     81239            5054681
-nssy                        76497            5259388
-cuistot                     76366            4370584
-racerschmacer               75753            5442626
-teddybaer                   75125            5407666
-Pking_cda                   73776            5293873
-0x3C33                      73133            4670293
-jromang                     72117            5054915
-solarlight                  70517            5028306
-dv8silencer                 70287            3883992
-Bobo1239                    68515            4652287
-manap                       66273            4121774
-tinker                      64321            4268390
-robnjr                      57262            4053117
-Freja                       56938            3733019
-ttruscott                   56010            3680085
-rkl                         54986            4150767
-renouve                     53811            3501516
-finfish                     51360            3370515
-eva42                       51272            3599691
-rap                         49985            3219146
-pb00067                     49727            3298270
-amicic                      49691            3042481
-ronaldjerum                 47654            3240695
-bigpen0r                    47278            3291647
-biffhero                    46564            3111352
-VoyagerOne                  45476            3452465
-eastorwest                  45033            3071805
-speedycpu                   43842            3003273
-jbwiebe                     43305            2805433
-Antihistamine               41788            2761312
-mhunt                       41735            2691355
-homyur                      39893            2850481
-gri                         39871            2515779
-oryx                        38282            2944400
-Spprtr                      38157            2470529
-SC                          37290            2731014
-csnodgrass                  36207            2688994
-jmdana                      36157            2210661
-strelock                    34716            2074055
-Garf                        33800            2747562
-skiminki                    33515            2055584
-EthanOConnor                33370            2090311
-slakovv                     32915            2021889
-yurikvelo                   32600            2255966
-Prcuvu                      30377            2170122
-manapbk                     30326            1770143
-anst                        30301            2190091
-jkiiski                     30136            1904470
-hyperbolic.tom              29840            2017394
-Pyafue                      29650            1902349
-qurashee                    27758            1509620
-OuaisBla                    27636            1578800
-chriswk                     26902            1868317
-achambord                   26582            1767323
-Fifis                       26376            1776853
-Patrick_G                   26276            1801617
-yorkman                     26193            1992080
-SFTUser                     25182            1675689
-nabildanial                 24942            1519409
-Sharaf_DG                   24765            1786697
-ncfish1                     24411            1520927
-agg177                      23890            1395014
-JanErik                     23408            1703875
-Isidor                      23388            1680691
-Norabor                     23164            1591830
-cisco2015                   22895            1762069
-Zirie                       22542            1472937
-team-oh                     22272            1636708
-MazeOfGalious               21978            1629593
-sg4032                      21945            1643065
-ianh2105                    21725            1632562
-xor12                       21628            1680365
-dex                         21612            1467203
-nesoneg                     21494            1463031
-jjoshua2                    20997            1422689
-horst.prack                 20878            1465656
-0xB00B1ES                   20590            1208666
-sphinx                      20515            1352368
-j3corre                     20405            941444
-Adrian.Schmidt123           20316            1281436
-Ente                        20017            1432602
-wei                         19973            1745989
-rstoesser                   19569            1293588
-eudhan                      19274            1283717
-jundery                     18445            1115855
-iisiraider                  18247            1101015
-ville                       17883            1384026
-chris                       17698            1487385
-purplefishies               17595            1092533
-DMBK                        17357            1279152
-DragonLord                  17014            1162790
-dju                         16515             929427
-IgorLeMasson                16064            1147232
-ako027ako                   15671            1173203
-Nikolay.IT                  15154            1068349
-Andrew Grant                15114             895539
-OssumOpossum                14857            1007129
-enedene                     14476             905279
-bpfliegel                   14298             884523
-jpulman                     13982             870599
-joster                      13794             950160
-Nesa92                      13786            1114691
-crocogoat                   13753            1114622
-Hjax                        13535             915487
-Dark_wizzie                 13422            1007152
-mpx86                       12941             693640
-mabichito                   12903             749391
-thijsk                      12886             722107
-AdrianSA                    12860             804972
-Flopzee                     12698             894821
-fatmurphy                   12547             853210
-scuzzi                      12511             845761
-Karby                       12429             735880
-SapphireBrand               12416             969604
-modolief                    12386             896470
-pgontarz                    12151             848794
-stocky                      11954             699440
-mschmidt                    11941             803401
-infinity                    11470             727027
-torbjo                      11395             729145
-Thomas A. Anderson          11372             732094
-d64                         11263             789184
-Maxim                       11129             804704
-snicolet                    11106             869170
-MooTheCow                   11008             694942
-savage84                    10965             641068
-Rudolphous                  10915             741268
-Wolfgang                    10809             580032
-rpngn                       10712             688203
-basepi                      10637             744851
-michaelrpg                  10409             735127
-dzjp                        10343             732529
-ali-al-zhrani               10324             726502
-ols                         10259             570669
-lbraesch                    10252             647825
+Username                                CPU Hours     Games played
+------------------------------------------------------------------
+noobpwnftw                               40428649       3164740143
+technologov                              23581394       1076895482
+vdv                                      19425375        718302718
+linrock                                  10034115        643194527
+mlang                                     3026000        200065824
+okrout                                    2572676        237511408
+pemo                                      1836785         62226157
+dew                                       1689162        100033738
+TueRens                                   1648780         77891164
+sebastronomy                              1468328         60859092
+grandphish2                               1466110         91776075
+JojoM                                     1130625         73666098
+olafm                                     1067009         74807270
+tvijlbrief                                 796125         51897690
+oz                                         781847         53910686
+rpngn                                      768460         49812975
+gvreuls                                    751085         52177668
+mibere                                     703840         46867607
+leszek                                     566598         42024615
+cw                                         519601         34988161
+fastgm                                     503862         30260818
+CSU_Dynasty                                468784         31385034
+maximmasiutin                              439192         27893522
+ctoks                                      435148         28541909
+crunchy                                    427414         27371625
+bcross                                     415724         29061187
+robal                                      371112         24642270
+mgrabiak                                   367963         26464704
+velislav                                   342588         22140902
+ncfish1                                    329039         20624527
+Fisherman                                  327231         21829379
+Dantist                                    296386         18031762
+tolkki963                                  262050         22049676
+Sylvain27                                  255595          8864404
+nordlandia                                 249322         16420192
+Fifis                                      237657         13065577
+marrco                                     234581         17714473
+Calis007                                   217537         14450582
+glinscott                                  208125         13277240
+drabel                                     204167         13930674
+mhoram                                     202894         12601997
+bking_US                                   198894         11876016
+Thanar                                     179852         12365359
+javran                                     169679         13481966
+armo9494                                   162863         10937118
+spams                                      157128         10319326
+DesolatedDodo                              156683         10211206
+Wencey                                     152308          8375444
+sqrt2                                      147963          9724586
+vdbergh                                    140311          9225125
+jcAEie                                     140086         10603658
+CoffeeOne                                  137100          5024116
+malala                                     136182          8002293
+xoto                                       133759          9159372
+Dubslow                                    129614          8519312
+davar                                      129023          8376525
+DMBK                                       122960          8980062
+dsmith                                     122059          7570238
+CypressChess                               120784          8672620
+sschnee                                    120526          7547722
+maposora                                   119734         10749710
+amicic                                     119661          7938029
+Wolfgang                                   115713          8159062
+Data                                       113305          8220352
+BrunoBanani                                112960          7436849
+markkulix                                  112897          9133168
+cuistot                                    109802          7121030
+skiminki                                   107583          7218170
+sterni1971                                 104431          5938282
+MaZePallas                                 102823          6633619
+sunu                                       100167          7040199
+zeryl                                       99331          6221261
+thirdlife                                   99156          2245320
+ElbertoOne                                  99028          7023771
+megaman7de                                  98456          6675076
+Goatminola                                  96765          8257832
+bigpen0r                                    94825          6529241
+brabos                                      92118          6186135
+Maxim                                       90818          3283364
+psk                                         89957          5984901
+racerschmacer                               85805          6122790
+Vizvezdenec                                 83761          5344740
+0x3C33                                      82614          5271253
+szupaw                                      82495          7151686
+BRAVONE                                     81239          5054681
+nssy                                        76497          5259388
+cody                                        76126          4492126
+jromang                                     76106          5236025
+MarcusTullius                               76103          5061991
+woutboat                                    76072          6022922
+Spprtr                                      75977          5252287
+teddybaer                                   75125          5407666
+Pking_cda                                   73776          5293873
+yurikvelo                                   73611          5046822
+Mineta                                      71130          4711422
+Bobo1239                                    70579          4794999
+solarlight                                  70517          5028306
+dv8silencer                                 70287          3883992
+manap                                       66273          4121774
+tinker                                      64333          4268790
+qurashee                                    61208          3429862
+AGI                                         58195          4329580
+robnjr                                      57262          4053117
+Freja                                       56938          3733019
+MaxKlaxxMiner                               56879          3423958
+ttruscott                                   56010          3680085
+rkl                                         55132          4164467
+jmdana                                      54697          4012593
+notchris                                    53936          4184018
+renouve                                     53811          3501516
+finfish                                     51360          3370515
+eva42                                       51272          3599691
+eastorwest                                  51117          3454811
+rap                                         49985          3219146
+pb00067                                     49733          3298934
+GPUex                                       48686          3684998
+OuaisBla                                    48626          3445134
+ronaldjerum                                 47654          3240695
+biffhero                                    46564          3111352
+oryx                                        45639          3546530
+VoyagerOne                                  45476          3452465
+speedycpu                                   43842          3003273
+jbwiebe                                     43305          2805433
+Antihistamine                               41788          2761312
+mhunt                                       41735          2691355
+jibarbosa                                   41640          4145702
+homyur                                      39893          2850481
+gri                                         39871          2515779
+DeepnessFulled                              39020          3323102
+Garf                                        37741          2999686
+SC                                          37299          2731694
+Gaster319                                   37118          3279678
+naclosagc                                   36562          1279618
+csnodgrass                                  36207          2688994
+strelock                                    34716          2074055
+gopeto                                      33717          2245606
+EthanOConnor                                33370          2090311
+slakovv                                     32915          2021889
+jojo2357                                    32890          2826662
+shawnxu                                     32019          2802552
+Gelma                                       31771          1551204
+vidar808                                    31560          1351810
+kdave                                       31157          2198362
+manapbk                                     30987          1810399
+ZacHFX                                      30966          2272416
+TataneSan                                   30713          1513402
+votoanthuan                                 30691          2460856
+Prcuvu                                      30377          2170122
+anst                                        30301          2190091
+jkiiski                                     30136          1904470
+spcc                                        29925          1901692
+hyperbolic.tom                              29840          2017394
+chuckstablers                               29659          2093438
+Pyafue                                      29650          1902349
+belzedar94                                  28846          1811530
+mecevdimitar                                27610          1721382
+chriswk                                     26902          1868317
+xwziegtm                                    26897          2124586
+achambord                                   26582          1767323
+somethingintheshadows                       26496          2186404
+Patrick_G                                   26276          1801617
+yorkman                                     26193          1992080
+srowen                                      25743          1490684
+Ulysses                                     25413          1702830
+Jopo12321                                   25227          1652482
+SFTUser                                     25182          1675689
+nabildanial                                 25068          1531665
+Sharaf_DG                                   24765          1786697
+rodneyc                                     24376          1416402
+jsys14                                      24297          1721230
+agg177                                      23890          1395014
+AndreasKrug                                 23754          1890115
+Ente                                        23752          1678188
+JanErik                                     23408          1703875
+Isidor                                      23388          1680691
+Norabor                                     23371          1603244
+WoodMan777                                  23253          2023048
+Nullvalue                                   23155          2022752
+cisco2015                                   22920          1763301
+Zirie                                       22542          1472937
+team-oh                                     22272          1636708
+Roady                                       22220          1465606
+MazeOfGalious                               21978          1629593
+sg4032                                      21950          1643373
+tsim67                                      21747          1330880
+ianh2105                                    21725          1632562
+Skiff84                                     21711          1014212
+xor12                                       21628          1680365
+dex                                         21612          1467203
+nesoneg                                     21494          1463031
+user213718                                  21454          1404128
+Serpensin                                   21452          1790510
+sphinx                                      21211          1384728
+qoo_charly_cai                              21136          1514927
+IslandLambda                                21062          1220838
+jjoshua2                                    21001          1423089
+Zake9298                                    20938          1565848
+horst.prack                                 20878          1465656
+fishtester                                  20729          1348888
+0xB00B1ES                                   20590          1208666
+ols                                         20477          1195945
+Dinde                                       20459          1292774
+j3corre                                     20405           941444
+Adrian.Schmidt123                           20316          1281436
+wei                                         19973          1745989
+teenychess                                  19819          1762006
+rstoesser                                   19569          1293588
+eudhan                                      19274          1283717
+vulcan                                      18871          1729392
+wizardassassin                              18795          1376884
+Karpovbot                                   18766          1053178
+jundery                                     18445          1115855
+mkstockfishtester                           18350          1690676
+ville                                       17883          1384026
+chris                                       17698          1487385
+purplefishies                               17595          1092533
+dju                                         17414           981289
+iisiraider                                  17275          1049015
+DragonLord                                  17014          1162790
+Karby                                       17008          1013160
+pirt                                        16965          1271519
+redstone59                                  16842          1461780
+Alb11747                                    16787          1213990
+Naven94                                     16414           951718
+scuzzi                                      16115           994341
+IgorLeMasson                                16064          1147232
+ako027ako                                   15671          1173203
+infinigon                                   15285           965966
+Nikolay.IT                                  15154          1068349
+Andrew Grant                                15114           895539
+OssumOpossum                                14857          1007129
+LunaticBFF57                                14525          1190310
+enedene                                     14476           905279
+Hjax                                        14394          1005013
+bpfliegel                                   14233           882523
+YELNAMRON                                   14230          1128094
+mpx86                                       14019           759568
+jpulman                                     13982           870599
+getraideBFF                                 13871          1172846
+Nesa92                                      13806          1116101
+crocogoat                                   13803          1117422
+joster                                      13710           946160
+mbeier                                      13650          1044928
+Pablohn26                                   13552          1088532
+wxt9861                                     13550          1312306
+Dark_wizzie                                 13422          1007152
+Rudolphous                                  13244           883140
+Machariel                                   13010           863104
+nalanzeyu                                   12996           232590
+mabichito                                   12903           749391
+Jackfish                                    12895           868928
+thijsk                                      12886           722107
+AdrianSA                                    12860           804972
+Flopzee                                     12698           894821
+whelanh                                     12682           266404
+mschmidt                                    12644           863193
+korposzczur                                 12606           838168
+fatmurphy                                   12547           853210
+Oakwen                                      12532           855759
+icewulf                                     12447           854878
+SapphireBrand                               12416           969604
+deflectooor                                 12386           579392
+modolief                                    12386           896470
+Farseer                                     12249           694108
+Hongildong                                  12201           648712
+pgontarz                                    12151           848794
+dbernier                                    12103           860824
+szczur90                                    12035           942376
+FormazChar                                  12019           910409
+rensonthemove                               11999           971993
+stocky                                      11954           699440
+MooTheCow                                   11923           779432
+3cho                                        11842          1036786
+ckaz                                        11792           732276
+infinity                                    11470           727027
+aga                                         11412           695127
+torbjo                                      11395           729145
+Thomas A. Anderson                          11372           732094
+savage84                                    11358           670860
+Def9Infinity                                11345           696552
+d64                                         11263           789184
+ali-al-zhrani                               11245           779246
+ImperiumAeternum                            11155           952000
+snicolet                                    11106           869170
+dapper                                      11032           771402
+Ethnikoi                                    10993           945906
+Snuuka                                      10938           435504
+Karmatron                                   10871           678306
+basepi                                      10637           744851
+Cubox                                       10621           826448
+gerbil                                      10519           971688
+michaelrpg                                  10509           739239
+OIVAS7572                                   10420           995586
+Garruk                                      10365           706465
+dzjp                                        10343           732529
+RickGroszkiewicz                            10263           990798
@@ -1,88 +0,0 @@
-version: 1.0.{build}
-clone_depth: 50
-
-branches:
-  only:
-    - master
-
-# Operating system (build VM template)
-os: Visual Studio 2019
-
-# Build platform, i.e. x86, x64, AnyCPU. This setting is optional.
-platform:
-  - x86
-  - x64
-
-# build Configuration, i.e. Debug, Release, etc.
-configuration:
-  - Debug
-  - Release
-
-matrix:
-  # The build fail immediately once one of the job fails
-  fast_finish: true
-
-# Scripts that are called at very beginning, before repo cloning
-init:
-  - cmake --version
-  - msbuild /version
-
-before_build:
-  - ps: |
-      # Get sources
-      $src = get-childitem -Path *.cpp -Recurse | select -ExpandProperty FullName
-      $src = $src -join ' '
-      $src = $src.Replace("\", "/")
-
-      # Build CMakeLists.txt
-      $t = 'cmake_minimum_required(VERSION 3.17)',
-           'project(Stockfish)',
-           'set(CMAKE_CXX_STANDARD 17)',
-           'set(CMAKE_CXX_STANDARD_REQUIRED ON)',
-           'set (CMAKE_CXX_EXTENSIONS OFF)',
-           'set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/src)',
-           'set(source_files', $src, ')',
-           'add_executable(stockfish ${source_files})'
-
-      # Write CMakeLists.txt withouth BOM
-      $MyPath = (Get-Item -Path "." -Verbose).FullName + '\CMakeLists.txt'
-      $Utf8NoBomEncoding = New-Object System.Text.UTF8Encoding $False
-      [System.IO.File]::WriteAllLines($MyPath, $t, $Utf8NoBomEncoding)
-
-      # Obtain bench reference from git log
-      $b = git log HEAD | sls "\b[Bb]ench[ :]+[0-9]{7}" | select -first 1
-      $bench = $b -match '\D+(\d+)' | % { $matches[1] }
-      Write-Host "Reference bench:" $bench
-      $g = "Visual Studio 16 2019"
-      If (${env:PLATFORM} -eq 'x64') { $a = "x64" }
-      If (${env:PLATFORM} -eq 'x86') { $a = "Win32" }
-      cmake -G "${g}" -A ${a} .
-      Write-Host "Generated files for: " $g $a
-
-build_script:
-  - cmake --build . --config %CONFIGURATION% -- /verbosity:minimal
-  - ps: |
-      # Download default NNUE net from fishtest
-      $nnuenet = Get-Content -Path src\evaluate.h | Select-String -CaseSensitive -Pattern "EvalFileDefaultName" | Select-String -CaseSensitive -Pattern "nn-[a-z0-9]{12}.nnue"
-      $dummy = $nnuenet -match "(?<nnuenet>nn-[a-z0-9]{12}.nnue)"
-      $nnuenet = $Matches.nnuenet
-      Write-Host "Default net:" $nnuenet
-      $nnuedownloadurl = "https://tests.stockfishchess.org/api/nn/$nnuenet"
-      $nnuefilepath = "src\${env:CONFIGURATION}\$nnuenet"
-      if (Test-Path -Path $nnuefilepath) {
-            Write-Host "Already available."
-      } else {
-            Write-Host "Downloading $nnuedownloadurl to $nnuefilepath"
-            Invoke-WebRequest -Uri $nnuedownloadurl -OutFile $nnuefilepath
-      }
-
-before_test:
-  - cd src/%CONFIGURATION%
-  - stockfish bench 2> out.txt >NUL
-  - ps: |
-      # Verify bench number
-      $s = (gc "./out.txt" | out-string)
-      $r = ($s -match 'Nodes searched \D+(\d+)' | % { $matches[1] })
-      Write-Host "Engine bench:" $r
-      Write-Host "Reference bench:" $bench
-      If ($r -ne $bench) { exit 1 }
@@ -0,0 +1 @@
+*.sh text eol=lf
@@ -0,0 +1,153 @@
+#!/bin/sh
+
+#
+# Prints two properties of the native system:
+#   - the best architecture supported by the CPU
+#   - the filename of the best binary uploaded as an artifact during CI
+#
+
+# Succeed only when every argument occurs as a whole word in the global
+# $flags list; stop with a failure status at the first one that is missing.
+check_flags() {
+  for wanted in "$@"; do
+    if ! printf '%s\n' "$flags" | grep -q -w "$wanted"; then
+      return 1
+    fi
+  done
+}
+
+# Set the CPU flags list
+# remove underscores and points from flags, e.g. gcc uses avx512vnni, while some cpuinfo can have avx512_vnni, some systems use sse4_1 others sse4.1
+# The result is stored in the global variable "flags", which check_flags reads.
+get_flags() {
+  # For every "flags :" or "Features :" line of /proc/cpuinfo the label and all
+  # '_' and '.' characters are stripped; only the last such line is printed.
+  flags=$(awk '/^flags[ \t]*:|^Features[ \t]*:/{gsub(/^flags[ \t]*:[ \t]*|^Features[ \t]*:[ \t]*|[_.]/, ""); line=$0} END{print line}' /proc/cpuinfo)
+}
+
+# Check for gcc march "znver1" or "znver2" https://en.wikichip.org/wiki/amd/cpuid
+# Reads the first vendor_id and "cpu family" entries of /proc/cpuinfo and sets
+# znver_1_2=true when they are AuthenticAMD and 23. The variable is left
+# untouched otherwise; set_arch_x86_64 only tests whether it is set.
+check_znver_1_2() {
+  vendor_id=$(awk '/^vendor_id/{print $3; exit}' /proc/cpuinfo)
+  cpu_family=$(awk '/^cpu family/{print $4; exit}' /proc/cpuinfo)
+  # Short-circuit chain: the function returns non-zero when the CPU does not match.
+  [ "$vendor_id" = "AuthenticAMD" ] && [ "$cpu_family" = "23" ] && znver_1_2=true
+}
+
+# Pick the loongarch64 build target from the CPU flags and store it in
+# true_arch: LASX is preferred, then LSX, then the plain baseline.
+# Expects the global flags list to be populated (see get_flags).
+set_arch_loongarch64() {
+  if check_flags 'lasx'; then
+    true_arch='loongarch64-lasx'
+  elif check_flags 'lsx'; then
+    # Spelled like the other two targets so the resulting name is a real arch.
+    true_arch='loongarch64-lsx'
+  else
+    true_arch='loongarch64'
+  fi
+}
+
+# Store in true_arch the most capable x86-64 build target the CPU flags allow.
+# Candidates are tried from the most to the least demanding; the first one
+# whose required flags are all present wins.
+set_arch_x86_64() {
+  if check_flags 'avx512vnni' 'avx512dq' 'avx512f' 'avx512bw' 'avx512vl'; then
+    true_arch='x86-64-vnni256'
+    return
+  fi
+
+  if check_flags 'avx512f' 'avx512bw'; then
+    true_arch='x86-64-avx512'
+    return
+  fi
+
+  # bmi2 is only selected when check_znver_1_2 has not set znver_1_2.
+  if [ -z "${znver_1_2+1}" ] && check_flags 'bmi2'; then
+    true_arch='x86-64-bmi2'
+    return
+  fi
+
+  if check_flags 'avx2'; then
+    true_arch='x86-64-avx2'
+    return
+  fi
+
+  if check_flags 'sse41' 'popcnt'; then
+    true_arch='x86-64-sse41-popcnt'
+    return
+  fi
+
+  true_arch='x86-64'
+}
+
+# Pick the ppc-64 build target and store it in true_arch: plain ppc-64 when
+# /proc/cpuinfo does not mention altivec, otherwise VSX for POWER8 and newer
+# and AltiVec when the generation is older or could not be determined.
+set_arch_ppc_64() {
+  # Test grep's exit status directly instead of running it inside $(...),
+  # where any output it produced would be executed as a command.
+  if grep -q -w "altivec" /proc/cpuinfo; then
+    power=$(grep -oP -m 1 'cpu\t+: POWER\K\d+' /proc/cpuinfo)
+    # The leading 0 keeps the comparison well-formed when $power is empty.
+    if [ "0$power" -gt 7 ]; then
+      # VSX started with POWER8
+      true_arch='ppc-64-vsx'
+    else
+      true_arch='ppc-64-altivec'
+    fi
+  else
+    true_arch='ppc-64'
+  fi
+}
+
+# Check the system type
+# Each supported OS/machine pair sets true_arch (the detected architecture),
+# file_os and file_ext, and optionally file_arch when the binary to use is
+# named differently from true_arch.
+uname_s=$(uname -s)
+uname_m=$(uname -m)
+case $uname_s in
+  'Darwin') # Mac OSX system
+    # NOTE(review): machine types other than arm64/x86_64 fall through this
+    # inner case without setting true_arch; confirm whether that is intended.
+    case $uname_m in
+      'arm64')
+        true_arch='apple-silicon'
+        file_arch='m1-apple-silicon'
+        ;;
+      'x86_64')
+        # Build the flags list from sysctl: join lines, lowercase, and drop
+        # '_' and '.' so the names match what set_arch_x86_64 looks for.
+        flags=$(sysctl -n machdep.cpu.features machdep.cpu.leaf7_features | tr '\n' ' ' | tr '[:upper:]' '[:lower:]' | tr -d '_.')
+        set_arch_x86_64
+        # For the two AVX-512 targets the bmi2 binary name is used instead
+        # (presumably no such macOS artifact is published; TODO confirm).
+        if [ "$true_arch" = 'x86-64-vnni256' ] || [ "$true_arch" = 'x86-64-avx512' ]; then
+           file_arch='x86-64-bmi2'
+        fi
+        ;;
+    esac
+    file_os='macos'
+    file_ext='tar'
+    ;;
+  'Linux') # Linux system
+    get_flags
+    case $uname_m in
+      'x86_64')
+        file_os='ubuntu'
+        # Must run before set_arch_x86_64, which checks whether znver_1_2 is set.
+        check_znver_1_2
+        set_arch_x86_64
+        ;;
+      'i686')
+        file_os='ubuntu'
+        true_arch='x86-32'
+        ;;
+      'ppc64'*)
+        file_os='ubuntu'
+        set_arch_ppc_64
+        ;;
+      'aarch64')
+        file_os='android'
+        true_arch='armv8'
+        if check_flags 'asimddp'; then
+          true_arch="$true_arch-dotprod"
+        fi
+        ;;
+      'armv7'*)
+        file_os='android'
+        true_arch='armv7'
+        if check_flags 'neon'; then
+          true_arch="$true_arch-neon"
+        fi
+        ;;
+      'loongarch64'*)
+        file_os='linux'
+        set_arch_loongarch64
+        ;;
+      *) # Unsupported machine type, exit with error
+        printf 'Unsupported machine type: %s\n' "$uname_m"
+        exit 1
+        ;;
+    esac
+    file_ext='tar'
+    ;;
+  'CYGWIN'*|'MINGW'*|'MSYS'*) # Windows system with POSIX compatibility layer
+    get_flags
+    check_znver_1_2
+    set_arch_x86_64
+    file_os='windows'
+    file_ext='zip'
+    ;;
+  *)
+    # Unknown system type, exit with error
+    printf 'Unsupported system type: %s\n' "$uname_s"
+    exit 1
+    ;;
+esac
+
+# Unless a branch above chose a different binary name, use the detected arch.
+if [ -z "$file_arch" ]; then
+  file_arch=$true_arch
+fi
+
+file_name="stockfish-$file_os-$file_arch.$file_ext"
+
+# Output: "<true_arch> <file_name>" on a single line.
+printf '%s %s\n' "$true_arch" "$file_name"
@@ -0,0 +1,75 @@
+#!/bin/sh
+
+# Choose the download command: wget when available, otherwise curl. Either
+# one is invoked so that the fetched data goes to standard output. Without
+# any of them the script cannot proceed and exits with status 1.
+if command -v wget > /dev/null 2>&1; then
+  wget_or_curl="wget -qO-"
+elif command -v curl > /dev/null 2>&1; then
+  wget_or_curl="curl -skL"
+else
+  wget_or_curl=""
+  >&2 printf "%s\n" "Neither wget or curl is installed." \
+    "Install one of these tools to download NNUE files automatically."
+  exit 1
+fi
+
+# Choose the SHA-256 command: shasum when available, otherwise sha256sum.
+# The variable stays empty when neither exists, in which case a warning is
+# written to stderr and the script carries on.
+if command -v shasum > /dev/null 2>&1; then
+  sha256sum="shasum -a 256"
+elif command -v sha256sum > /dev/null 2>&1; then
+  sha256sum="sha256sum"
+else
+  sha256sum=""
+  >&2 echo "sha256sum not found, NNUE files will be assumed valid."
+fi
+
+# Print the NNUE file name found on the "#define" line(s) of evaluate.h (read
+# from the current directory) that mention the identifier given as $1.
+# Prints nothing when no line matches both greps.
+get_nnue_filename() {
+  # sed keeps only the "nn-<12 lowercase letters/digits>.nnue" part of the line.
+  grep "$1" evaluate.h | grep "#define" | sed "s/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/"
+}
+
+# Verify that the file named by $1 is called "nn-<first 12 characters of its
+# SHA-256 digest>.nnue". A file that fails the check is deleted and status 1
+# is returned. When no checksum command is available, or the file does not
+# exist, nothing is checked and the function succeeds.
+validate_network() {
+  [ -n "$sha256sum" ] || return 0
+  [ -f "$1" ] || return 0
+
+  if [ "$1" = "nn-$($sha256sum "$1" | cut -c 1-12).nnue" ]; then
+    return 0
+  fi
+
+  rm -f "$1"
+  return 1
+}
+
+# Make sure the NNUE network referenced in evaluate.h under the identifier $1
+# is present in the current directory.
+#   - An existing file that passes validate_network is kept as is.
+#   - Otherwise each URL is tried in order until one yields a file that
+#     passes validation.
+# Returns 0 on success and 1 when the file name cannot be determined or no
+# URL delivered a usable file.
+fetch_network() {
+  _filename="$(get_nnue_filename "$1")"
+
+  if [ -z "$_filename" ]; then
+    >&2 echo "NNUE file name not found for: $1"
+    return 1
+  fi
+
+  if [ -f "$_filename" ]; then
+    if validate_network "$_filename"; then
+      echo "Existing $_filename validated, skipping download"
+      return 0
+    else
+      echo "Removing invalid NNUE file: $_filename"
+    fi
+  fi
+
+  for url in \
+    "https://tests.stockfishchess.org/api/nn/$_filename" \
+    "https://github.com/official-stockfish/networks/raw/master/$_filename"; do
+    echo "Downloading from $url ..."
+    if $wget_or_curl "$url" > "$_filename"; then
+      if validate_network "$_filename"; then
+        echo "Successfully validated $_filename"
+        return 0
+      fi
+      # validate_network has already deleted the bad file; try the next URL.
+      echo "Downloaded $_filename is invalid"
+    else
+      echo "Failed to download from $url"
+      # The redirection creates the file even when the download command
+      # fails; remove the leftover so it is never mistaken for a network,
+      # then fall through to the next URL.
+      rm -f "$_filename"
+    fi
+  done
+
+  # Download was not successful in the loop, return false.
+  >&2 echo "Failed to download $_filename"
+  return 1
+}
+
+# Fetch both default networks. The small one is only attempted when the big
+# one succeeded, and the script's exit status is that of the last fetch run.
+fetch_network EvalFileDefaultNameBig && \
+fetch_network EvalFileDefaultNameSmall
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -16,18 +16,18 @@
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

+#include "benchmark.h"
+#include "numa.h"
+
+#include <cstdlib>
 #include <fstream>
 #include <iostream>
-#include <istream>
 #include <vector>

-#include "position.h"
-
-using namespace std;
-
 namespace {

-const vector<string> Defaults = {
+// clang-format off
+const std::vector<std::string> Defaults = {
  "setoption name UCI_Chess960 value false",
  "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1",
  "r3k2r/p1ppqpb1/bn2pnp1/3PN3/1p2P3/2N2Q1p/PPPBBPPP/R3K2R w KQkq - 0 10",
@@ -87,84 +87,426 @@ const vector<string> Defaults = {
  // Chess 960
  "setoption name UCI_Chess960 value true",
  "bbqnnrkr/pppppppp/8/8/8/8/PPPPPPPP/BBQNNRKR w HFhf - 0 1 moves g2g3 d7d5 d2d4 c8h3 c1g5 e8d6 g5e7 f7f6",
+  "nqbnrkrb/pppppppp/8/8/8/8/PPPPPPPP/NQBNRKRB w KQkq - 0 1",
  "setoption name UCI_Chess960 value false"
 };
+// clang-format on

-} // namespace
+// clang-format off
+// Five games of fewer than 60 moves each, hand-picked at random from
+// https://tests.stockfishchess.org/tests/view/665c71f9fd45fb0f907c21e0
+// Each game lists only the positions in which one of the two sides is to move.
+const std::vector<std::vector<std::string>> BenchmarkPositions = {
+    {
+        "rnbq1k1r/ppp1bppp/4pn2/8/2B5/2NP1N2/PPP2PPP/R1BQR1K1 b - - 2 8",
+        "rnbq1k1r/pp2bppp/4pn2/2p5/2B2B2/2NP1N2/PPP2PPP/R2QR1K1 b - - 1 9",
+        "r1bq1k1r/pp2bppp/2n1pn2/2p5/2B1NB2/3P1N2/PPP2PPP/R2QR1K1 b - - 3 10",
+        "r1bq1k1r/pp2bppp/2n1p3/2p5/2B1PB2/5N2/PPP2PPP/R2QR1K1 b - - 0 11",
+        "r1b2k1r/pp2bppp/2n1p3/2p5/2B1PB2/5N2/PPP2PPP/3RR1K1 b - - 0 12",
+        "r1b1k2r/pp2bppp/2n1p3/2p5/2B1PB2/2P2N2/PP3PPP/3RR1K1 b - - 0 13",
+        "r1b1k2r/1p2bppp/p1n1p3/2p5/4PB2/2P2N2/PP2BPPP/3RR1K1 b - - 1 14",
+        "r1b1k2r/4bppp/p1n1p3/1pp5/P3PB2/2P2N2/1P2BPPP/3RR1K1 b - - 0 15",
+        "r1b1k2r/4bppp/p1n1p3/1P6/2p1PB2/2P2N2/1P2BPPP/3RR1K1 b - - 0 16",
+        "r1b1k2r/4bppp/2n1p3/1p6/2p1PB2/1PP2N2/4BPPP/3RR1K1 b - - 0 17",
+        "r3k2r/3bbppp/2n1p3/1p6/2P1PB2/2P2N2/4BPPP/3RR1K1 b - - 0 18",
+        "r3k2r/3bbppp/2n1p3/8/1pP1P3/2P2N2/3BBPPP/3RR1K1 b - - 1 19",
+        "1r2k2r/3bbppp/2n1p3/8/1pPNP3/2P5/3BBPPP/3RR1K1 b - - 3 20",
+        "1r2k2r/3bbppp/2n1p3/8/2PNP3/2B5/4BPPP/3RR1K1 b - - 0 21",
+        "1r2k2r/3bb1pp/2n1pp2/1N6/2P1P3/2B5/4BPPP/3RR1K1 b - - 1 22",
+        "1r2k2r/3b2pp/2n1pp2/1N6/1BP1P3/8/4BPPP/3RR1K1 b - - 0 23",
+        "1r2k2r/3b2pp/4pp2/1N6/1nP1P3/8/3RBPPP/4R1K1 b - - 1 24",
+        "1r5r/3bk1pp/4pp2/1N6/1nP1PP2/8/3RB1PP/4R1K1 b - - 0 25",
+        "1r5r/3bk1pp/2n1pp2/1N6/2P1PP2/8/3RBKPP/4R3 b - - 2 26",
+        "1r5r/3bk1pp/2n2p2/1N2p3/2P1PP2/6P1/3RBK1P/4R3 b - - 0 27",
+        "1r1r4/3bk1pp/2n2p2/1N2p3/2P1PP2/6P1/3RBK1P/R7 b - - 2 28",
+        "1r1r4/N3k1pp/2n1bp2/4p3/2P1PP2/6P1/3RBK1P/R7 b - - 4 29",
+        "1r1r4/3bk1pp/2N2p2/4p3/2P1PP2/6P1/3RBK1P/R7 b - - 0 30",
+        "1r1R4/4k1pp/2b2p2/4p3/2P1PP2/6P1/4BK1P/R7 b - - 0 31",
+        "3r4/4k1pp/2b2p2/4P3/2P1P3/6P1/4BK1P/R7 b - - 0 32",
+        "3r4/R3k1pp/2b5/4p3/2P1P3/6P1/4BK1P/8 b - - 1 33",
+        "8/3rk1pp/2b5/R3p3/2P1P3/6P1/4BK1P/8 b - - 3 34",
+        "8/3r2pp/2bk4/R1P1p3/4P3/6P1/4BK1P/8 b - - 0 35",
+        "8/2kr2pp/2b5/R1P1p3/4P3/4K1P1/4B2P/8 b - - 2 36",
+        "1k6/3r2pp/2b5/RBP1p3/4P3/4K1P1/7P/8 b - - 4 37",
+        "8/1k1r2pp/2b5/R1P1p3/4P3/3BK1P1/7P/8 b - - 6 38",
+        "1k6/3r2pp/2b5/2P1p3/4P3/3BK1P1/7P/R7 b - - 8 39",
+        "1k6/r5pp/2b5/2P1p3/4P3/3BK1P1/7P/5R2 b - - 10 40",
+        "1k3R2/6pp/2b5/2P1p3/4P3/r2BK1P1/7P/8 b - - 12 41",
+        "5R2/2k3pp/2b5/2P1p3/4P3/r2B2P1/3K3P/8 b - - 14 42",
+        "5R2/2k3pp/2b5/2P1p3/4P3/3BK1P1/r6P/8 b - - 16 43",
+        "5R2/2k3pp/2b5/2P1p3/4P3/r2B2P1/4K2P/8 b - - 18 44",
+        "5R2/2k3pp/2b5/2P1p3/4P3/3B1KP1/r6P/8 b - - 20 45",
+        "8/2k2Rpp/2b5/2P1p3/4P3/r2B1KP1/7P/8 b - - 22 46",
+        "3k4/5Rpp/2b5/2P1p3/4P3/r2B2P1/4K2P/8 b - - 24 47",
+        "3k4/5Rpp/2b5/2P1p3/4P3/3B1KP1/r6P/8 b - - 26 48",
+        "3k4/5Rpp/2b5/2P1p3/4P3/r2B2P1/4K2P/8 b - - 28 49",
+        "3k4/5Rpp/2b5/2P1p3/4P3/3BK1P1/r6P/8 b - - 30 50",
+        "3k4/5Rpp/2b5/2P1p3/4P3/r2B2P1/3K3P/8 b - - 32 51",
+        "3k4/5Rpp/2b5/2P1p3/4P3/2KB2P1/r6P/8 b - - 34 52",
+        "3k4/5Rpp/2b5/2P1p3/4P3/r2B2P1/2K4P/8 b - - 36 53",
+        "3k4/5Rpp/2b5/2P1p3/4P3/1K1B2P1/r6P/8 b - - 38 54",
+        "3k4/6Rp/2b5/2P1p3/4P3/1K1B2P1/7r/8 b - - 0 55",
+        "3k4/8/2b3Rp/2P1p3/4P3/1K1B2P1/7r/8 b - - 1 56",
+        "8/2k3R1/2b4p/2P1p3/4P3/1K1B2P1/7r/8 b - - 3 57",
+        "3k4/8/2b3Rp/2P1p3/4P3/1K1B2P1/7r/8 b - - 5 58",
+        "8/2k5/2b3Rp/2P1p3/1K2P3/3B2P1/7r/8 b - - 7 59",
+        "8/2k5/2b3Rp/2P1p3/4P3/2KB2P1/3r4/8 b - - 9 60",
+        "8/2k5/2b3Rp/2P1p3/1K2P3/3B2P1/6r1/8 b - - 11 61",
+        "8/2k5/2b3Rp/2P1p3/4P3/2KB2P1/3r4/8 b - - 13 62",
+        "8/2k5/2b3Rp/2P1p3/2K1P3/3B2P1/6r1/8 b - - 15 63",
+        "4b3/2k3R1/7p/2P1p3/2K1P3/3B2P1/6r1/8 b - - 17 64",
+    },
+    {
+        "r1bqkbnr/npp1pppp/p7/3P4/4pB2/2N5/PPP2PPP/R2QKBNR w KQkq - 1 6",
+        "r1bqkb1r/npp1pppp/p4n2/3P4/4pB2/2N5/PPP1QPPP/R3KBNR w KQkq - 3 7",
+        "r2qkb1r/npp1pppp/p4n2/3P1b2/4pB2/2N5/PPP1QPPP/2KR1BNR w kq - 5 8",
+        "r2qkb1r/1pp1pppp/p4n2/1n1P1b2/4pB2/2N4P/PPP1QPP1/2KR1BNR w kq - 1 9",
+        "r2qkb1r/1pp1pppp/5n2/1p1P1b2/4pB2/7P/PPP1QPP1/2KR1BNR w kq - 0 10",
+        "r2qkb1r/1ppbpppp/5n2/1Q1P4/4pB2/7P/PPP2PP1/2KR1BNR w kq - 1 11",
+        "3qkb1r/1Qpbpppp/5n2/3P4/4pB2/7P/rPP2PP1/2KR1BNR w k - 0 12",
+        "q3kb1r/1Qpbpppp/5n2/3P4/4pB2/7P/rPP2PP1/1K1R1BNR w k - 2 13",
+        "r3kb1r/2pbpppp/5n2/3P4/4pB2/7P/1PP2PP1/1K1R1BNR w k - 0 14",
+        "r3kb1r/2Bb1ppp/4pn2/3P4/4p3/7P/1PP2PP1/1K1R1BNR w k - 0 15",
+        "r3kb1r/2Bb2pp/4pn2/8/4p3/7P/1PP2PP1/1K1R1BNR w k - 0 16",
+        "r3k2r/2Bb2pp/4pn2/2b5/4p3/7P/1PP1NPP1/1K1R1B1R w k - 2 17",
+        "r6r/2Bbk1pp/4pn2/2b5/3Np3/7P/1PP2PP1/1K1R1B1R w - - 4 18",
+        "r6r/b2bk1pp/4pn2/4B3/3Np3/7P/1PP2PP1/1K1R1B1R w - - 6 19",
+        "r1r5/b2bk1pp/4pn2/4B3/2BNp3/7P/1PP2PP1/1K1R3R w - - 8 20",
+        "r7/b2bk1pp/4pn2/2r1B3/2BNp3/1P5P/2P2PP1/1K1R3R w - - 1 21",
+        "rb6/3bk1pp/4pn2/2r1B3/2BNpP2/1P5P/2P3P1/1K1R3R w - - 1 22",
+        "1r6/3bk1pp/4pn2/2r5/2BNpP2/1P5P/2P3P1/1K1R3R w - - 0 23",
+        "1r6/3bk1p1/4pn1p/2r5/2BNpP2/1P5P/2P3P1/2KR3R w - - 0 24",
+        "8/3bk1p1/1r2pn1p/2r5/2BNpP1P/1P6/2P3P1/2KR3R w - - 1 25",
+        "8/3bk3/1r2pnpp/2r5/2BNpP1P/1P6/2P3P1/2K1R2R w - - 0 26",
+        "2b5/4k3/1r2pnpp/2r5/2BNpP1P/1P4P1/2P5/2K1R2R w - - 1 27",
+        "8/1b2k3/1r2pnpp/2r5/2BNpP1P/1P4P1/2P5/2K1R1R1 w - - 3 28",
+        "8/1b1nk3/1r2p1pp/2r5/2BNpPPP/1P6/2P5/2K1R1R1 w - - 1 29",
+        "8/1b2k3/1r2p1pp/2r1nP2/2BNp1PP/1P6/2P5/2K1R1R1 w - - 1 30",
+        "8/1b2k3/1r2p1p1/2r1nPp1/2BNp2P/1P6/2P5/2K1R1R1 w - - 0 31",
+        "8/1b2k3/1r2p1n1/2r3p1/2BNp2P/1P6/2P5/2K1R1R1 w - - 0 32",
+        "8/1b2k3/1r2p1n1/6r1/2BNp2P/1P6/2P5/2K1R3 w - - 0 33",
+        "8/1b2k3/1r2p3/4n1P1/2BNp3/1P6/2P5/2K1R3 w - - 1 34",
+        "8/1b2k3/1r2p3/4n1P1/2BN4/1P2p3/2P5/2K4R w - - 0 35",
+        "8/1b2k3/1r2p2R/6P1/2nN4/1P2p3/2P5/2K5 w - - 0 36",
+        "8/1b2k3/3rp2R/6P1/2PN4/4p3/2P5/2K5 w - - 1 37",
+        "8/4k3/3rp2R/6P1/2PN4/2P1p3/6b1/2K5 w - - 1 38",
+        "8/4k3/r3p2R/2P3P1/3N4/2P1p3/6b1/2K5 w - - 1 39",
+        "8/3k4/r3p2R/2P2NP1/8/2P1p3/6b1/2K5 w - - 3 40",
+        "8/3k4/4p2R/2P3P1/8/2P1N3/6b1/r1K5 w - - 1 41",
+        "8/3k4/4p2R/2P3P1/8/2P1N3/3K2b1/6r1 w - - 3 42",
+        "8/3k4/4p2R/2P3P1/8/2PKNb2/8/6r1 w - - 5 43",
+        "8/4k3/4p1R1/2P3P1/8/2PKNb2/8/6r1 w - - 7 44",
+        "8/4k3/4p1R1/2P3P1/3K4/2P1N3/8/6rb w - - 9 45",
+        "8/3k4/4p1R1/2P1K1P1/8/2P1N3/8/6rb w - - 11 46",
+        "8/3k4/4p1R1/2P3P1/5K2/2P1N3/8/4r2b w - - 13 47",
+        "8/3k4/2b1p2R/2P3P1/5K2/2P1N3/8/4r3 w - - 15 48",
+        "8/3k4/2b1p3/2P3P1/5K2/2P1N2R/8/6r1 w - - 17 49",
+        "2k5/7R/2b1p3/2P3P1/5K2/2P1N3/8/6r1 w - - 19 50",
+        "2k5/7R/4p3/2P3P1/b1P2K2/4N3/8/6r1 w - - 1 51",
+        "2k5/3bR3/4p3/2P3P1/2P2K2/4N3/8/6r1 w - - 3 52",
+        "3k4/3b2R1/4p3/2P3P1/2P2K2/4N3/8/6r1 w - - 5 53",
+        "3kb3/6R1/4p1P1/2P5/2P2K2/4N3/8/6r1 w - - 1 54",
+        "3kb3/6R1/4p1P1/2P5/2P2KN1/8/8/2r5 w - - 3 55",
+        "3kb3/6R1/4p1P1/2P1N3/2P2K2/8/8/5r2 w - - 5 56",
+        "3kb3/6R1/4p1P1/2P1N3/2P5/4K3/8/4r3 w - - 7 57",
+    },
+    {
+        "rnbq1rk1/ppp1npb1/4p1p1/3P3p/3PP3/2N2N2/PP2BPPP/R1BQ1RK1 b - - 0 8",
+        "rnbq1rk1/ppp1npb1/6p1/3pP2p/3P4/2N2N2/PP2BPPP/R1BQ1RK1 b - - 0 9",
+        "rn1q1rk1/ppp1npb1/6p1/3pP2p/3P2b1/2N2N2/PP2BPPP/R1BQR1K1 b - - 2 10",
+        "r2q1rk1/ppp1npb1/2n3p1/3pP2p/3P2bN/2N5/PP2BPPP/R1BQR1K1 b - - 4 11",
+        "r4rk1/pppqnpb1/2n3p1/3pP2p/3P2bN/2N4P/PP2BPP1/R1BQR1K1 b - - 0 12",
+        "r4rk1/pppqnpb1/2n3p1/3pP2p/3P3N/7P/PP2NPP1/R1BQR1K1 b - - 0 13",
+        "r4rk1/pppq1pb1/2n3p1/3pPN1p/3P4/7P/PP2NPP1/R1BQR1K1 b - - 0 14",
+        "r4rk1/ppp2pb1/2n3p1/3pPq1p/3P1N2/7P/PP3PP1/R1BQR1K1 b - - 1 15",
+        "r4rk1/pppq1pb1/2n3p1/3pP2p/P2P1N2/7P/1P3PP1/R1BQR1K1 b - - 0 16",
+        "r2n1rk1/pppq1pb1/6p1/3pP2p/P2P1N2/R6P/1P3PP1/2BQR1K1 b - - 2 17",
+        "r4rk1/pppq1pb1/4N1p1/3pP2p/P2P4/R6P/1P3PP1/2BQR1K1 b - - 0 18",
+        "r4rk1/ppp2pb1/4q1p1/3pP1Bp/P2P4/R6P/1P3PP1/3QR1K1 b - - 1 19",
+        "r3r1k1/ppp2pb1/4q1p1/3pP1Bp/P2P1P2/R6P/1P4P1/3QR1K1 b - - 0 20",
+        "r3r1k1/ppp3b1/4qpp1/3pP2p/P2P1P1B/R6P/1P4P1/3QR1K1 b - - 1 21",
+        "r3r1k1/ppp3b1/4q1p1/3pP2p/P4P1B/R6P/1P4P1/3QR1K1 b - - 0 22",
+        "r4rk1/ppp3b1/4q1p1/3pP1Bp/P4P2/R6P/1P4P1/3QR1K1 b - - 2 23",
+        "r4rk1/pp4b1/4q1p1/2ppP1Bp/P4P2/3R3P/1P4P1/3QR1K1 b - - 1 24",
+        "r4rk1/pp4b1/4q1p1/2p1P1Bp/P2p1PP1/3R3P/1P6/3QR1K1 b - - 0 25",
+        "r4rk1/pp4b1/4q1p1/2p1P1B1/P2p1PP1/3R4/1P6/3QR1K1 b - - 0 26",
+        "r5k1/pp3rb1/4q1p1/2p1P1B1/P2p1PP1/6R1/1P6/3QR1K1 b - - 2 27",
+        "5rk1/pp3rb1/4q1p1/2p1P1B1/P2pRPP1/6R1/1P6/3Q2K1 b - - 4 28",
+        "5rk1/1p3rb1/p3q1p1/P1p1P1B1/3pRPP1/6R1/1P6/3Q2K1 b - - 0 29",
+        "4r1k1/1p3rb1/p3q1p1/P1p1P1B1/3pRPP1/1P4R1/8/3Q2K1 b - - 0 30",
+        "4r1k1/5rb1/pP2q1p1/2p1P1B1/3pRPP1/1P4R1/8/3Q2K1 b - - 0 31",
+        "4r1k1/5rb1/pq4p1/2p1P1B1/3pRPP1/1P4R1/4Q3/6K1 b - - 1 32",
+        "4r1k1/1r4b1/pq4p1/2p1P1B1/3pRPP1/1P4R1/2Q5/6K1 b - - 3 33",
+        "4r1k1/1r4b1/1q4p1/p1p1P1B1/3p1PP1/1P4R1/2Q5/4R1K1 b - - 1 34",
+        "4r1k1/3r2b1/1q4p1/p1p1P1B1/2Qp1PP1/1P4R1/8/4R1K1 b - - 3 35",
+        "4r1k1/3r2b1/4q1p1/p1p1P1B1/2Qp1PP1/1P4R1/5K2/4R3 b - - 5 36",
+        "4r1k1/3r2b1/6p1/p1p1P1B1/2Pp1PP1/6R1/5K2/4R3 b - - 0 37",
+        "4r1k1/3r2b1/6p1/p1p1P1B1/2P2PP1/3p2R1/5K2/3R4 b - - 1 38",
+        "5rk1/3r2b1/6p1/p1p1P1B1/2P2PP1/3p2R1/8/3RK3 b - - 3 39",
+        "5rk1/6b1/6p1/p1p1P1B1/2Pr1PP1/3R4/8/3RK3 b - - 0 40",
+        "5rk1/3R2b1/6p1/p1p1P1B1/2r2PP1/8/8/3RK3 b - - 1 41",
+        "5rk1/3R2b1/6p1/p1p1P1B1/4rPP1/8/3K4/3R4 b - - 3 42",
+        "1r4k1/3R2b1/6p1/p1p1P1B1/4rPP1/2K5/8/3R4 b - - 5 43",
+        "1r4k1/3R2b1/6p1/p1p1P1B1/2K2PP1/4r3/8/3R4 b - - 7 44",
+        "1r3bk1/8/3R2p1/p1p1P1B1/2K2PP1/4r3/8/3R4 b - - 9 45",
+        "1r3bk1/8/6R1/2p1P1B1/p1K2PP1/4r3/8/3R4 b - - 0 46",
+        "1r3b2/5k2/R7/2p1P1B1/p1K2PP1/4r3/8/3R4 b - - 2 47",
+        "5b2/1r3k2/R7/2p1P1B1/p1K2PP1/4r3/8/7R b - - 4 48",
+        "5b2/5k2/R7/2pKP1B1/pr3PP1/4r3/8/7R b - - 6 49",
+        "5b2/5k2/R1K5/2p1P1B1/p2r1PP1/4r3/8/7R b - - 8 50",
+        "8/R4kb1/2K5/2p1P1B1/p2r1PP1/4r3/8/7R b - - 10 51",
+        "8/R5b1/2K3k1/2p1PPB1/p2r2P1/4r3/8/7R b - - 0 52",
+        "8/6R1/2K5/2p1PPk1/p2r2P1/4r3/8/7R b - - 0 53",
+        "8/6R1/2K5/2p1PP2/p2r1kP1/4r3/8/5R2 b - - 2 54",
+        "8/6R1/2K2P2/2p1P3/p2r2P1/4r1k1/8/5R2 b - - 0 55",
+        "8/5PR1/2K5/2p1P3/p2r2P1/4r3/6k1/5R2 b - - 0 56",
+    },
+    {
+        "rn1qkb1r/p1pbpppp/5n2/8/2pP4/2N5/1PQ1PPPP/R1B1KBNR w KQkq - 0 7",
+        "r2qkb1r/p1pbpppp/2n2n2/8/2pP4/2N2N2/1PQ1PPPP/R1B1KB1R w KQkq - 2 8",
+        "r2qkb1r/p1pbpppp/5n2/8/1npPP3/2N2N2/1PQ2PPP/R1B1KB1R w KQkq - 1 9",
+        "r2qkb1r/p1pb1ppp/4pn2/8/1npPP3/2N2N2/1P3PPP/R1BQKB1R w KQkq - 0 10",
+        "r2qk2r/p1pbbppp/4pn2/8/1nBPP3/2N2N2/1P3PPP/R1BQK2R w KQkq - 1 11",
+        "r2q1rk1/p1pbbppp/4pn2/8/1nBPP3/2N2N2/1P3PPP/R1BQ1RK1 w - - 3 12",
+        "r2q1rk1/2pbbppp/p3pn2/8/1nBPPB2/2N2N2/1P3PPP/R2Q1RK1 w - - 0 13",
+        "r2q1rk1/2p1bppp/p3pn2/1b6/1nBPPB2/2N2N2/1P3PPP/R2QR1K1 w - - 2 14",
+        "r2q1rk1/4bppp/p1p1pn2/1b6/1nBPPB2/1PN2N2/5PPP/R2QR1K1 w - - 0 15",
+        "r4rk1/3qbppp/p1p1pn2/1b6/1nBPPB2/1PN2N2/3Q1PPP/R3R1K1 w - - 2 16",
+        "r4rk1/1q2bppp/p1p1pn2/1b6/1nBPPB2/1PN2N1P/3Q1PP1/R3R1K1 w - - 1 17",
+        "r3r1k1/1q2bppp/p1p1pn2/1b6/1nBPPB2/1PN2N1P/4QPP1/R3R1K1 w - - 3 18",
+        "r3r1k1/1q1nbppp/p1p1p3/1b6/1nBPPB2/1PN2N1P/4QPP1/3RR1K1 w - - 5 19",
+        "r3rbk1/1q1n1ppp/p1p1p3/1b6/1nBPPB2/1PN2N1P/3RQPP1/4R1K1 w - - 7 20",
+        "r3rbk1/1q3ppp/pnp1p3/1b6/1nBPPB2/1PN2N1P/3RQPP1/4R2K w - - 9 21",
+        "2r1rbk1/1q3ppp/pnp1p3/1b6/1nBPPB2/1PN2N1P/3RQPP1/1R5K w - - 11 22",
+        "2r1rbk1/1q4pp/pnp1pp2/1b6/1nBPPB2/1PN2N1P/4QPP1/1R1R3K w - - 0 23",
+        "2r1rbk1/5qpp/pnp1pp2/1b6/1nBPP3/1PN1BN1P/4QPP1/1R1R3K w - - 2 24",
+        "2r1rbk1/5qp1/pnp1pp1p/1b6/1nBPP3/1PN1BN1P/4QPP1/1R1R2K1 w - - 0 25",
+        "2r1rbk1/5qp1/pnp1pp1p/1b6/2BPP3/1P2BN1P/n3QPP1/1R1R2K1 w - - 0 26",
+        "r3rbk1/5qp1/pnp1pp1p/1b6/2BPP3/1P2BN1P/Q4PP1/1R1R2K1 w - - 1 27",
+        "rr3bk1/5qp1/pnp1pp1p/1b6/2BPP3/1P2BN1P/Q4PP1/R2R2K1 w - - 3 28",
+        "rr2qbk1/6p1/pnp1pp1p/1b6/2BPP3/1P2BN1P/4QPP1/R2R2K1 w - - 5 29",
+        "rr2qbk1/6p1/1np1pp1p/pb6/2BPP3/1P1QBN1P/5PP1/R2R2K1 w - - 0 30",
+        "rr2qbk1/6p1/1n2pp1p/pp6/3PP3/1P1QBN1P/5PP1/R2R2K1 w - - 0 31",
+        "rr2qbk1/6p1/1n2pp1p/1p1P4/p3P3/1P1QBN1P/5PP1/R2R2K1 w - - 0 32",
+        "rr2qbk1/3n2p1/3Ppp1p/1p6/p3P3/1P1QBN1P/5PP1/R2R2K1 w - - 1 33",
+        "rr3bk1/3n2p1/3Ppp1p/1p5q/pP2P3/3QBN1P/5PP1/R2R2K1 w - - 1 34",
+        "rr3bk1/3n2p1/3Ppp1p/1p5q/1P2P3/p2QBN1P/5PP1/2RR2K1 w - - 0 35",
+        "1r3bk1/3n2p1/r2Ppp1p/1p5q/1P2P3/pQ2BN1P/5PP1/2RR2K1 w - - 2 36",
+        "1r2qbk1/2Rn2p1/r2Ppp1p/1p6/1P2P3/pQ2BN1P/5PP1/3R2K1 w - - 4 37",
+        "1r2qbk1/2Rn2p1/r2Ppp1p/1pB5/1P2P3/1Q3N1P/p4PP1/3R2K1 w - - 0 38",
+        "1r2q1k1/2Rn2p1/r2bpp1p/1pB5/1P2P3/1Q3N1P/p4PP1/R5K1 w - - 0 39",
+        "1r2q1k1/2Rn2p1/3rpp1p/1p6/1P2P3/1Q3N1P/p4PP1/R5K1 w - - 0 40",
+        "2r1q1k1/2Rn2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 1 41",
+        "1r2q1k1/1R1n2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 3 42",
+        "2r1q1k1/2Rn2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 5 43",
+        "1r2q1k1/1R1n2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 7 44",
+        "1rq3k1/R2n2p1/3rpp1p/1p6/1P2P3/5N1P/Q4PP1/R5K1 w - - 9 45",
+        "2q3k1/Rr1n2p1/3rpp1p/1p6/1P2P3/5N1P/4QPP1/R5K1 w - - 11 46",
+        "Rrq3k1/3n2p1/3rpp1p/1p6/1P2P3/5N1P/4QPP1/R5K1 w - - 13 47",
+    },
+    {
+        "rn1qkb1r/1pp2ppp/p4p2/3p1b2/5P2/1P2PN2/P1PP2PP/RN1QKB1R b KQkq - 1 6",
+        "r2qkb1r/1pp2ppp/p1n2p2/3p1b2/3P1P2/1P2PN2/P1P3PP/RN1QKB1R b KQkq - 0 7",
+        "r2qkb1r/1pp2ppp/p4p2/3p1b2/1n1P1P2/1P1BPN2/P1P3PP/RN1QK2R b KQkq - 2 8",
+        "r2qkb1r/1pp2ppp/p4p2/3p1b2/3P1P2/1P1PPN2/P5PP/RN1QK2R b KQkq - 0 9",
+        "r2qk2r/1pp2ppp/p2b1p2/3p1b2/3P1P2/1PNPPN2/P5PP/R2QK2R b KQkq - 2 10",
+        "r2qk2r/1p3ppp/p1pb1p2/3p1b2/3P1P2/1PNPPN2/P5PP/R2Q1RK1 b kq - 1 11",
+        "r2q1rk1/1p3ppp/p1pb1p2/3p1b2/3P1P2/1PNPPN2/P2Q2PP/R4RK1 b - - 3 12",
+        "r2qr1k1/1p3ppp/p1pb1p2/3p1b2/3P1P2/1P1PPN2/P2QN1PP/R4RK1 b - - 5 13",
+        "r3r1k1/1p3ppp/pqpb1p2/3p1b2/3P1P2/1P1PPNN1/P2Q2PP/R4RK1 b - - 7 14",
+        "r3r1k1/1p3ppp/pqp2p2/3p1b2/1b1P1P2/1P1PPNN1/P1Q3PP/R4RK1 b - - 9 15",
+        "r3r1k1/1p1b1ppp/pqp2p2/3p4/1b1P1P2/1P1PPNN1/P4QPP/R4RK1 b - - 11 16",
+        "2r1r1k1/1p1b1ppp/pqp2p2/3p4/1b1PPP2/1P1P1NN1/P4QPP/R4RK1 b - - 0 17",
+        "2r1r1k1/1p1b1ppp/pq3p2/2pp4/1b1PPP2/PP1P1NN1/5QPP/R4RK1 b - - 0 18",
+        "2r1r1k1/1p1b1ppp/pq3p2/2Pp4/4PP2/PPbP1NN1/5QPP/R4RK1 b - - 0 19",
+        "2r1r1k1/1p1b1ppp/p4p2/2Pp4/4PP2/PqbP1NN1/5QPP/RR4K1 b - - 1 20",
+        "2r1r1k1/1p1b1ppp/p4p2/2Pp4/q3PP2/P1bP1NN1/R4QPP/1R4K1 b - - 3 21",
+        "2r1r1k1/1p3ppp/p4p2/1bPP4/q4P2/P1bP1NN1/R4QPP/1R4K1 b - - 0 22",
+        "2r1r1k1/1p3ppp/p4p2/2PP4/q4P2/P1bb1NN1/R4QPP/2R3K1 b - - 1 23",
+        "2r1r1k1/1p3ppp/p2P1p2/2P5/2q2P2/P1bb1NN1/R4QPP/2R3K1 b - - 0 24",
+        "2rr2k1/1p3ppp/p2P1p2/2P5/2q2P2/P1bb1NN1/R4QPP/2R4K b - - 2 25",
+        "2rr2k1/1p3ppp/p2P1p2/2Q5/5P2/P1bb1NN1/R5PP/2R4K b - - 0 26",
+        "3r2k1/1p3ppp/p2P1p2/2r5/5P2/P1bb1N2/R3N1PP/2R4K b - - 1 27",
+        "3r2k1/1p3ppp/p2P1p2/2r5/5P2/P1b2N2/4R1PP/2R4K b - - 0 28",
+        "3r2k1/1p3ppp/p2P1p2/2r5/1b3P2/P4N2/4R1PP/3R3K b - - 2 29",
+        "3r2k1/1p2Rppp/p2P1p2/b1r5/5P2/P4N2/6PP/3R3K b - - 4 30",
+        "3r2k1/1R3ppp/p1rP1p2/b7/5P2/P4N2/6PP/3R3K b - - 0 31",
+        "3r2k1/1R3ppp/p2R1p2/b7/5P2/P4N2/6PP/7K b - - 0 32",
+        "6k1/1R3ppp/p2r1p2/b7/5P2/P4NP1/7P/7K b - - 0 33",
+        "6k1/1R3p1p/p2r1pp1/b7/5P1P/P4NP1/8/7K b - - 0 34",
+        "6k1/3R1p1p/pr3pp1/b7/5P1P/P4NP1/8/7K b - - 2 35",
+        "6k1/5p2/pr3pp1/b2R3p/5P1P/P4NP1/8/7K b - - 1 36",
+        "6k1/5p2/pr3pp1/7p/5P1P/P1bR1NP1/8/7K b - - 3 37",
+        "6k1/5p2/p1r2pp1/7p/5P1P/P1bR1NP1/6K1/8 b - - 5 38",
+        "6k1/5p2/p1r2pp1/b2R3p/5P1P/P4NP1/6K1/8 b - - 7 39",
+        "6k1/5p2/p4pp1/b2R3p/5P1P/P4NPK/2r5/8 b - - 9 40",
+        "6k1/2b2p2/p4pp1/7p/5P1P/P2R1NPK/2r5/8 b - - 11 41",
+        "6k1/2b2p2/5pp1/p6p/3N1P1P/P2R2PK/2r5/8 b - - 1 42",
+        "6k1/2b2p2/5pp1/p6p/3N1P1P/P1R3PK/r7/8 b - - 3 43",
+        "6k1/5p2/1b3pp1/p6p/5P1P/P1R3PK/r1N5/8 b - - 5 44",
+        "8/5pk1/1bR2pp1/p6p/5P1P/P5PK/r1N5/8 b - - 7 45",
+        "3b4/5pk1/2R2pp1/p4P1p/7P/P5PK/r1N5/8 b - - 0 46",
+        "8/4bpk1/2R2pp1/p4P1p/6PP/P6K/r1N5/8 b - - 0 47",
+        "8/5pk1/2R2pP1/p6p/6PP/b6K/r1N5/8 b - - 0 48",
+        "8/6k1/2R2pp1/p6P/7P/b6K/r1N5/8 b - - 0 49",
+        "8/6k1/2R2p2/p6p/7P/b5K1/r1N5/8 b - - 1 50",
+        "8/8/2R2pk1/p6p/7P/b4K2/r1N5/8 b - - 3 51",
+        "8/8/2R2pk1/p6p/7P/4NK2/rb6/8 b - - 5 52",
+        "2R5/8/5pk1/7p/p6P/4NK2/rb6/8 b - - 1 53",
+        "6R1/8/5pk1/7p/p6P/4NK2/1b6/r7 b - - 3 54",
+        "R7/5k2/5p2/7p/p6P/4NK2/1b6/r7 b - - 5 55",
+        "R7/5k2/5p2/7p/7P/p3N3/1b2K3/r7 b - - 1 56",
+        "8/R4k2/5p2/7p/7P/p3N3/1b2K3/7r b - - 3 57",
+        "8/8/5pk1/7p/R6P/p3N3/1b2K3/7r b - - 5 58",
+        "8/8/5pk1/7p/R6P/p7/4K3/2bN3r b - - 7 59",
+        "8/8/5pk1/7p/R6P/p7/4KN1r/2b5 b - - 9 60",
+        "8/8/5pk1/7p/R6P/p3K3/1b3N1r/8 b - - 11 61",
+        "8/8/R4pk1/7p/7P/p1b1K3/5N1r/8 b - - 13 62",
+        "8/8/5pk1/7p/7P/2b1K3/R4N1r/8 b - - 0 63",
+        "8/8/5pk1/7p/3K3P/8/R4N1r/4b3 b - - 2 64",
+    }
+};
+// clang-format on

-/// setup_bench() builds a list of UCI commands to be run by bench. There
-/// are five parameters: TT size in MB, number of search threads that
-/// should be used, the limit value spent for each position, a file name
-/// where to look for positions in FEN format, the type of the limit:
-/// depth, perft, nodes and movetime (in millisecs), and evaluation type
-/// mixed (default), classical, NNUE.
-///
-/// bench -> search default positions up to depth 13
-/// bench 64 1 15 -> search default positions up to depth 15 (TT = 64MB)
-/// bench 64 4 5000 current movetime -> search current position with 4 threads for 5 sec
-/// bench 64 1 100000 default nodes -> search default positions for 100K nodes each
-/// bench 16 1 5 default perft -> run a perft 5 on default positions
+}  // namespace

-vector<string> setup_bench(const Position& current, istream& is) {
+namespace Stockfish::Benchmark {

-  vector<string> fens, list;
-  string go, token;
+// Builds the list of UCI commands that "bench" runs. Five optional, positional
+// arguments are read from `is`: TT size in MB (default 16), number of search
+// threads (default 1), the limit value spent on each position (default 13), the
+// position source ("default", "current", or the name of a file of FENs), and the
+// limit type: depth, perft, nodes, movetime (in milliseconds) or eval. Examples:
+//
+// bench                            : search default positions up to depth 13
+// bench 64 1 15                    : search default positions up to depth 15 (TT = 64MB)
+// bench 64 1 100000 default nodes  : search default positions for 100K nodes each
+// bench 64 4 5000 current movetime : search current position with 4 threads for 5 sec
+// bench 16 1 5 blah perft          : run a perft 5 on positions in file "blah"
+std::vector<std::string> setup_bench(const std::string& currentFen, std::istream& is) {

-  // Assign default values to missing arguments
-  string ttSize    = (is >> token) ? token : "16";
-  string threads   = (is >> token) ? token : "1";
-  string limit     = (is >> token) ? token : "13";
-  string fenFile   = (is >> token) ? token : "default";
-  string limitType = (is >> token) ? token : "depth";
-  string evalType  = (is >> token) ? token : "mixed";
+    std::vector<std::string> fens, list;  // fens: entries to process; list: resulting commands
+    std::string              go, token;

-  go = limitType == "eval" ? "eval" : "go " + limitType + " " + limit;
+    // Assign default values to missing arguments (read in order, as whitespace-separated tokens)
+    std::string ttSize    = (is >> token) ? token : "16";
+    std::string threads   = (is >> token) ? token : "1";
+    std::string limit     = (is >> token) ? token : "13";
+    std::string fenFile   = (is >> token) ? token : "default";
+    std::string limitType = (is >> token) ? token : "depth";

-  if (fenFile == "default")
-      fens = Defaults;
+    go = limitType == "eval" ? "eval" : "go " + limitType + " " + limit;  // "eval" emits a bare "eval" command, ignoring limit

-  else if (fenFile == "current")
-      fens.push_back(current.fen());
+    if (fenFile == "default")
+        fens = Defaults;  // built-in list defined at the top of this file

-  else
-  {
-      string fen;
-      ifstream file(fenFile);
+    else if (fenFile == "current")
+        fens.push_back(currentFen);  // only the caller-supplied position

-      if (!file.is_open())
-      {
-          cerr << "Unable to open file " << fenFile << endl;
-          exit(EXIT_FAILURE);
-      }
+    else
+    {
+        std::string   fen;
+        std::ifstream file(fenFile);  // any other value is treated as a file name

-      while (getline(file, fen))
-          if (!fen.empty())
-              fens.push_back(fen);
+        if (!file.is_open())
+        {
+            std::cerr << "Unable to open file " << fenFile << std::endl;
+            exit(EXIT_FAILURE);  // terminates the whole process, not just the bench command
+        }

-      file.close();
-  }
+        while (getline(file, fen))
+            if (!fen.empty())  // one entry per line; empty lines are skipped
+                fens.push_back(fen);

-  list.emplace_back("setoption name Threads value " + threads);
-  list.emplace_back("setoption name Hash value " + ttSize);
-  list.emplace_back("ucinewgame");
+        file.close();
+    }

-  size_t posCounter = 0;
+    list.emplace_back("setoption name Threads value " + threads);
+    list.emplace_back("setoption name Hash value " + ttSize);
+    list.emplace_back("ucinewgame");

-  for (const string& fen : fens)
-      if (fen.find("setoption") != string::npos)
-          list.emplace_back(fen);
-      else
-      {
-          if (evalType == "classical" || (evalType == "mixed" && posCounter % 2 == 0))
-              list.emplace_back("setoption name Use NNUE value false");
-          else if (evalType == "NNUE" || (evalType == "mixed" && posCounter % 2 != 0))
-              list.emplace_back("setoption name Use NNUE value true");
-          list.emplace_back("position fen " + fen);
-          list.emplace_back(go);
-          ++posCounter;
-      }
+    for (const std::string& fen : fens)
+        if (fen.find("setoption") != std::string::npos)  // entries containing "setoption" are passed through verbatim
+            list.emplace_back(fen);
+        else
+        {
+            list.emplace_back("position fen " + fen);  // every other entry is treated as a FEN
+            list.emplace_back(go);
+        }

-  list.emplace_back("setoption name Use NNUE value true");
-
-  return list;
+    return list;
 }
+
+// Builds the command sequence for the time-based benchmark. Up to three optional,
+// positional integers are read from `is`: thread count (default:
+// get_hardware_concurrency()), TT size (default: TT_SIZE_PER_THREAD * threads) and the
+// desired total duration in seconds (default: DEFAULT_DURATION_S).
+//
+// The returned setup holds the resolved thread count and TT size, the arguments that
+// were actually parsed (originalInvocation), all three arguments with defaults filled
+// in (filledInvocation), and the commands: for every game in BenchmarkPositions one
+// "ucinewgame" followed by a "position fen ..." / "go movetime ..." pair per position.
+BenchmarkSetup setup_benchmark(std::istream& is) {
+    // TT_SIZE_PER_THREAD is chosen such that roughly half of the hash is used after all
+    // positions for the current sequence have been searched.
+    static constexpr int TT_SIZE_PER_THREAD = 128;
+
+    static constexpr int DEFAULT_DURATION_S = 150;
+
+    BenchmarkSetup setup{};
+
+    // Assign default values to missing arguments. A failed extraction leaves the stream
+    // in a failed state, so every later argument falls back to its default as well.
+    int desiredTimeS = DEFAULT_DURATION_S;
+
+    if (!(is >> setup.threads))
+        setup.threads = get_hardware_concurrency();
+    else
+        setup.originalInvocation += std::to_string(setup.threads);
+
+    if (!(is >> setup.ttSize))
+        setup.ttSize = TT_SIZE_PER_THREAD * setup.threads;
+    else
+        setup.originalInvocation += " " + std::to_string(setup.ttSize);
+
+    if (!(is >> desiredTimeS))
+        desiredTimeS = DEFAULT_DURATION_S;
+    else
+        setup.originalInvocation += " " + std::to_string(desiredTimeS);
+
+    setup.filledInvocation += std::to_string(setup.threads) + " " + std::to_string(setup.ttSize)
+                            + " " + std::to_string(desiredTimeS);
+
+    // Unscaled time per move, fit roughly based on LTC games:
+    //   seconds = 50 / (ply + 15), i.e. ms = 50000 / (ply + 15)
+    // With this fit the 10th move gets 2000 ms; the values are rescaled below so that
+    // the whole run matches the desired duration.
+    auto getCorrectedTime = [](int ply) {
+        return 50000.0 / (static_cast<double>(ply) + 15.0);
+    };
+
+    // First pass: only accumulate the unscaled time of every position. No commands are
+    // emitted here, otherwise the sequence would start with one redundant "ucinewgame"
+    // per game before the real commands of the second pass.
+    float totalTime = 0;
+    for (const auto& game : BenchmarkPositions)
+        for (int ply = 1; ply <= static_cast<int>(game.size()); ++ply)
+            totalTime += static_cast<float>(getCorrectedTime(ply));
+
+    const float timeScaleFactor = static_cast<float>(desiredTimeS * 1000) / totalTime;
+
+    // Second pass: emit the commands. The ply counter restarts at 1 for every game, and
+    // each movetime is truncated to whole milliseconds.
+    for (const auto& game : BenchmarkPositions)
+    {
+        setup.commands.emplace_back("ucinewgame");
+        int ply = 1;
+        for (const std::string& fen : game)
+        {
+            setup.commands.emplace_back("position fen " + fen);
+
+            const int correctedTime = static_cast<int>(getCorrectedTime(ply) * timeScaleFactor);
+            setup.commands.emplace_back("go movetime " + std::to_string(correctedTime));
+
+            ply += 1;
+        }
+    }
+
+    return setup;
+}
+
+}  // namespace Stockfish::Benchmark
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -16,23 +16,27 @@
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

+#ifndef BENCHMARK_H_INCLUDED
+#define BENCHMARK_H_INCLUDED

-#ifndef PSQT_H_INCLUDED
-#define PSQT_H_INCLUDED
+#include <iosfwd>
+#include <string>
+#include <vector>

+namespace Stockfish::Benchmark {

-#include "types.h"
+std::vector<std::string> setup_bench(const std::string&, std::istream&);

+struct BenchmarkSetup {  // result of setup_benchmark(): resolved settings plus the commands to run
+    int                      ttSize;              // TT size argument, or a per-thread default times `threads`
+    int                      threads;             // thread count argument, or get_hardware_concurrency()
+    std::vector<std::string> commands;            // "ucinewgame" / "position fen ..." / "go movetime ..." in run order
+    std::string              originalInvocation;  // only the arguments that were actually parsed, space-separated
+    std::string              filledInvocation;    // all three arguments with defaults filled in
+};

-namespace PSQT
-{
+BenchmarkSetup setup_benchmark(std::istream&);

-extern Score psq[PIECE_NB][SQUARE_NB];
+}  // namespace Stockfish::Benchmark

-// Fill psqt array from a set of internally linked parameters
-extern void init();
-
-} // namespace PSQT
-
-
-#endif // PSQT_H_INCLUDED
+#endif  // #ifndef BENCHMARK_H_INCLUDED
@@ -1,170 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <cassert>
-#include <vector>
-#include <bitset>
-
-#include "bitboard.h"
-#include "types.h"
-
-namespace {
-
-  // There are 24 possible pawn squares: files A to D and ranks from 2 to 7.
-  // Positions with the pawn on files E to H will be mirrored before probing.
-  constexpr unsigned MAX_INDEX = 2*24*64*64; // stm * psq * wksq * bksq = 196608
-
-  std::bitset<MAX_INDEX> KPKBitbase;
-
-  // A KPK bitbase index is an integer in [0, IndexMax] range
-  //
-  // Information is mapped in a way that minimizes the number of iterations:
-  //
-  // bit  0- 5: white king square (from SQ_A1 to SQ_H8)
-  // bit  6-11: black king square (from SQ_A1 to SQ_H8)
-  // bit    12: side to move (WHITE or BLACK)
-  // bit 13-14: white pawn file (from FILE_A to FILE_D)
-  // bit 15-17: white pawn RANK_7 - rank (from RANK_7 - RANK_7 to RANK_7 - RANK_2)
-  unsigned index(Color stm, Square bksq, Square wksq, Square psq) {
-    return int(wksq) | (bksq << 6) | (stm << 12) | (file_of(psq) << 13) | ((RANK_7 - rank_of(psq)) << 15);
-  }
-
-  enum Result {
-    INVALID = 0,
-    UNKNOWN = 1,
-    DRAW    = 2,
-    WIN     = 4
-  };
-
-  Result& operator|=(Result& r, Result v) { return r = Result(r | v); }
-
-  struct KPKPosition {
-    KPKPosition() = default;
-    explicit KPKPosition(unsigned idx);
-    operator Result() const { return result; }
-    Result classify(const std::vector<KPKPosition>& db);
-
-    Color stm;
-    Square ksq[COLOR_NB], psq;
-    Result result;
-  };
-
-} // namespace
-
-
-bool Bitbases::probe(Square wksq, Square wpsq, Square bksq, Color stm) {
-
-  assert(file_of(wpsq) <= FILE_D);
-
-  return KPKBitbase[index(stm, bksq, wksq, wpsq)];
-}
-
-
-void Bitbases::init() {
-
-  std::vector<KPKPosition> db(MAX_INDEX);
-  unsigned idx, repeat = 1;
-
-  // Initialize db with known win / draw positions
-  for (idx = 0; idx < MAX_INDEX; ++idx)
-      db[idx] = KPKPosition(idx);
-
-  // Iterate through the positions until none of the unknown positions can be
-  // changed to either wins or draws (15 cycles needed).
-  while (repeat)
-      for (repeat = idx = 0; idx < MAX_INDEX; ++idx)
-          repeat |= (db[idx] == UNKNOWN && db[idx].classify(db) != UNKNOWN);
-
-  // Fill the bitbase with the decisive results
-  for (idx = 0; idx < MAX_INDEX; ++idx)
-      if (db[idx] == WIN)
-          KPKBitbase.set(idx);
-}
-
-
-namespace {
-
-  KPKPosition::KPKPosition(unsigned idx) {
-
-    ksq[WHITE] = Square((idx >>  0) & 0x3F);
-    ksq[BLACK] = Square((idx >>  6) & 0x3F);
-    stm        = Color ((idx >> 12) & 0x01);
-    psq        = make_square(File((idx >> 13) & 0x3), Rank(RANK_7 - ((idx >> 15) & 0x7)));
-
-    // Invalid if two pieces are on the same square or if a king can be captured
-    if (   distance(ksq[WHITE], ksq[BLACK]) <= 1
-        || ksq[WHITE] == psq
-        || ksq[BLACK] == psq
-        || (stm == WHITE && (pawn_attacks_bb(WHITE, psq) & ksq[BLACK])))
-        result = INVALID;
-
-    // Win if the pawn can be promoted without getting captured
-    else if (   stm == WHITE
-             && rank_of(psq) == RANK_7
-             && ksq[WHITE] != psq + NORTH
-             && (    distance(ksq[BLACK], psq + NORTH) > 1
-                 || (distance(ksq[WHITE], psq + NORTH) == 1)))
-        result = WIN;
-
-    // Draw if it is stalemate or the black king can capture the pawn
-    else if (   stm == BLACK
-             && (  !(attacks_bb<KING>(ksq[BLACK]) & ~(attacks_bb<KING>(ksq[WHITE]) | pawn_attacks_bb(WHITE, psq)))
-                 || (attacks_bb<KING>(ksq[BLACK]) & ~attacks_bb<KING>(ksq[WHITE]) & psq)))
-        result = DRAW;
-
-    // Position will be classified later
-    else
-        result = UNKNOWN;
-  }
-
-  Result KPKPosition::classify(const std::vector<KPKPosition>& db) {
-
-    // White to move: If one move leads to a position classified as WIN, the result
-    // of the current position is WIN. If all moves lead to positions classified
-    // as DRAW, the current position is classified as DRAW, otherwise the current
-    // position is classified as UNKNOWN.
-    //
-    // Black to move: If one move leads to a position classified as DRAW, the result
-    // of the current position is DRAW. If all moves lead to positions classified
-    // as WIN, the position is classified as WIN, otherwise the current position is
-    // classified as UNKNOWN.
-    const Result Good = (stm == WHITE ? WIN   : DRAW);
-    const Result Bad  = (stm == WHITE ? DRAW  : WIN);
-
-    Result r = INVALID;
-    Bitboard b = attacks_bb<KING>(ksq[stm]);
-
-    while (b)
-        r |= stm == WHITE ? db[index(BLACK, ksq[BLACK] , pop_lsb(&b), psq)]
-                          : db[index(WHITE, pop_lsb(&b),  ksq[WHITE], psq)];
-
-    if (stm == WHITE)
-    {
-        if (rank_of(psq) < RANK_7)      // Single push
-            r |= db[index(BLACK, ksq[BLACK], ksq[WHITE], psq + NORTH)];
-
-        if (   rank_of(psq) == RANK_2   // Double push
-            && psq + NORTH != ksq[WHITE]
-            && psq + NORTH != ksq[BLACK])
-            r |= db[index(BLACK, ksq[BLACK], ksq[WHITE], psq + NORTH + NORTH)];
-    }
-
-    return result = r & Good  ? Good  : r & UNKNOWN ? UNKNOWN : Bad;
-  }
-
-} // namespace
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -16,158 +16,172 @@
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

+#include "bitboard.h"
+
 #include <algorithm>
 #include <bitset>
+#include <initializer_list>

-#include "bitboard.h"
 #include "misc.h"

+namespace Stockfish {
+
 uint8_t PopCnt16[1 << 16];
 uint8_t SquareDistance[SQUARE_NB][SQUARE_NB];

-Bitboard SquareBB[SQUARE_NB];
 Bitboard LineBB[SQUARE_NB][SQUARE_NB];
+Bitboard BetweenBB[SQUARE_NB][SQUARE_NB];
 Bitboard PseudoAttacks[PIECE_TYPE_NB][SQUARE_NB];
 Bitboard PawnAttacks[COLOR_NB][SQUARE_NB];

-Magic RookMagics[SQUARE_NB];
-Magic BishopMagics[SQUARE_NB];
+alignas(64) Magic Magics[SQUARE_NB][2];

 namespace {

-  Bitboard RookTable[0x19000];  // To store rook attacks
-  Bitboard BishopTable[0x1480]; // To store bishop attacks
+Bitboard RookTable[0x19000];   // To store rook attacks
+Bitboard BishopTable[0x1480];  // To store bishop attacks

-  void init_magics(PieceType pt, Bitboard table[], Magic magics[]);
+void init_magics(PieceType pt, Bitboard table[], Magic magics[][2]);

-}
-
-
-/// safe_destination() returns the bitboard of target square for the given step
-/// from the given square. If the step is off the board, returns empty bitboard.
-
-inline Bitboard safe_destination(Square s, int step) {
+// Returns the bitboard of target square for the given step
+// from the given square. If the step is off the board, returns empty bitboard.
+Bitboard safe_destination(Square s, int step) {
    Square to = Square(s + step);
    return is_ok(to) && distance(s, to) <= 2 ? square_bb(to) : Bitboard(0);
 }
+}

+// Returns an ASCII representation of a bitboard suitable
+// to be printed to standard output. Useful for debugging.
+std::string Bitboards::pretty(Bitboard b) {

-/// Bitboards::pretty() returns an ASCII representation of a bitboard suitable
-/// to be printed to standard output. Useful for debugging.
+    std::string s = "+---+---+---+---+---+---+---+---+\n";

-const std::string Bitboards::pretty(Bitboard b) {
+    for (Rank r = RANK_8; r >= RANK_1; --r)
+    {
+        for (File f = FILE_A; f <= FILE_H; ++f)
+            s += b & make_square(f, r) ? "| X " : "|   ";

-  std::string s = "+---+---+---+---+---+---+---+---+\n";
+        s += "| " + std::to_string(1 + r) + "\n+---+---+---+---+---+---+---+---+\n";
+    }
+    s += "  a   b   c   d   e   f   g   h\n";

-  for (Rank r = RANK_8; r >= RANK_1; --r)
-  {
-      for (File f = FILE_A; f <= FILE_H; ++f)
-          s += b & make_square(f, r) ? "| X " : "|   ";
-
-      s += "| " + std::to_string(1 + r) + "\n+---+---+---+---+---+---+---+---+\n";
-  }
-  s += "  a   b   c   d   e   f   g   h\n";
-
-  return s;
+    return s;
 }


-/// Bitboards::init() initializes various bitboard tables. It is called at
-/// startup and relies on global objects to be already zero-initialized.
-
+// Initializes various bitboard tables. It is called at
+// startup and relies on global objects to be already zero-initialized.
 void Bitboards::init() {

-  for (unsigned i = 0; i < (1 << 16); ++i)
-      PopCnt16[i] = uint8_t(std::bitset<16>(i).count());
+    for (unsigned i = 0; i < (1 << 16); ++i)
+        PopCnt16[i] = uint8_t(std::bitset<16>(i).count());

-  for (Square s = SQ_A1; s <= SQ_H8; ++s)
-      SquareBB[s] = (1ULL << s);
+    for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1)
+        for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2)
+            SquareDistance[s1][s2] = std::max(distance<File>(s1, s2), distance<Rank>(s1, s2));

-  for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1)
-      for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2)
-          SquareDistance[s1][s2] = std::max(distance<File>(s1, s2), distance<Rank>(s1, s2));
+    init_magics(ROOK, RookTable, Magics);
+    init_magics(BISHOP, BishopTable, Magics);

-  init_magics(ROOK, RookTable, RookMagics);
-  init_magics(BISHOP, BishopTable, BishopMagics);
+    for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1)
+    {
+        PawnAttacks[WHITE][s1] = pawn_attacks_bb<WHITE>(square_bb(s1));
+        PawnAttacks[BLACK][s1] = pawn_attacks_bb<BLACK>(square_bb(s1));

-  for (Square s1 = SQ_A1; s1 <= SQ_H8; ++s1)
-  {
-      PawnAttacks[WHITE][s1] = pawn_attacks_bb<WHITE>(square_bb(s1));
-      PawnAttacks[BLACK][s1] = pawn_attacks_bb<BLACK>(square_bb(s1));
+        for (int step : {-9, -8, -7, -1, 1, 7, 8, 9})
+            PseudoAttacks[KING][s1] |= safe_destination(s1, step);

-      for (int step : {-9, -8, -7, -1, 1, 7, 8, 9} )
-         PseudoAttacks[KING][s1] |= safe_destination(s1, step);
+        for (int step : {-17, -15, -10, -6, 6, 10, 15, 17})
+            PseudoAttacks[KNIGHT][s1] |= safe_destination(s1, step);

-      for (int step : {-17, -15, -10, -6, 6, 10, 15, 17} )
-         PseudoAttacks[KNIGHT][s1] |= safe_destination(s1, step);
+        PseudoAttacks[QUEEN][s1] = PseudoAttacks[BISHOP][s1] = attacks_bb<BISHOP>(s1, 0);
+        PseudoAttacks[QUEEN][s1] |= PseudoAttacks[ROOK][s1]  = attacks_bb<ROOK>(s1, 0);

-      PseudoAttacks[QUEEN][s1]  = PseudoAttacks[BISHOP][s1] = attacks_bb<BISHOP>(s1, 0);
-      PseudoAttacks[QUEEN][s1] |= PseudoAttacks[  ROOK][s1] = attacks_bb<  ROOK>(s1, 0);
-
-      for (PieceType pt : { BISHOP, ROOK })
-          for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2)
-              if (PseudoAttacks[pt][s1] & s2)
-                  LineBB[s1][s2] = (attacks_bb(pt, s1, 0) & attacks_bb(pt, s2, 0)) | s1 | s2;
-  }
+        for (PieceType pt : {BISHOP, ROOK})
+            for (Square s2 = SQ_A1; s2 <= SQ_H8; ++s2)
+            {
+                if (PseudoAttacks[pt][s1] & s2)
+                {
+                    LineBB[s1][s2] = (attacks_bb(pt, s1, 0) & attacks_bb(pt, s2, 0)) | s1 | s2;
+                    BetweenBB[s1][s2] =
+                      (attacks_bb(pt, s1, square_bb(s2)) & attacks_bb(pt, s2, square_bb(s1)));
+                }
+                BetweenBB[s1][s2] |= s2;
+            }
+    }
 }

-
 namespace {

-  Bitboard sliding_attack(PieceType pt, Square sq, Bitboard occupied) {
+Bitboard sliding_attack(PieceType pt, Square sq, Bitboard occupied) {

-    Bitboard attacks = 0;
-    Direction   RookDirections[4] = {NORTH, SOUTH, EAST, WEST};
+    Bitboard  attacks             = 0;
+    Direction RookDirections[4]   = {NORTH, SOUTH, EAST, WEST};
    Direction BishopDirections[4] = {NORTH_EAST, SOUTH_EAST, SOUTH_WEST, NORTH_WEST};

    for (Direction d : (pt == ROOK ? RookDirections : BishopDirections))
    {
        Square s = sq;
-        while(safe_destination(s, d) && !(occupied & s))
+        while (safe_destination(s, d))
+        {
            attacks |= (s += d);
+            if (occupied & s)
+            {
+                break;
+            }
+        }
    }

    return attacks;
-  }
+}


-  // init_magics() computes all rook and bishop attacks at startup. Magic
-  // bitboards are used to look up attacks of sliding pieces. As a reference see
-  // www.chessprogramming.org/Magic_Bitboards. In particular, here we use the so
-  // called "fancy" approach.
-
-  void init_magics(PieceType pt, Bitboard table[], Magic magics[]) {
+// Computes all rook and bishop attacks at startup. Magic
+// bitboards are used to look up attacks of sliding pieces. As a reference see
+// https://www.chessprogramming.org/Magic_Bitboards. In particular, here we use
+// the so-called "fancy" approach.
+void init_magics(PieceType pt, Bitboard table[], Magic magics[][2]) {

+#ifndef USE_PEXT
    // Optimal PRNG seeds to pick the correct magics in the shortest time
-    int seeds[][RANK_NB] = { { 8977, 44560, 54343, 38998,  5731, 95205, 104912, 17020 },
-                             {  728, 10316, 55013, 32803, 12281, 15100,  16645,   255 } };
+    int seeds[][RANK_NB] = {{8977, 44560, 54343, 38998, 5731, 95205, 104912, 17020},
+                            {728, 10316, 55013, 32803, 12281, 15100, 16645, 255}};

-    Bitboard occupancy[4096], reference[4096], edges, b;
-    int epoch[4096] = {}, cnt = 0, size = 0;
+    Bitboard occupancy[4096];
+    int      epoch[4096] = {}, cnt = 0;
+#endif
+    Bitboard reference[4096];
+    int      size = 0;

    for (Square s = SQ_A1; s <= SQ_H8; ++s)
    {
        // Board edges are not considered in the relevant occupancies
-        edges = ((Rank1BB | Rank8BB) & ~rank_bb(s)) | ((FileABB | FileHBB) & ~file_bb(s));
+        Bitboard edges = ((Rank1BB | Rank8BB) & ~rank_bb(s)) | ((FileABB | FileHBB) & ~file_bb(s));

        // Given a square 's', the mask is the bitboard of sliding attacks from
        // 's' computed on an empty board. The index must be big enough to contain
        // all the attacks for each possible subset of the mask and so is 2 power
        // the number of 1s of the mask. Hence we deduce the size of the shift to
        // apply to the 64 or 32 bits word to get the index.
-        Magic& m = magics[s];
-        m.mask  = sliding_attack(pt, s, 0) & ~edges;
+        Magic& m = magics[s][pt - BISHOP];
+        m.mask   = sliding_attack(pt, s, 0) & ~edges;
+#ifndef USE_PEXT
        m.shift = (Is64Bit ? 64 : 32) - popcount(m.mask);
-
+#endif
        // Set the offset for the attacks table of the square. We have individual
        // table sizes for each square with "Fancy Magic Bitboards".
-        m.attacks = s == SQ_A1 ? table : magics[s - 1].attacks + size;
+        m.attacks = s == SQ_A1 ? table : magics[s - 1][pt - BISHOP].attacks + size;
+        size      = 0;

        // Use Carry-Rippler trick to enumerate all subsets of masks[s] and
        // store the corresponding sliding attack bitboard in reference[].
-        b = size = 0;
-        do {
+        Bitboard b = 0;
+        do
+        {
+#ifndef USE_PEXT
            occupancy[size] = b;
+#endif
            reference[size] = sliding_attack(pt, s, b);

            if (HasPext)
@@ -177,16 +191,14 @@ namespace {
            b = (b - m.mask) & m.mask;
        } while (b);

-        if (HasPext)
-            continue;
-
+#ifndef USE_PEXT
        PRNG rng(seeds[Is64Bit][rank_of(s)]);

        // Find a magic for square 's' picking up an (almost) random number
        // until we find the one that passes the verification test.
-        for (int i = 0; i < size; )
+        for (int i = 0; i < size;)
        {
-            for (m.magic = 0; popcount((m.magic * m.mask) >> 56) < 6; )
+            for (m.magic = 0; popcount((m.magic * m.mask) >> 56) < 6;)
                m.magic = rng.sparse_rand<Bitboard>();

            // A good magic must map every possible occupancy to an index that
@@ -201,13 +213,16 @@ namespace {

                if (epoch[idx] < cnt)
                {
-                    epoch[idx] = cnt;
+                    epoch[idx]     = cnt;
                    m.attacks[idx] = reference[i];
                }
                else if (m.attacks[idx] != reference[i])
                    break;
            }
        }
+#endif
    }
-  }
 }
+}
+
+}  // namespace Stockfish
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -19,26 +19,24 @@
 #ifndef BITBOARD_H_INCLUDED
 #define BITBOARD_H_INCLUDED

+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <cstring>
+#include <cstdint>
+#include <cstdlib>
 #include <string>

 #include "types.h"

-namespace Bitbases {
-
-void init();
-bool probe(Square wksq, Square wpsq, Square bksq, Color us);
-
-}
+namespace Stockfish {

 namespace Bitboards {

-void init();
-const std::string pretty(Bitboard b);
+void        init();
+std::string pretty(Bitboard b);

-}
-
-constexpr Bitboard AllSquares = ~Bitboard(0);
-constexpr Bitboard DarkSquares = 0xAA55AA55AA55AA55ULL;
+}  // namespace Stockfish::Bitboards

 constexpr Bitboard FileABB = 0x0101010101010101ULL;
 constexpr Bitboard FileBBB = FileABB << 1;
@@ -58,376 +56,319 @@ constexpr Bitboard Rank6BB = Rank1BB << (8 * 5);
 constexpr Bitboard Rank7BB = Rank1BB << (8 * 6);
 constexpr Bitboard Rank8BB = Rank1BB << (8 * 7);

-constexpr Bitboard QueenSide   = FileABB | FileBBB | FileCBB | FileDBB;
-constexpr Bitboard CenterFiles = FileCBB | FileDBB | FileEBB | FileFBB;
-constexpr Bitboard KingSide    = FileEBB | FileFBB | FileGBB | FileHBB;
-constexpr Bitboard Center      = (FileDBB | FileEBB) & (Rank4BB | Rank5BB);
-
-constexpr Bitboard KingFlank[FILE_NB] = {
-  QueenSide ^ FileDBB, QueenSide, QueenSide,
-  CenterFiles, CenterFiles,
-  KingSide, KingSide, KingSide ^ FileEBB
-};
-
 extern uint8_t PopCnt16[1 << 16];
 extern uint8_t SquareDistance[SQUARE_NB][SQUARE_NB];

-extern Bitboard SquareBB[SQUARE_NB];
+extern Bitboard BetweenBB[SQUARE_NB][SQUARE_NB];
 extern Bitboard LineBB[SQUARE_NB][SQUARE_NB];
 extern Bitboard PseudoAttacks[PIECE_TYPE_NB][SQUARE_NB];
 extern Bitboard PawnAttacks[COLOR_NB][SQUARE_NB];


-/// Magic holds all magic bitboards relevant data for a single square
+// Magic holds all magic bitboards relevant data for a single square
 struct Magic {
-  Bitboard  mask;
-  Bitboard  magic;
-  Bitboard* attacks;
-  unsigned  shift;
+    Bitboard  mask;
+    Bitboard* attacks;
+#ifndef USE_PEXT
+    Bitboard magic;
+    unsigned shift;
+#endif

-  // Compute the attack's index using the 'magic bitboards' approach
-  unsigned index(Bitboard occupied) const {
+    // Compute the attack's index using the 'magic bitboards' approach
+    unsigned index(Bitboard occupied) const {

-    if (HasPext)
+#ifdef USE_PEXT
        return unsigned(pext(occupied, mask));
+#else
+        if (Is64Bit)
+            return unsigned(((occupied & mask) * magic) >> shift);

-    if (Is64Bit)
-        return unsigned(((occupied & mask) * magic) >> shift);
+        unsigned lo = unsigned(occupied) & unsigned(mask);
+        unsigned hi = unsigned(occupied >> 32) & unsigned(mask >> 32);
+        return (lo * unsigned(magic) ^ hi * unsigned(magic >> 32)) >> shift;
+#endif
+    }

-    unsigned lo = unsigned(occupied) & unsigned(mask);
-    unsigned hi = unsigned(occupied >> 32) & unsigned(mask >> 32);
-    return (lo * unsigned(magic) ^ hi * unsigned(magic >> 32)) >> shift;
-  }
+    Bitboard attacks_bb(Bitboard occupied) const { return attacks[index(occupied)]; }
 };

-extern Magic RookMagics[SQUARE_NB];
-extern Magic BishopMagics[SQUARE_NB];
+extern Magic Magics[SQUARE_NB][2];

-inline Bitboard square_bb(Square s) {
-  assert(is_ok(s));
-  return SquareBB[s];
+constexpr Bitboard square_bb(Square s) {
+    assert(is_ok(s));
+    return (1ULL << s);
 }


-/// Overloads of bitwise operators between a Bitboard and a Square for testing
-/// whether a given bit is set in a bitboard, and for setting and clearing bits.
+// Overloads of bitwise operators between a Bitboard and a Square for testing
+// whether a given bit is set in a bitboard, and for setting and clearing bits.

-inline Bitboard  operator&( Bitboard  b, Square s) { return b &  square_bb(s); }
-inline Bitboard  operator|( Bitboard  b, Square s) { return b |  square_bb(s); }
-inline Bitboard  operator^( Bitboard  b, Square s) { return b ^  square_bb(s); }
+inline Bitboard  operator&(Bitboard b, Square s) { return b & square_bb(s); }
+inline Bitboard  operator|(Bitboard b, Square s) { return b | square_bb(s); }
+inline Bitboard  operator^(Bitboard b, Square s) { return b ^ square_bb(s); }
 inline Bitboard& operator|=(Bitboard& b, Square s) { return b |= square_bb(s); }
 inline Bitboard& operator^=(Bitboard& b, Square s) { return b ^= square_bb(s); }

-inline Bitboard  operator&(Square s, Bitboard b) { return b & s; }
-inline Bitboard  operator|(Square s, Bitboard b) { return b | s; }
-inline Bitboard  operator^(Square s, Bitboard b) { return b ^ s; }
+inline Bitboard operator&(Square s, Bitboard b) { return b & s; }
+inline Bitboard operator|(Square s, Bitboard b) { return b | s; }
+inline Bitboard operator^(Square s, Bitboard b) { return b ^ s; }

-inline Bitboard  operator|(Square s1, Square s2) { return square_bb(s1) | s2; }
+inline Bitboard operator|(Square s1, Square s2) { return square_bb(s1) | s2; }

-constexpr bool more_than_one(Bitboard b) {
-  return b & (b - 1);
-}
+constexpr bool more_than_one(Bitboard b) { return b & (b - 1); }


-constexpr bool opposite_colors(Square s1, Square s2) {
-  return (s1 + rank_of(s1) + s2 + rank_of(s2)) & 1;
-}
+// rank_bb() and file_bb() return a bitboard representing all the squares on
+// the given rank or file, respectively.
+
+constexpr Bitboard rank_bb(Rank r) { return Rank1BB << (8 * r); }
+
+constexpr Bitboard rank_bb(Square s) { return rank_bb(rank_of(s)); }
+
+constexpr Bitboard file_bb(File f) { return FileABB << f; }
+
+constexpr Bitboard file_bb(Square s) { return file_bb(file_of(s)); }


-/// rank_bb() and file_bb() return a bitboard representing all the squares on
-/// the given file or rank.
-
-constexpr Bitboard rank_bb(Rank r) {
-  return Rank1BB << (8 * r);
-}
-
-constexpr Bitboard rank_bb(Square s) {
-  return rank_bb(rank_of(s));
-}
-
-constexpr Bitboard file_bb(File f) {
-  return FileABB << f;
-}
-
-constexpr Bitboard file_bb(Square s) {
-  return file_bb(file_of(s));
-}
-
-
-/// shift() moves a bitboard one or two steps as specified by the direction D
-
+// Moves a bitboard one or two steps as specified by the direction D
 template<Direction D>
 constexpr Bitboard shift(Bitboard b) {
-  return  D == NORTH      ?  b             << 8 : D == SOUTH      ?  b             >> 8
-        : D == NORTH+NORTH?  b             <<16 : D == SOUTH+SOUTH?  b             >>16
-        : D == EAST       ? (b & ~FileHBB) << 1 : D == WEST       ? (b & ~FileABB) >> 1
-        : D == NORTH_EAST ? (b & ~FileHBB) << 9 : D == NORTH_WEST ? (b & ~FileABB) << 7
-        : D == SOUTH_EAST ? (b & ~FileHBB) >> 7 : D == SOUTH_WEST ? (b & ~FileABB) >> 9
-        : 0;
+    return D == NORTH         ? b << 8
+         : D == SOUTH         ? b >> 8
+         : D == NORTH + NORTH ? b << 16
+         : D == SOUTH + SOUTH ? b >> 16
+         : D == EAST          ? (b & ~FileHBB) << 1
+         : D == WEST          ? (b & ~FileABB) >> 1
+         : D == NORTH_EAST    ? (b & ~FileHBB) << 9
+         : D == NORTH_WEST    ? (b & ~FileABB) << 7
+         : D == SOUTH_EAST    ? (b & ~FileHBB) >> 7
+         : D == SOUTH_WEST    ? (b & ~FileABB) >> 9
+                              : 0;
 }


-/// pawn_attacks_bb() returns the squares attacked by pawns of the given color
-/// from the squares in the given bitboard.
-
+// Returns the squares attacked by pawns of the given color
+// from the squares in the given bitboard.
 template<Color C>
 constexpr Bitboard pawn_attacks_bb(Bitboard b) {
-  return C == WHITE ? shift<NORTH_WEST>(b) | shift<NORTH_EAST>(b)
-                    : shift<SOUTH_WEST>(b) | shift<SOUTH_EAST>(b);
+    return C == WHITE ? shift<NORTH_WEST>(b) | shift<NORTH_EAST>(b)
+                      : shift<SOUTH_WEST>(b) | shift<SOUTH_EAST>(b);
 }

 inline Bitboard pawn_attacks_bb(Color c, Square s) {

-  assert(is_ok(s));
-  return PawnAttacks[c][s];
+    assert(is_ok(s));
+    return PawnAttacks[c][s];
 }

-
-/// pawn_double_attacks_bb() returns the squares doubly attacked by pawns of the
-/// given color from the squares in the given bitboard.
-
-template<Color C>
-constexpr Bitboard pawn_double_attacks_bb(Bitboard b) {
-  return C == WHITE ? shift<NORTH_WEST>(b) & shift<NORTH_EAST>(b)
-                    : shift<SOUTH_WEST>(b) & shift<SOUTH_EAST>(b);
-}
-
-
-/// adjacent_files_bb() returns a bitboard representing all the squares on the
-/// adjacent files of a given square.
-
-constexpr Bitboard adjacent_files_bb(Square s) {
-  return shift<EAST>(file_bb(s)) | shift<WEST>(file_bb(s));
-}
-
-
-/// line_bb() returns a bitboard representing an entire line (from board edge
-/// to board edge) that intersects the two given squares. If the given squares
-/// are not on a same file/rank/diagonal, the function returns 0. For instance,
-/// line_bb(SQ_C4, SQ_F7) will return a bitboard with the A2-G8 diagonal.
-
+// Returns a bitboard representing an entire line (from board edge
+// to board edge) that intersects the two given squares. If the given squares
+// are not on the same file/rank/diagonal, the function returns 0. For instance,
+// line_bb(SQ_C4, SQ_F7) will return a bitboard with the A2-G8 diagonal.
 inline Bitboard line_bb(Square s1, Square s2) {

-  assert(is_ok(s1) && is_ok(s2));
-  return LineBB[s1][s2];
+    assert(is_ok(s1) && is_ok(s2));
+    return LineBB[s1][s2];
 }


-/// between_bb() returns a bitboard representing squares that are linearly
-/// between the two given squares (excluding the given squares). If the given
-/// squares are not on a same file/rank/diagonal, we return 0. For instance,
-/// between_bb(SQ_C4, SQ_F7) will return a bitboard with squares D5 and E6.
-
+// Returns a bitboard representing the squares in the semi-open
+// segment between the squares s1 and s2 (excluding s1 but including s2). If the
+// given squares are not on the same file/rank/diagonal, it returns s2. For instance,
+// between_bb(SQ_C4, SQ_F7) will return a bitboard with squares D5, E6 and F7, but
+// between_bb(SQ_E6, SQ_F8) will return a bitboard with the square F8. This trick
+// allows non-king evasion moves to be generated faster: the defending piece must either
+// interpose itself to cover the check or capture the checking piece.
 inline Bitboard between_bb(Square s1, Square s2) {
-  Bitboard b = line_bb(s1, s2) & ((AllSquares << s1) ^ (AllSquares << s2));
-  return b & (b - 1); //exclude lsb
+
+    assert(is_ok(s1) && is_ok(s2));
+    return BetweenBB[s1][s2];
 }

+// Returns true if the squares s1, s2 and s3 are aligned either on a
+// straight or on a diagonal line.
+inline bool aligned(Square s1, Square s2, Square s3) { return line_bb(s1, s2) & s3; }

-/// forward_ranks_bb() returns a bitboard representing the squares on the ranks
-/// in front of the given one, from the point of view of the given color. For instance,
-/// forward_ranks_bb(BLACK, SQ_D3) will return the 16 squares on ranks 1 and 2.

-constexpr Bitboard forward_ranks_bb(Color c, Square s) {
-  return c == WHITE ? ~Rank1BB << 8 * relative_rank(WHITE, s)
-                    : ~Rank8BB >> 8 * relative_rank(BLACK, s);
+// distance() functions return the distance between x and y, defined as the
+// number of steps for a king in x to reach y.
+
+template<typename T1 = Square>
+inline int distance(Square x, Square y);
+
+template<>
+inline int distance<File>(Square x, Square y) {
+    return std::abs(file_of(x) - file_of(y));
 }

-
-/// forward_file_bb() returns a bitboard representing all the squares along the
-/// line in front of the given one, from the point of view of the given color.
-
-constexpr Bitboard forward_file_bb(Color c, Square s) {
-  return forward_ranks_bb(c, s) & file_bb(s);
+template<>
+inline int distance<Rank>(Square x, Square y) {
+    return std::abs(rank_of(x) - rank_of(y));
 }

-
-/// pawn_attack_span() returns a bitboard representing all the squares that can
-/// be attacked by a pawn of the given color when it moves along its file, starting
-/// from the given square.
-
-constexpr Bitboard pawn_attack_span(Color c, Square s) {
-  return forward_ranks_bb(c, s) & adjacent_files_bb(s);
+template<>
+inline int distance<Square>(Square x, Square y) {
+    return SquareDistance[x][y];
 }

-
-/// passed_pawn_span() returns a bitboard which can be used to test if a pawn of
-/// the given color and on the given square is a passed pawn.
-
-constexpr Bitboard passed_pawn_span(Color c, Square s) {
-  return pawn_attack_span(c, s) | forward_file_bb(c, s);
-}
-
-
-/// aligned() returns true if the squares s1, s2 and s3 are aligned either on a
-/// straight or on a diagonal line.
-
-inline bool aligned(Square s1, Square s2, Square s3) {
-  return line_bb(s1, s2) & s3;
-}
-
-
-/// distance() functions return the distance between x and y, defined as the
-/// number of steps for a king in x to reach y.
-
-template<typename T1 = Square> inline int distance(Square x, Square y);
-template<> inline int distance<File>(Square x, Square y) { return std::abs(file_of(x) - file_of(y)); }
-template<> inline int distance<Rank>(Square x, Square y) { return std::abs(rank_of(x) - rank_of(y)); }
-template<> inline int distance<Square>(Square x, Square y) { return SquareDistance[x][y]; }
-
 inline int edge_distance(File f) { return std::min(f, File(FILE_H - f)); }
-inline int edge_distance(Rank r) { return std::min(r, Rank(RANK_8 - r)); }
-
-
-/// attacks_bb(Square) returns the pseudo attacks of the give piece type
-/// assuming an empty board.

+// Returns the pseudo attacks of the given piece type
+// assuming an empty board.
 template<PieceType Pt>
 inline Bitboard attacks_bb(Square s) {

-  assert((Pt != PAWN) && (is_ok(s)));
-
-  return PseudoAttacks[Pt][s];
+    assert((Pt != PAWN) && (is_ok(s)));
+    return PseudoAttacks[Pt][s];
 }


-/// attacks_bb(Square, Bitboard) returns the attacks by the given piece
-/// assuming the board is occupied according to the passed Bitboard.
-/// Sliding piece attacks do not continue passed an occupied square.
-
+// Returns the attacks by the given piece
+// assuming the board is occupied according to the passed Bitboard.
+// Sliding piece attacks do not continue past an occupied square.
 template<PieceType Pt>
 inline Bitboard attacks_bb(Square s, Bitboard occupied) {

-  assert((Pt != PAWN) && (is_ok(s)));
+    assert((Pt != PAWN) && (is_ok(s)));

-  switch (Pt)
-  {
-  case BISHOP: return BishopMagics[s].attacks[BishopMagics[s].index(occupied)];
-  case ROOK  : return   RookMagics[s].attacks[  RookMagics[s].index(occupied)];
-  case QUEEN : return attacks_bb<BISHOP>(s, occupied) | attacks_bb<ROOK>(s, occupied);
-  default    : return PseudoAttacks[Pt][s];
-  }
+    switch (Pt)
+    {
+    case BISHOP :
+    case ROOK :
+        return Magics[s][Pt - BISHOP].attacks_bb(occupied);
+    case QUEEN :
+        return attacks_bb<BISHOP>(s, occupied) | attacks_bb<ROOK>(s, occupied);
+    default :
+        return PseudoAttacks[Pt][s];
+    }
 }

+// Returns the attacks by the given piece
+// assuming the board is occupied according to the passed Bitboard.
+// Sliding piece attacks do not continue past an occupied square.
 inline Bitboard attacks_bb(PieceType pt, Square s, Bitboard occupied) {

-  assert((pt != PAWN) && (is_ok(s)));
+    assert((pt != PAWN) && (is_ok(s)));

-  switch (pt)
-  {
-  case BISHOP: return attacks_bb<BISHOP>(s, occupied);
-  case ROOK  : return attacks_bb<  ROOK>(s, occupied);
-  case QUEEN : return attacks_bb<BISHOP>(s, occupied) | attacks_bb<ROOK>(s, occupied);
-  default    : return PseudoAttacks[pt][s];
-  }
+    switch (pt)
+    {
+    case BISHOP :
+        return attacks_bb<BISHOP>(s, occupied);
+    case ROOK :
+        return attacks_bb<ROOK>(s, occupied);
+    case QUEEN :
+        return attacks_bb<BISHOP>(s, occupied) | attacks_bb<ROOK>(s, occupied);
+    default :
+        return PseudoAttacks[pt][s];
+    }
 }


-/// popcount() counts the number of non-zero bits in a bitboard
-
+// Counts the number of non-zero bits in a bitboard.
 inline int popcount(Bitboard b) {

 #ifndef USE_POPCNT

-  union { Bitboard bb; uint16_t u[4]; } v = { b };
-  return PopCnt16[v.u[0]] + PopCnt16[v.u[1]] + PopCnt16[v.u[2]] + PopCnt16[v.u[3]];
+    std::uint16_t indices[4];
+    std::memcpy(indices, &b, sizeof(b));
+    return PopCnt16[indices[0]] + PopCnt16[indices[1]] + PopCnt16[indices[2]]
+         + PopCnt16[indices[3]];

-#elif defined(_MSC_VER) || defined(__INTEL_COMPILER)
+#elif defined(_MSC_VER)

-  return (int)_mm_popcnt_u64(b);
+    return int(_mm_popcnt_u64(b));

-#else // Assumed gcc or compatible compiler
+#else  // Assumed gcc or compatible compiler

-  return __builtin_popcountll(b);
+    return __builtin_popcountll(b);

 #endif
 }

-
-/// lsb() and msb() return the least/most significant bit in a non-zero bitboard
-
-#if defined(__GNUC__)  // GCC, Clang, ICC
-
+// Returns the least significant bit in a non-zero bitboard.
 inline Square lsb(Bitboard b) {
-  assert(b);
-  return Square(__builtin_ctzll(b));
-}
+    assert(b);

-inline Square msb(Bitboard b) {
-  assert(b);
-  return Square(63 ^ __builtin_clzll(b));
-}
+#if defined(__GNUC__)  // GCC, Clang, ICX

-#elif defined(_MSC_VER)  // MSVC
+    return Square(__builtin_ctzll(b));

-#ifdef _WIN64  // MSVC, WIN64
+#elif defined(_MSC_VER)
+    #ifdef _WIN64  // MSVC, WIN64

-inline Square lsb(Bitboard b) {
-  assert(b);
-  unsigned long idx;
-  _BitScanForward64(&idx, b);
-  return (Square) idx;
-}
+    unsigned long idx;
+    _BitScanForward64(&idx, b);
+    return Square(idx);

-inline Square msb(Bitboard b) {
-  assert(b);
-  unsigned long idx;
-  _BitScanReverse64(&idx, b);
-  return (Square) idx;
-}
-
-#else  // MSVC, WIN32
-
-inline Square lsb(Bitboard b) {
-  assert(b);
-  unsigned long idx;
-
-  if (b & 0xffffffff) {
-      _BitScanForward(&idx, int32_t(b));
-      return Square(idx);
-  } else {
-      _BitScanForward(&idx, int32_t(b >> 32));
-      return Square(idx + 32);
-  }
-}
-
-inline Square msb(Bitboard b) {
-  assert(b);
-  unsigned long idx;
-
-  if (b >> 32) {
-      _BitScanReverse(&idx, int32_t(b >> 32));
-      return Square(idx + 32);
-  } else {
-      _BitScanReverse(&idx, int32_t(b));
-      return Square(idx);
-  }
-}
-
-#endif
+    #else  // MSVC, WIN32
+    unsigned long idx;

+    if (b & 0xffffffff)
+    {
+        _BitScanForward(&idx, int32_t(b));
+        return Square(idx);
+    }
+    else
+    {
+        _BitScanForward(&idx, int32_t(b >> 32));
+        return Square(idx + 32);
+    }
+    #endif
 #else  // Compiler is neither GCC nor MSVC compatible
-
-#error "Compiler not supported."
-
+    #error "Compiler not supported."
 #endif
-
-
-/// pop_lsb() finds and clears the least significant bit in a non-zero bitboard
-
-inline Square pop_lsb(Bitboard* b) {
-  assert(*b);
-  const Square s = lsb(*b);
-  *b &= *b - 1;
-  return s;
 }

+// Returns the most significant bit in a non-zero bitboard.
+inline Square msb(Bitboard b) {
+    assert(b);

-/// frontmost_sq() returns the most advanced square for the given color,
-/// requires a non-zero bitboard.
-inline Square frontmost_sq(Color c, Bitboard b) {
-  assert(b);
-  return c == WHITE ? msb(b) : lsb(b);
+#if defined(__GNUC__)  // GCC, Clang, ICX
+
+    return Square(63 ^ __builtin_clzll(b));
+
+#elif defined(_MSC_VER)
+    #ifdef _WIN64  // MSVC, WIN64
+
+    unsigned long idx;
+    _BitScanReverse64(&idx, b);
+    return Square(idx);
+
+    #else  // MSVC, WIN32
+
+    unsigned long idx;
+
+    if (b >> 32)
+    {
+        _BitScanReverse(&idx, int32_t(b >> 32));
+        return Square(idx + 32);
+    }
+    else
+    {
+        _BitScanReverse(&idx, int32_t(b));
+        return Square(idx);
+    }
+    #endif
+#else  // Compiler is neither GCC nor MSVC compatible
+    #error "Compiler not supported."
+#endif
 }

-#endif // #ifndef BITBOARD_H_INCLUDED
+// Returns the bitboard of the least significant
+// square of a non-zero bitboard. It is equivalent to square_bb(lsb(bb)).
+inline Bitboard least_significant_square_bb(Bitboard b) {
+    assert(b);
+    return b & -b;
+}
+
+// Finds and clears the least significant bit in a non-zero bitboard.
+inline Square pop_lsb(Bitboard& b) {
+    assert(b);
+    const Square s = lsb(b);
+    b &= b - 1;
+    return s;
+}
+
+}  // namespace Stockfish
+
+#endif  // #ifndef BITBOARD_H_INCLUDED
@@ -1,743 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <cassert>
-
-#include "bitboard.h"
-#include "endgame.h"
-#include "movegen.h"
-
-namespace {
-
-  // Used to drive the king towards the edge of the board
-  // in KX vs K and KQ vs KR endgames.
-  // Values range from 27 (center squares) to 90 (in the corners)
-  inline int push_to_edge(Square s) {
-      int rd = edge_distance(rank_of(s)), fd = edge_distance(file_of(s));
-      return 90 - (7 * fd * fd / 2 + 7 * rd * rd / 2);
-  }
-
-  // Used to drive the king towards A1H8 corners in KBN vs K endgames.
-  // Values range from 0 on A8H1 diagonal to 7 in A1H8 corners
-  inline int push_to_corner(Square s) {
-      return abs(7 - rank_of(s) - file_of(s));
-  }
-
-  // Drive a piece close to or away from another piece
-  inline int push_close(Square s1, Square s2) { return 140 - 20 * distance(s1, s2); }
-  inline int push_away(Square s1, Square s2) { return 120 - push_close(s1, s2); }
-
-#ifndef NDEBUG
-  bool verify_material(const Position& pos, Color c, Value npm, int pawnsCnt) {
-    return pos.non_pawn_material(c) == npm && pos.count<PAWN>(c) == pawnsCnt;
-  }
-#endif
-
-  // Map the square as if strongSide is white and strongSide's only pawn
-  // is on the left half of the board.
-  Square normalize(const Position& pos, Color strongSide, Square sq) {
-
-    assert(pos.count<PAWN>(strongSide) == 1);
-
-    if (file_of(pos.square<PAWN>(strongSide)) >= FILE_E)
-        sq = flip_file(sq);
-
-    return strongSide == WHITE ? sq : flip_rank(sq);
-  }
-
-} // namespace
-
-
-namespace Endgames {
-
-  std::pair<Map<Value>, Map<ScaleFactor>> maps;
-
-  void init() {
-
-    add<KPK>("KPK");
-    add<KNNK>("KNNK");
-    add<KBNK>("KBNK");
-    add<KRKP>("KRKP");
-    add<KRKB>("KRKB");
-    add<KRKN>("KRKN");
-    add<KQKP>("KQKP");
-    add<KQKR>("KQKR");
-    add<KNNKP>("KNNKP");
-
-    add<KRPKR>("KRPKR");
-    add<KRPKB>("KRPKB");
-    add<KBPKB>("KBPKB");
-    add<KBPKN>("KBPKN");
-    add<KBPPKB>("KBPPKB");
-    add<KRPPKRP>("KRPPKRP");
-  }
-}
-
-
-/// Mate with KX vs K. This function is used to evaluate positions with
-/// king and plenty of material vs a lone king. It simply gives the
-/// attacking side a bonus for driving the defending king towards the edge
-/// of the board, and for keeping the distance between the two kings small.
-template<>
-Value Endgame<KXK>::operator()(const Position& pos) const {
-
-  assert(verify_material(pos, weakSide, VALUE_ZERO, 0));
-  assert(!pos.checkers()); // Eval is never called when in check
-
-  // Stalemate detection with lone king
-  if (pos.side_to_move() == weakSide && !MoveList<LEGAL>(pos).size())
-      return VALUE_DRAW;
-
-  Square strongKing = pos.square<KING>(strongSide);
-  Square weakKing   = pos.square<KING>(weakSide);
-
-  Value result =  pos.non_pawn_material(strongSide)
-                + pos.count<PAWN>(strongSide) * PawnValueEg
-                + push_to_edge(weakKing)
-                + push_close(strongKing, weakKing);
-
-  if (   pos.count<QUEEN>(strongSide)
-      || pos.count<ROOK>(strongSide)
-      ||(pos.count<BISHOP>(strongSide) && pos.count<KNIGHT>(strongSide))
-      || (   (pos.pieces(strongSide, BISHOP) & ~DarkSquares)
-          && (pos.pieces(strongSide, BISHOP) &  DarkSquares)))
-      result = std::min(result + VALUE_KNOWN_WIN, VALUE_TB_WIN_IN_MAX_PLY - 1);
-
-  return strongSide == pos.side_to_move() ? result : -result;
-}
-
-
-/// Mate with KBN vs K. This is similar to KX vs K, but we have to drive the
-/// defending king towards a corner square that our bishop attacks.
-template<>
-Value Endgame<KBNK>::operator()(const Position& pos) const {
-
-  assert(verify_material(pos, strongSide, KnightValueMg + BishopValueMg, 0));
-  assert(verify_material(pos, weakSide, VALUE_ZERO, 0));
-
-  Square strongKing   = pos.square<KING>(strongSide);
-  Square strongBishop = pos.square<BISHOP>(strongSide);
-  Square weakKing     = pos.square<KING>(weakSide);
-
-  // If our bishop does not attack A1/H8, we flip the enemy king square
-  // to drive to opposite corners (A8/H1).
-
-  Value result =  (VALUE_KNOWN_WIN + 3520)
-                + push_close(strongKing, weakKing)
-                + 420 * push_to_corner(opposite_colors(strongBishop, SQ_A1) ? flip_file(weakKing) : weakKing);
-
-  assert(abs(result) < VALUE_TB_WIN_IN_MAX_PLY);
-  return strongSide == pos.side_to_move() ? result : -result;
-}
-
-
-/// KP vs K. This endgame is evaluated with the help of a bitbase
-template<>
-Value Endgame<KPK>::operator()(const Position& pos) const {
-
-  assert(verify_material(pos, strongSide, VALUE_ZERO, 1));
-  assert(verify_material(pos, weakSide, VALUE_ZERO, 0));
-
-  // Assume strongSide is white and the pawn is on files A-D
-  Square strongKing = normalize(pos, strongSide, pos.square<KING>(strongSide));
-  Square strongPawn = normalize(pos, strongSide, pos.square<PAWN>(strongSide));
-  Square weakKing   = normalize(pos, strongSide, pos.square<KING>(weakSide));
-
-  Color us = strongSide == pos.side_to_move() ? WHITE : BLACK;
-
-  if (!Bitbases::probe(strongKing, strongPawn, weakKing, us))
-      return VALUE_DRAW;
-
-  Value result = VALUE_KNOWN_WIN + PawnValueEg + Value(rank_of(strongPawn));
-
-  return strongSide == pos.side_to_move() ? result : -result;
-}
-
-
-/// KR vs KP. This is a somewhat tricky endgame to evaluate precisely without
-/// a bitbase. The function below returns drawish scores when the pawn is
-/// far advanced with support of the king, while the attacking king is far
-/// away.
-template<>
-Value Endgame<KRKP>::operator()(const Position& pos) const {
-
-  assert(verify_material(pos, strongSide, RookValueMg, 0));
-  assert(verify_material(pos, weakSide, VALUE_ZERO, 1));
-
-  Square strongKing = pos.square<KING>(strongSide);
-  Square weakKing   = pos.square<KING>(weakSide);
-  Square strongRook = pos.square<ROOK>(strongSide);
-  Square weakPawn   = pos.square<PAWN>(weakSide);
-  Square queeningSquare = make_square(file_of(weakPawn), relative_rank(weakSide, RANK_8));
-  Value result;
-
-  // If the stronger side's king is in front of the pawn, it's a win
-  if (forward_file_bb(strongSide, strongKing) & weakPawn)
-      result = RookValueEg - distance(strongKing, weakPawn);
-
-  // If the weaker side's king is too far from the pawn and the rook,
-  // it's a win.
-  else if (   distance(weakKing, weakPawn) >= 3 + (pos.side_to_move() == weakSide)
-           && distance(weakKing, strongRook) >= 3)
-      result = RookValueEg - distance(strongKing, weakPawn);
-
-  // If the pawn is far advanced and supported by the defending king,
-  // the position is drawish
-  else if (   relative_rank(strongSide, weakKing) <= RANK_3
-           && distance(weakKing, weakPawn) == 1
-           && relative_rank(strongSide, strongKing) >= RANK_4
-           && distance(strongKing, weakPawn) > 2 + (pos.side_to_move() == strongSide))
-      result = Value(80) - 8 * distance(strongKing, weakPawn);
-
-  else
-      result =  Value(200) - 8 * (  distance(strongKing, weakPawn + pawn_push(weakSide))
-                                  - distance(weakKing, weakPawn + pawn_push(weakSide))
-                                  - distance(weakPawn, queeningSquare));
-
-  return strongSide == pos.side_to_move() ? result : -result;
-}
-
-
-/// KR vs KB. This is very simple, and always returns drawish scores. The
-/// score is slightly bigger when the defending king is close to the edge.
-template<>
-Value Endgame<KRKB>::operator()(const Position& pos) const {
-
-  assert(verify_material(pos, strongSide, RookValueMg, 0));
-  assert(verify_material(pos, weakSide, BishopValueMg, 0));
-
-  Value result = Value(push_to_edge(pos.square<KING>(weakSide)));
-  return strongSide == pos.side_to_move() ? result : -result;
-}
-
-
-/// KR vs KN. The attacking side has slightly better winning chances than
-/// in KR vs KB, particularly if the king and the knight are far apart.
-template<>
-Value Endgame<KRKN>::operator()(const Position& pos) const {
-
-  assert(verify_material(pos, strongSide, RookValueMg, 0));
-  assert(verify_material(pos, weakSide, KnightValueMg, 0));
-
-  Square weakKing   = pos.square<KING>(weakSide);
-  Square weakKnight = pos.square<KNIGHT>(weakSide);
-  Value result = Value(push_to_edge(weakKing) + push_away(weakKing, weakKnight));
-  return strongSide == pos.side_to_move() ? result : -result;
-}
-
-
-/// KQ vs KP. In general, this is a win for the stronger side, but there are a
-/// few important exceptions. A pawn on 7th rank and on the A,C,F or H files
-/// with a king positioned next to it can be a draw, so in that case, we only
-/// use the distance between the kings.
-template<>
-Value Endgame<KQKP>::operator()(const Position& pos) const {
-
-  assert(verify_material(pos, strongSide, QueenValueMg, 0));
-  assert(verify_material(pos, weakSide, VALUE_ZERO, 1));
-
-  Square strongKing = pos.square<KING>(strongSide);
-  Square weakKing   = pos.square<KING>(weakSide);
-  Square weakPawn   = pos.square<PAWN>(weakSide);
-
-  Value result = Value(push_close(strongKing, weakKing));
-
-  if (   relative_rank(weakSide, weakPawn) != RANK_7
-      || distance(weakKing, weakPawn) != 1
-      || ((FileBBB | FileDBB | FileEBB | FileGBB) & weakPawn))
-      result += QueenValueEg - PawnValueEg;
-
-  return strongSide == pos.side_to_move() ? result : -result;
-}
-
-
-/// KQ vs KR. This is almost identical to KX vs K: we give the attacking
-/// king a bonus for having the kings close together, and for forcing the
-/// defending king towards the edge. If we also take care to avoid null move for
-/// the defending side in the search, this is usually sufficient to win KQ vs KR.
-template<>
-Value Endgame<KQKR>::operator()(const Position& pos) const {
-
-  assert(verify_material(pos, strongSide, QueenValueMg, 0));
-  assert(verify_material(pos, weakSide, RookValueMg, 0));
-
-  Square strongKing = pos.square<KING>(strongSide);
-  Square weakKing   = pos.square<KING>(weakSide);
-
-  Value result =  QueenValueEg
-                - RookValueEg
-                + push_to_edge(weakKing)
-                + push_close(strongKing, weakKing);
-
-  return strongSide == pos.side_to_move() ? result : -result;
-}
-
-
-/// KNN vs KP. Very drawish, but there are some mate opportunities if we can
-/// press the weakSide King to a corner before the pawn advances too much.
-template<>
-Value Endgame<KNNKP>::operator()(const Position& pos) const {
-
-  assert(verify_material(pos, strongSide, 2 * KnightValueMg, 0));
-  assert(verify_material(pos, weakSide, VALUE_ZERO, 1));
-
-  Square weakKing = pos.square<KING>(weakSide);
-  Square weakPawn = pos.square<PAWN>(weakSide);
-
-  Value result =      PawnValueEg
-               +  2 * push_to_edge(weakKing)
-               - 10 * relative_rank(weakSide, weakPawn);
-
-  return strongSide == pos.side_to_move() ? result : -result;
-}
-
-
-/// Some cases of trivial draws
-template<> Value Endgame<KNNK>::operator()(const Position&) const { return VALUE_DRAW; }
-
-
-/// KB and one or more pawns vs K. It checks for draws with rook pawns and
-/// a bishop of the wrong color. If such a draw is detected, SCALE_FACTOR_DRAW
-/// is returned. If not, the return value is SCALE_FACTOR_NONE, i.e. no scaling
-/// will be used.
-template<>
-ScaleFactor Endgame<KBPsK>::operator()(const Position& pos) const {
-
-  assert(pos.non_pawn_material(strongSide) == BishopValueMg);
-  assert(pos.count<PAWN>(strongSide) >= 1);
-
-  // No assertions about the material of weakSide, because we want draws to
-  // be detected even when the weaker side has some pawns.
-
-  Bitboard strongPawns = pos.pieces(strongSide, PAWN);
-  Bitboard allPawns = pos.pieces(PAWN);
-
-  Square strongBishop = pos.square<BISHOP>(strongSide);
-  Square weakKing = pos.square<KING>(weakSide);
-  Square strongKing = pos.square<KING>(strongSide);
-
-  // All strongSide pawns are on a single rook file?
-  if (!(strongPawns & ~FileABB) || !(strongPawns & ~FileHBB))
-  {
-      Square queeningSquare = relative_square(strongSide, make_square(file_of(lsb(strongPawns)), RANK_8));
-
-      if (   opposite_colors(queeningSquare, strongBishop)
-          && distance(queeningSquare, weakKing) <= 1)
-          return SCALE_FACTOR_DRAW;
-  }
-
-  // If all the pawns are on the same B or G file, then it's potentially a draw
-  if ((!(allPawns & ~FileBBB) || !(allPawns & ~FileGBB))
-      && pos.non_pawn_material(weakSide) == 0
-      && pos.count<PAWN>(weakSide) >= 1)
-  {
-      // Get the least advanced weakSide pawn
-      Square weakPawn = frontmost_sq(strongSide, pos.pieces(weakSide, PAWN));
-
-      // There's potential for a draw if our pawn is blocked on the 7th rank,
-      // the bishop cannot attack it or they only have one pawn left.
-      if (   relative_rank(strongSide, weakPawn) == RANK_7
-          && (strongPawns & (weakPawn + pawn_push(weakSide)))
-          && (opposite_colors(strongBishop, weakPawn) || !more_than_one(strongPawns)))
-      {
-          int strongKingDist = distance(weakPawn, strongKing);
-          int weakKingDist = distance(weakPawn, weakKing);
-
-          // It's a draw if the weak king is on its back two ranks, within 2
-          // squares of the blocking pawn and the strong king is not
-          // closer. (I think this rule only fails in practically
-          // unreachable positions such as 5k1K/6p1/6P1/8/8/3B4/8/8 w
-          // and positions where qsearch will immediately correct the
-          // problem such as 8/4k1p1/6P1/1K6/3B4/8/8/8 w).
-          if (   relative_rank(strongSide, weakKing) >= RANK_7
-              && weakKingDist <= 2
-              && weakKingDist <= strongKingDist)
-              return SCALE_FACTOR_DRAW;
-      }
-  }
-
-  return SCALE_FACTOR_NONE;
-}
-
-
-/// KQ vs KR and one or more pawns. It tests for fortress draws with a rook on
-/// the third rank defended by a pawn.
-template<>
-ScaleFactor Endgame<KQKRPs>::operator()(const Position& pos) const {
-
-  assert(verify_material(pos, strongSide, QueenValueMg, 0));
-  assert(pos.count<ROOK>(weakSide) == 1);
-  assert(pos.count<PAWN>(weakSide) >= 1);
-
-  Square strongKing = pos.square<KING>(strongSide);
-  Square weakKing   = pos.square<KING>(weakSide);
-  Square weakRook   = pos.square<ROOK>(weakSide);
-
-  if (    relative_rank(weakSide,   weakKing) <= RANK_2
-      &&  relative_rank(weakSide, strongKing) >= RANK_4
-      &&  relative_rank(weakSide,   weakRook) == RANK_3
-      && (  pos.pieces(weakSide, PAWN)
-          & attacks_bb<KING>(weakKing)
-          & pawn_attacks_bb(strongSide, weakRook)))
-          return SCALE_FACTOR_DRAW;
-
-  return SCALE_FACTOR_NONE;
-}
-
-
-/// KRP vs KR. This function knows a handful of the most important classes of
-/// drawn positions, but is far from perfect. It would probably be a good idea
-/// to add more knowledge in the future.
-///
-/// It would also be nice to rewrite the actual code for this function,
-/// which is mostly copied from Glaurung 1.x, and isn't very pretty.
-template<>
-ScaleFactor Endgame<KRPKR>::operator()(const Position& pos) const {
-
-  assert(verify_material(pos, strongSide, RookValueMg, 1));
-  assert(verify_material(pos, weakSide,   RookValueMg, 0));
-
-  // Assume strongSide is white and the pawn is on files A-D
-  Square strongKing = normalize(pos, strongSide, pos.square<KING>(strongSide));
-  Square strongRook = normalize(pos, strongSide, pos.square<ROOK>(strongSide));
-  Square strongPawn = normalize(pos, strongSide, pos.square<PAWN>(strongSide));
-  Square weakKing = normalize(pos, strongSide, pos.square<KING>(weakSide));
-  Square weakRook = normalize(pos, strongSide, pos.square<ROOK>(weakSide));
-
-  File pawnFile = file_of(strongPawn);
-  Rank pawnRank = rank_of(strongPawn);
-  Square queeningSquare = make_square(pawnFile, RANK_8);
-  int tempo = (pos.side_to_move() == strongSide);
-
-  // If the pawn is not too far advanced and the defending king defends the
-  // queening square, use the third-rank defence.
-  if (   pawnRank <= RANK_5
-      && distance(weakKing, queeningSquare) <= 1
-      && strongKing <= SQ_H5
-      && (rank_of(weakRook) == RANK_6 || (pawnRank <= RANK_3 && rank_of(strongRook) != RANK_6)))
-      return SCALE_FACTOR_DRAW;
-
-  // The defending side saves a draw by checking from behind in case the pawn
-  // has advanced to the 6th rank with the king behind.
-  if (   pawnRank == RANK_6
-      && distance(weakKing, queeningSquare) <= 1
-      && rank_of(strongKing) + tempo <= RANK_6
-      && (rank_of(weakRook) == RANK_1 || (!tempo && distance<File>(weakRook, strongPawn) >= 3)))
-      return SCALE_FACTOR_DRAW;
-
-  if (   pawnRank >= RANK_6
-      && weakKing == queeningSquare
-      && rank_of(weakRook) == RANK_1
-      && (!tempo || distance(strongKing, strongPawn) >= 2))
-      return SCALE_FACTOR_DRAW;
-
-  // White pawn on a7 and rook on a8 is a draw if black's king is on g7 or h7
-  // and the black rook is behind the pawn.
-  if (   strongPawn == SQ_A7
-      && strongRook == SQ_A8
-      && (weakKing == SQ_H7 || weakKing == SQ_G7)
-      && file_of(weakRook) == FILE_A
-      && (rank_of(weakRook) <= RANK_3 || file_of(strongKing) >= FILE_D || rank_of(strongKing) <= RANK_5))
-      return SCALE_FACTOR_DRAW;
-
-  // If the defending king blocks the pawn and the attacking king is too far
-  // away, it's a draw.
-  if (   pawnRank <= RANK_5
-      && weakKing == strongPawn + NORTH
-      && distance(strongKing, strongPawn) - tempo >= 2
-      && distance(strongKing, weakRook) - tempo >= 2)
-      return SCALE_FACTOR_DRAW;
-
-  // Pawn on the 7th rank supported by the rook from behind usually wins if the
-  // attacking king is closer to the queening square than the defending king,
-  // and the defending king cannot gain tempi by threatening the attacking rook.
-  if (   pawnRank == RANK_7
-      && pawnFile != FILE_A
-      && file_of(strongRook) == pawnFile
-      && strongRook != queeningSquare
-      && (distance(strongKing, queeningSquare) < distance(weakKing, queeningSquare) - 2 + tempo)
-      && (distance(strongKing, queeningSquare) < distance(weakKing, strongRook) + tempo))
-      return ScaleFactor(SCALE_FACTOR_MAX - 2 * distance(strongKing, queeningSquare));
-
-  // Similar to the above, but with the pawn further back
-  if (   pawnFile != FILE_A
-      && file_of(strongRook) == pawnFile
-      && strongRook < strongPawn
-      && (distance(strongKing, queeningSquare) < distance(weakKing, queeningSquare) - 2 + tempo)
-      && (distance(strongKing, strongPawn + NORTH) < distance(weakKing, strongPawn + NORTH) - 2 + tempo)
-      && (  distance(weakKing, strongRook) + tempo >= 3
-          || (    distance(strongKing, queeningSquare) < distance(weakKing, strongRook) + tempo
-              && (distance(strongKing, strongPawn + NORTH) < distance(weakKing, strongPawn) + tempo))))
-      return ScaleFactor(  SCALE_FACTOR_MAX
-                         - 8 * distance(strongPawn, queeningSquare)
-                         - 2 * distance(strongKing, queeningSquare));
-
-  // If the pawn is not far advanced and the defending king is somewhere in
-  // the pawn's path, it's probably a draw.
-  if (pawnRank <= RANK_4 && weakKing > strongPawn)
-  {
-      if (file_of(weakKing) == file_of(strongPawn))
-          return ScaleFactor(10);
-      if (   distance<File>(weakKing, strongPawn) == 1
-          && distance(strongKing, weakKing) > 2)
-          return ScaleFactor(24 - 2 * distance(strongKing, weakKing));
-  }
-  return SCALE_FACTOR_NONE;
-}
-
-template<>
-ScaleFactor Endgame<KRPKB>::operator()(const Position& pos) const {
-
-  assert(verify_material(pos, strongSide, RookValueMg, 1));
-  assert(verify_material(pos, weakSide, BishopValueMg, 0));
-
-  // Test for a rook pawn
-  if (pos.pieces(PAWN) & (FileABB | FileHBB))
-  {
-      Square weakKing = pos.square<KING>(weakSide);
-      Square weakBishop = pos.square<BISHOP>(weakSide);
-      Square strongKing = pos.square<KING>(strongSide);
-      Square strongPawn = pos.square<PAWN>(strongSide);
-      Rank pawnRank = relative_rank(strongSide, strongPawn);
-      Direction push = pawn_push(strongSide);
-
-      // If the pawn is on the 5th rank and the pawn (currently) is on
-      // the same color square as the bishop then there is a chance of
-      // a fortress. Depending on the king position give a moderate
-      // reduction or a stronger one if the defending king is near the
-      // corner but not trapped there.
-      if (pawnRank == RANK_5 && !opposite_colors(weakBishop, strongPawn))
-      {
-          int d = distance(strongPawn + 3 * push, weakKing);
-
-          if (d <= 2 && !(d == 0 && weakKing == strongKing + 2 * push))
-              return ScaleFactor(24);
-          else
-              return ScaleFactor(48);
-      }
-
-      // When the pawn has moved to the 6th rank we can be fairly sure
-      // it's drawn if the bishop attacks the square in front of the
-      // pawn from a reasonable distance and the defending king is near
-      // the corner
-      if (   pawnRank == RANK_6
-          && distance(strongPawn + 2 * push, weakKing) <= 1
-          && (attacks_bb<BISHOP>(weakBishop) & (strongPawn + push))
-          && distance<File>(weakBishop, strongPawn) >= 2)
-          return ScaleFactor(8);
-  }
-
-  return SCALE_FACTOR_NONE;
-}
-
-/// KRPP vs KRP. There is just a single rule: if the stronger side has no passed
-/// pawns and the defending king is actively placed, the position is drawish.
-template<>
-ScaleFactor Endgame<KRPPKRP>::operator()(const Position& pos) const {
-
-  assert(verify_material(pos, strongSide, RookValueMg, 2));
-  assert(verify_material(pos, weakSide,   RookValueMg, 1));
-
-  Square strongPawn1 = lsb(pos.pieces(strongSide, PAWN));
-  Square strongPawn2 = msb(pos.pieces(strongSide, PAWN));
-  Square weakKing = pos.square<KING>(weakSide);
-
-  // Does the stronger side have a passed pawn?
-  if (pos.pawn_passed(strongSide, strongPawn1) || pos.pawn_passed(strongSide, strongPawn2))
-      return SCALE_FACTOR_NONE;
-
-  Rank pawnRank = std::max(relative_rank(strongSide, strongPawn1), relative_rank(strongSide, strongPawn2));
-
-  if (   distance<File>(weakKing, strongPawn1) <= 1
-      && distance<File>(weakKing, strongPawn2) <= 1
-      && relative_rank(strongSide, weakKing) > pawnRank)
-  {
-      assert(pawnRank > RANK_1 && pawnRank < RANK_7);
-      return ScaleFactor(7 * pawnRank);
-  }
-  return SCALE_FACTOR_NONE;
-}
-
-
-/// K and two or more pawns vs K. There is just a single rule here: if all pawns
-/// are on the same rook file and are blocked by the defending king, it's a draw.
-template<>
-ScaleFactor Endgame<KPsK>::operator()(const Position& pos) const {
-
-  assert(pos.non_pawn_material(strongSide) == VALUE_ZERO);
-  assert(pos.count<PAWN>(strongSide) >= 2);
-  assert(verify_material(pos, weakSide, VALUE_ZERO, 0));
-
-  Square weakKing = pos.square<KING>(weakSide);
-  Bitboard strongPawns = pos.pieces(strongSide, PAWN);
-
-  // If all pawns are ahead of the king on a single rook file, it's a draw.
-  if (   !(strongPawns & ~(FileABB | FileHBB))
-      && !(strongPawns & ~passed_pawn_span(weakSide, weakKing)))
-      return SCALE_FACTOR_DRAW;
-
-  return SCALE_FACTOR_NONE;
-}
-
-
-/// KBP vs KB. There are two rules: if the defending king is somewhere along the
-/// path of the pawn, and the square of the king is not of the same color as the
-/// stronger side's bishop, it's a draw. If the two bishops have opposite color,
-/// it's almost always a draw.
-template<>
-ScaleFactor Endgame<KBPKB>::operator()(const Position& pos) const {
-
-  assert(verify_material(pos, strongSide, BishopValueMg, 1));
-  assert(verify_material(pos, weakSide,   BishopValueMg, 0));
-
-  Square strongPawn = pos.square<PAWN>(strongSide);
-  Square strongBishop = pos.square<BISHOP>(strongSide);
-  Square weakBishop = pos.square<BISHOP>(weakSide);
-  Square weakKing = pos.square<KING>(weakSide);
-
-  // Case 1: Defending king blocks the pawn, and cannot be driven away
-  if (   (forward_file_bb(strongSide, strongPawn) & weakKing)
-      && (   opposite_colors(weakKing, strongBishop)
-          || relative_rank(strongSide, weakKing) <= RANK_6))
-      return SCALE_FACTOR_DRAW;
-
-  // Case 2: Opposite colored bishops
-  if (opposite_colors(strongBishop, weakBishop))
-      return SCALE_FACTOR_DRAW;
-
-  return SCALE_FACTOR_NONE;
-}
-
-
-/// KBPP vs KB. It detects a few basic draws with opposite-colored bishops
-template<>
-ScaleFactor Endgame<KBPPKB>::operator()(const Position& pos) const {
-
-  assert(verify_material(pos, strongSide, BishopValueMg, 2));
-  assert(verify_material(pos, weakSide,   BishopValueMg, 0));
-
-  Square strongBishop = pos.square<BISHOP>(strongSide);
-  Square weakBishop   = pos.square<BISHOP>(weakSide);
-
-  if (!opposite_colors(strongBishop, weakBishop))
-      return SCALE_FACTOR_NONE;
-
-  Square weakKing = pos.square<KING>(weakSide);
-  Square strongPawn1 = lsb(pos.pieces(strongSide, PAWN));
-  Square strongPawn2 = msb(pos.pieces(strongSide, PAWN));
-  Square blockSq1, blockSq2;
-
-  if (relative_rank(strongSide, strongPawn1) > relative_rank(strongSide, strongPawn2))
-  {
-      blockSq1 = strongPawn1 + pawn_push(strongSide);
-      blockSq2 = make_square(file_of(strongPawn2), rank_of(strongPawn1));
-  }
-  else
-  {
-      blockSq1 = strongPawn2 + pawn_push(strongSide);
-      blockSq2 = make_square(file_of(strongPawn1), rank_of(strongPawn2));
-  }
-
-  switch (distance<File>(strongPawn1, strongPawn2))
-  {
-  case 0:
-    // Both pawns are on the same file. It's an easy draw if the defender firmly
-    // controls some square in the frontmost pawn's path.
-    if (   file_of(weakKing) == file_of(blockSq1)
-        && relative_rank(strongSide, weakKing) >= relative_rank(strongSide, blockSq1)
-        && opposite_colors(weakKing, strongBishop))
-        return SCALE_FACTOR_DRAW;
-    else
-        return SCALE_FACTOR_NONE;
-
-  case 1:
-    // Pawns on adjacent files. It's a draw if the defender firmly controls the
-    // square in front of the frontmost pawn's path, and the square diagonally
-    // behind this square on the file of the other pawn.
-    if (   weakKing == blockSq1
-        && opposite_colors(weakKing, strongBishop)
-        && (   weakBishop == blockSq2
-            || (attacks_bb<BISHOP>(blockSq2, pos.pieces()) & pos.pieces(weakSide, BISHOP))
-            || distance<Rank>(strongPawn1, strongPawn2) >= 2))
-        return SCALE_FACTOR_DRAW;
-
-    else if (   weakKing == blockSq2
-             && opposite_colors(weakKing, strongBishop)
-             && (   weakBishop == blockSq1
-                 || (attacks_bb<BISHOP>(blockSq1, pos.pieces()) & pos.pieces(weakSide, BISHOP))))
-        return SCALE_FACTOR_DRAW;
-    else
-        return SCALE_FACTOR_NONE;
-
-  default:
-    // The pawns are not on the same file or adjacent files. No scaling.
-    return SCALE_FACTOR_NONE;
-  }
-}
-
-
-/// KBP vs KN. There is a single rule: if the defending king is somewhere along
-/// the path of the pawn, and the square of the king is not of the same color as
-/// the stronger side's bishop, it's a draw.
-template<>
-ScaleFactor Endgame<KBPKN>::operator()(const Position& pos) const {
-
-  assert(verify_material(pos, strongSide, BishopValueMg, 1));
-  assert(verify_material(pos, weakSide, KnightValueMg, 0));
-
-  Square strongPawn = pos.square<PAWN>(strongSide);
-  Square strongBishop = pos.square<BISHOP>(strongSide);
-  Square weakKing = pos.square<KING>(weakSide);
-
-  if (   file_of(weakKing) == file_of(strongPawn)
-      && relative_rank(strongSide, strongPawn) < relative_rank(strongSide, weakKing)
-      && (   opposite_colors(weakKing, strongBishop)
-          || relative_rank(strongSide, weakKing) <= RANK_6))
-      return SCALE_FACTOR_DRAW;
-
-  return SCALE_FACTOR_NONE;
-}
-
-
-/// KP vs KP. This is done by removing the weakest side's pawn and probing the
-/// KP vs K bitbase: if the weakest side has a draw without the pawn, it probably
-/// has at least a draw with the pawn as well. The exception is when the stronger
-/// side's pawn is far advanced and not on a rook file; in this case it is often
-/// possible to win (e.g. 8/4k3/3p4/3P4/6K1/8/8/8 w - - 0 1).
-template<>
-ScaleFactor Endgame<KPKP>::operator()(const Position& pos) const {
-
-  assert(verify_material(pos, strongSide, VALUE_ZERO, 1));
-  assert(verify_material(pos, weakSide,   VALUE_ZERO, 1));
-
-  // Assume strongSide is white and the pawn is on files A-D
-  Square strongKing = normalize(pos, strongSide, pos.square<KING>(strongSide));
-  Square weakKing   = normalize(pos, strongSide, pos.square<KING>(weakSide));
-  Square strongPawn = normalize(pos, strongSide, pos.square<PAWN>(strongSide));
-
-  Color us = strongSide == pos.side_to_move() ? WHITE : BLACK;
-
-  // If the pawn has advanced to the fifth rank or further, and is not a
-  // rook pawn, it's too dangerous to assume that it's at least a draw.
-  if (rank_of(strongPawn) >= RANK_5 && file_of(strongPawn) != FILE_A)
-      return SCALE_FACTOR_NONE;
-
-  // Probe the KPK bitbase with the weakest side's pawn removed. If it's a draw,
-  // it's probably at least a draw even with the pawn.
-  return Bitbases::probe(strongKing, strongPawn, weakKing, us) ? SCALE_FACTOR_NONE : SCALE_FACTOR_DRAW;
-}
@@ -1,123 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef ENDGAME_H_INCLUDED
-#define ENDGAME_H_INCLUDED
-
-#include <memory>
-#include <string>
-#include <type_traits>
-#include <unordered_map>
-#include <utility>
-
-#include "position.h"
-#include "types.h"
-
-
-/// EndgameCode lists all supported endgame functions by corresponding codes
-
-enum EndgameCode {
-
-  EVALUATION_FUNCTIONS,
-  KNNK,  // KNN vs K
-  KNNKP, // KNN vs KP
-  KXK,   // Generic "mate lone king" eval
-  KBNK,  // KBN vs K
-  KPK,   // KP vs K
-  KRKP,  // KR vs KP
-  KRKB,  // KR vs KB
-  KRKN,  // KR vs KN
-  KQKP,  // KQ vs KP
-  KQKR,  // KQ vs KR
-
-  SCALING_FUNCTIONS,
-  KBPsK,   // KB and pawns vs K
-  KQKRPs,  // KQ vs KR and pawns
-  KRPKR,   // KRP vs KR
-  KRPKB,   // KRP vs KB
-  KRPPKRP, // KRPP vs KRP
-  KPsK,    // K and pawns vs K
-  KBPKB,   // KBP vs KB
-  KBPPKB,  // KBPP vs KB
-  KBPKN,   // KBP vs KN
-  KPKP     // KP vs KP
-};
-
-
-/// Endgame functions can be of two types depending on whether they return a
-/// Value or a ScaleFactor.
-
-template<EndgameCode E> using
-eg_type = typename std::conditional<(E < SCALING_FUNCTIONS), Value, ScaleFactor>::type;
-
-
-/// Base and derived functors for endgame evaluation and scaling functions
-
-template<typename T>
-struct EndgameBase {
-
-  explicit EndgameBase(Color c) : strongSide(c), weakSide(~c) {}
-  virtual ~EndgameBase() = default;
-  virtual T operator()(const Position&) const = 0;
-
-  const Color strongSide, weakSide;
-};
-
-
-template<EndgameCode E, typename T = eg_type<E>>
-struct Endgame : public EndgameBase<T> {
-
-  explicit Endgame(Color c) : EndgameBase<T>(c) {}
-  T operator()(const Position&) const override;
-};
-
-
-/// The Endgames namespace handles the pointers to endgame evaluation and scaling
-/// base objects in two std::map. We use polymorphism to invoke the actual
-/// endgame function by calling its virtual operator().
-
-namespace Endgames {
-
-  template<typename T> using Ptr = std::unique_ptr<EndgameBase<T>>;
-  template<typename T> using Map = std::unordered_map<Key, Ptr<T>>;
-
-  extern std::pair<Map<Value>, Map<ScaleFactor>> maps;
-
-  void init();
-
-  template<typename T>
-  Map<T>& map() {
-    return std::get<std::is_same<T, ScaleFactor>::value>(maps);
-  }
-
-  template<EndgameCode E, typename T = eg_type<E>>
-  void add(const std::string& code) {
-
-    StateInfo st;
-    map<T>()[Position().set(code, WHITE, &st).material_key()] = Ptr<T>(new Endgame<E>(WHITE));
-    map<T>()[Position().set(code, BLACK, &st).material_key()] = Ptr<T>(new Endgame<E>(BLACK));
-  }
-
-  template<typename T>
-  const EndgameBase<T>* probe(Key key) {
-    auto it = map<T>().find(key);
-    return it != map<T>().end() ? it->second.get() : nullptr;
-  }
-}
-
-#endif // #ifndef ENDGAME_H_INCLUDED
@@ -0,0 +1,369 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "engine.h"
+
+#include <cassert>
+#include <deque>
+#include <iosfwd>
+#include <memory>
+#include <ostream>
+#include <sstream>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "evaluate.h"
+#include "misc.h"
+#include "nnue/network.h"
+#include "nnue/nnue_common.h"
+#include "perft.h"
+#include "position.h"
+#include "search.h"
+#include "syzygy/tbprobe.h"
+#include "types.h"
+#include "uci.h"
+#include "ucioption.h"
+
+namespace Stockfish {
+
+namespace NN = Eval::NNUE;
+
+constexpr auto StartFEN  = "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1";
+constexpr int  MaxHashMB = Is64Bit ? 33554432 : 2048;
+
+Engine::Engine(std::optional<std::string> path) :  // sets up state and registers all options
+    binaryDirectory(path ? CommandLine::get_binary_directory(*path) : ""),  // "" when no path
+    numaContext(NumaConfig::from_system()),
+    states(new std::deque<StateInfo>(1)),
+    threads(),
+    networks(
+      numaContext,
+      NN::Networks(
+        NN::NetworkBig({EvalFileDefaultNameBig, "None", ""}, NN::EmbeddedNNUEType::BIG),
+        NN::NetworkSmall({EvalFileDefaultNameSmall, "None", ""}, NN::EmbeddedNNUEType::SMALL))) {
+    pos.set(StartFEN, false, &states->back());  // start position; Chess960 argument is false
+
+
+    options.add(  //
+      "Debug Log File", Option("", [](const Option& o) {
+          start_logger(o);
+          return std::nullopt;
+      }));
+
+    options.add(  //
+      "NumaPolicy", Option("auto", [this](const Option& o) {
+          set_numa_config_from_option(o);
+          return numa_config_information_as_string() + "\n"
+               + thread_allocation_information_as_string();
+      }));
+
+    options.add(  //
+      "Threads", Option(1, 1, 1024, [this](const Option&) {
+          resize_threads();
+          return thread_allocation_information_as_string();
+      }));
+
+    options.add(  //
+      "Hash", Option(16, 1, MaxHashMB, [this](const Option& o) {
+          set_tt_size(o);
+          return std::nullopt;
+      }));
+
+    options.add(  //
+      "Clear Hash", Option([this](const Option&) {
+          search_clear();
+          return std::nullopt;
+      }));
+
+    options.add(  //
+      "Ponder", Option(false));
+
+    options.add(  //
+      "MultiPV", Option(1, 1, MAX_MOVES));
+
+    options.add("Skill Level", Option(20, 0, 20));
+
+    options.add("Move Overhead", Option(10, 0, 5000));
+
+    options.add("nodestime", Option(0, 0, 10000));
+
+    options.add("UCI_Chess960", Option(false));
+
+    options.add("UCI_LimitStrength", Option(false));
+
+    options.add("UCI_Elo",
+                Option(Stockfish::Search::Skill::LowestElo, Stockfish::Search::Skill::LowestElo,
+                       Stockfish::Search::Skill::HighestElo));
+
+    options.add("UCI_ShowWDL", Option(false));
+
+    options.add(  //
+      "SyzygyPath", Option("", [](const Option& o) {
+          Tablebases::init(o);
+          return std::nullopt;
+      }));
+
+    options.add("SyzygyProbeDepth", Option(1, 1, 100));
+
+    options.add("Syzygy50MoveRule", Option(true));
+
+    options.add("SyzygyProbeLimit", Option(7, 0, 7));
+
+    options.add(  //
+      "EvalFile", Option(EvalFileDefaultNameBig, [this](const Option& o) {
+          load_big_network(o);
+          return std::nullopt;
+      }));
+
+    options.add(  //
+      "EvalFileSmall", Option(EvalFileDefaultNameSmall, [this](const Option& o) {
+          load_small_network(o);
+          return std::nullopt;
+      }));
+
+    load_networks();   // loads both nets named by the EvalFile / EvalFileSmall options
+    resize_threads();  // builds the thread pool, then sizes the hash from the Hash option
+}
+
+std::uint64_t Engine::perft(const std::string& fen, Depth depth, bool isChess960) {
+    verify_networks();
+
+    return Benchmark::perft(fen, depth, isChess960);  // runs on the given FEN, not on member pos
+}
+
+void Engine::go(Search::LimitsType& limits) {
+    assert(limits.perft == 0);  // perft requests must use Engine::perft, not go()
+    verify_networks();
+
+    threads.start_thinking(options, pos, states, limits);  // search runs in the thread pool
+}
+void Engine::stop() { threads.stop = true; }  // only sets the flag; does not wait here
+
+void Engine::search_clear() {
+    wait_for_search_finished();  // clear only after any running search has finished
+
+    tt.clear(threads);
+    threads.clear();
+
+    // @TODO won't work with multiple instances
+    Tablebases::init(options["SyzygyPath"]);  // Free mapped files
+}
+
+void Engine::set_on_update_no_moves(std::function<void(const Engine::InfoShort&)>&& f) {
+    updateContext.onUpdateNoMoves = std::move(f);  // replaces the previously stored callback
+}
+
+void Engine::set_on_update_full(std::function<void(const Engine::InfoFull&)>&& f) {
+    updateContext.onUpdateFull = std::move(f);  // updateContext is handed to threads.set()
+}
+
+void Engine::set_on_iter(std::function<void(const Engine::InfoIter&)>&& f) {
+    updateContext.onIter = std::move(f);  // InfoIter aliases Search::InfoIteration
+}
+
+void Engine::set_on_bestmove(std::function<void(std::string_view, std::string_view)>&& f) {
+    updateContext.onBestmove = std::move(f);  // callback receives two string_view arguments
+}
+
+void Engine::set_on_verify_networks(std::function<void(std::string_view)>&& f) {
+    onVerifyNetworks = std::move(f);  // kept on Engine; passed to verify() in verify_networks()
+}
+
+void Engine::wait_for_search_finished() { threads.main_thread()->wait_for_search_finished(); }
+
+void Engine::set_position(const std::string& fen, const std::vector<std::string>& moves) {
+    // Drop the old state and create a new one
+    states = StateListPtr(new std::deque<StateInfo>(1));
+    pos.set(fen, options["UCI_Chess960"], &states->back());
+
+    for (const auto& move : moves)
+    {
+        auto m = UCIEngine::to_move(pos, move);  // resolved against the current position
+
+        if (m == Move::none())
+            break;  // stop at the first unresolved move; the remaining moves are not applied
+
+        states->emplace_back();  // every applied move gets its own StateInfo
+        pos.do_move(m, states->back());
+    }
+}
+
+// modifiers
+
+void Engine::set_numa_config_from_option(const std::string& o) {
+    if (o == "auto" || o == "system")  // both values take the same path
+    {
+        numaContext.set_numa_config(NumaConfig::from_system());
+    }
+    else if (o == "hardware")
+    {
+        // Don't respect affinity set in the system.
+        numaContext.set_numa_config(NumaConfig::from_system(false));
+    }
+    else if (o == "none")
+    {
+        numaContext.set_numa_config(NumaConfig{});  // default-constructed config
+    }
+    else
+    {
+        numaContext.set_numa_config(NumaConfig::from_string(o));  // any other value is parsed
+    }
+
+    // Force reallocation of threads in case affinities need to change.
+    resize_threads();
+    threads.ensure_network_replicated();
+}
+
+void Engine::resize_threads() {
+    threads.wait_for_search_finished();  // wait first, then rebuild the pool
+    threads.set(numaContext.get_numa_config(), {options, threads, tt, networks}, updateContext);
+
+    // Reallocate the hash with the new threadpool size
+    set_tt_size(options["Hash"]);
+    threads.ensure_network_replicated();
+}
+
+void Engine::set_tt_size(size_t mb) {
+    wait_for_search_finished();  // resize only after any running search has finished
+    tt.resize(mb, threads);      // mb is the "Hash" option value at both call sites in this file
+}
+
+void Engine::set_ponderhit(bool b) { threads.main_manager()->ponder = b; }
+
+// network related
+
+void Engine::verify_networks() const {  // each net gets its option value and the callback
+    networks->big.verify(options["EvalFile"], onVerifyNetworks);
+    networks->small.verify(options["EvalFileSmall"], onVerifyNetworks);
+}
+
+void Engine::load_networks() {  // (re)loads both nets from the current option values
+    networks.modify_and_replicate([this](NN::Networks& networks_) {
+        networks_.big.load(binaryDirectory, options["EvalFile"]);
+        networks_.small.load(binaryDirectory, options["EvalFileSmall"]);
+    });
+    threads.clear();  // same follow-up pair as in the single-network loaders
+    threads.ensure_network_replicated();
+}
+
+void Engine::load_big_network(const std::string& file) {  // invoked by the "EvalFile" option
+    networks.modify_and_replicate(
+      [this, &file](NN::Networks& networks_) { networks_.big.load(binaryDirectory, file); });
+    threads.clear();
+    threads.ensure_network_replicated();
+}
+
+void Engine::load_small_network(const std::string& file) {  // invoked by "EvalFileSmall"
+    networks.modify_and_replicate(
+      [this, &file](NN::Networks& networks_) { networks_.small.load(binaryDirectory, file); });
+    threads.clear();
+    threads.ensure_network_replicated();
+}
+
+void Engine::save_network(const std::pair<std::optional<std::string>, std::string> files[2]) {
+    networks.modify_and_replicate([&files](NN::Networks& networks_) {
+        networks_.big.save(files[0].first);    // files[0] is for the big net; .second is unused
+        networks_.small.save(files[1].first);  // files[1] is for the small net
+    });
+}
+
+// utility functions
+
+void Engine::trace_eval() const {
+    StateListPtr trace_states(new std::deque<StateInfo>(1));
+    Position     p;  // scratch position rebuilt from the FEN; member pos is only read
+    p.set(pos.fen(), options["UCI_Chess960"], &trace_states->back());
+
+    verify_networks();
+
+    sync_cout << "\n" << Eval::trace(p, *networks) << sync_endl;
+}
+
+const OptionsMap& Engine::get_options() const { return options; }
+OptionsMap&       Engine::get_options() { return options; }
+
+std::string Engine::fen() const { return pos.fen(); }
+
+void Engine::flip() { pos.flip(); }
+
+std::string Engine::visualize() const {
+    std::stringstream ss;
+    ss << pos;  // uses the stream operator<< available for Position
+    return ss.str();
+}
+
+int Engine::get_hashfull(int maxAge) const { return tt.hashfull(maxAge); }
+
+std::vector<std::pair<size_t, size_t>> Engine::get_bound_thread_count_by_numa_node() const {
+    auto                                   counts = threads.get_bound_thread_count_by_numa_node();
+    const NumaConfig&                      cfg    = numaContext.get_numa_config();
+    std::vector<std::pair<size_t, size_t>> ratios;  // one (bound threads, cpus in node) per node
+    NumaIndex                              n = 0;
+    for (; n < counts.size(); ++n)
+        ratios.emplace_back(counts[n], cfg.num_cpus_in_numa_node(n));
+    if (!counts.empty())  // empty counts gives an empty result, not a zero-filled one
+        for (; n < cfg.num_numa_nodes(); ++n)  // n carries over: pad the remaining nodes with 0
+            ratios.emplace_back(0, cfg.num_cpus_in_numa_node(n));
+    return ratios;
+}
+
+std::string Engine::get_numa_config_as_string() const {
+    return numaContext.get_numa_config().to_string();
+}
+
+std::string Engine::numa_config_information_as_string() const {
+    auto cfgStr = get_numa_config_as_string();
+    return "Available processors: " + cfgStr;  // part of the NumaPolicy option's reply string
+}
+
+std::string Engine::thread_binding_information_as_string() const {
+    auto              boundThreadsByNode = get_bound_thread_count_by_numa_node();
+    std::stringstream ss;
+    if (boundThreadsByNode.empty())
+        return ss.str();  // no per-node data: empty string
+
+    bool isFirst = true;
+
+    for (auto&& [current, total] : boundThreadsByNode)
+    {
+        if (!isFirst)
+            ss << ":";  // ':' separates consecutive nodes
+        ss << current << "/" << total;  // per node: bound threads / cpus in that node
+        isFirst = false;
+    }
+
+    return ss.str();
+}
+
+std::string Engine::thread_allocation_information_as_string() const {
+    std::stringstream ss;
+
+    size_t threadsSize = threads.size();
+    ss << "Using " << threadsSize << (threadsSize > 1 ? " threads" : " thread");
+
+    auto boundThreadsByNodeStr = thread_binding_information_as_string();
+    if (boundThreadsByNodeStr.empty())
+        return ss.str();  // no binding info: leave out the NUMA suffix
+
+    ss << " with NUMA node thread binding: ";
+    ss << boundThreadsByNodeStr;
+
+    return ss.str();
+}
+}
@@ -0,0 +1,130 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef ENGINE_H_INCLUDED
+#define ENGINE_H_INCLUDED
+
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <optional>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "nnue/network.h"
+#include "numa.h"
+#include "position.h"
+#include "search.h"
+#include "syzygy/tbprobe.h"  // for Stockfish::Depth
+#include "thread.h"
+#include "tt.h"
+#include "ucioption.h"
+
+namespace Stockfish {
+
+class Engine {
+   public:
+    using InfoShort = Search::InfoShort;
+    using InfoFull  = Search::InfoFull;
+    using InfoIter  = Search::InfoIteration;
+
+    Engine(std::optional<std::string> path = std::nullopt);  // path only seeds binaryDirectory
+
+    // Cannot be movable due to components holding backreferences to fields
+    Engine(const Engine&)            = delete;
+    Engine(Engine&&)                 = delete;
+    Engine& operator=(const Engine&) = delete;
+    Engine& operator=(Engine&&)      = delete;
+
+    ~Engine() { wait_for_search_finished(); }  // blocks until any search has finished
+
+    std::uint64_t perft(const std::string& fen, Depth depth, bool isChess960);
+
+    // non blocking call to start searching
+    void go(Search::LimitsType&);
+    // non blocking call to stop searching
+    void stop();
+
+    // blocking call to wait for search to finish
+    void wait_for_search_finished();
+    // set a new position, moves are in UCI format
+    void set_position(const std::string& fen, const std::vector<std::string>& moves);
+
+    // modifiers
+
+    void set_numa_config_from_option(const std::string& o);
+    void resize_threads();
+    void set_tt_size(size_t mb);
+    void set_ponderhit(bool);
+    void search_clear();
+
+    void set_on_update_no_moves(std::function<void(const InfoShort&)>&&);
+    void set_on_update_full(std::function<void(const InfoFull&)>&&);
+    void set_on_iter(std::function<void(const InfoIter&)>&&);
+    void set_on_bestmove(std::function<void(std::string_view, std::string_view)>&&);
+    void set_on_verify_networks(std::function<void(std::string_view)>&&);
+
+    // network related
+
+    void verify_networks() const;
+    void load_networks();
+    void load_big_network(const std::string& file);
+    void load_small_network(const std::string& file);
+    void save_network(const std::pair<std::optional<std::string>, std::string> files[2]);
+
+    // utility functions
+
+    void trace_eval() const;
+
+    const OptionsMap& get_options() const;
+    OptionsMap&       get_options();
+
+    int get_hashfull(int maxAge = 0) const;  // forwards maxAge to tt.hashfull()
+
+    std::string                            fen() const;
+    void                                   flip();
+    std::string                            visualize() const;
+    std::vector<std::pair<size_t, size_t>> get_bound_thread_count_by_numa_node() const;
+    std::string                            get_numa_config_as_string() const;
+    std::string                            numa_config_information_as_string() const;
+    std::string                            thread_allocation_information_as_string() const;
+    std::string                            thread_binding_information_as_string() const;
+
+   private:
+    const std::string binaryDirectory;  // passed to the network load() calls
+
+    NumaReplicationContext numaContext;
+
+    Position     pos;
+    StateListPtr states;  // replaced with a fresh one-element list by set_position()
+
+    OptionsMap                               options;
+    ThreadPool                               threads;
+    TranspositionTable                       tt;
+    LazyNumaReplicated<Eval::NNUE::Networks> networks;
+
+    Search::SearchManager::UpdateContext  updateContext;
+    std::function<void(std::string_view)> onVerifyNetworks;
+};
+
+}  // namespace Stockfish
+
+
+#endif  // #ifndef ENGINE_H_INCLUDED
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -23,30 +23,34 @@

 #include "types.h"

+namespace Stockfish {
+
 class Position;

 namespace Eval {

-  std::string trace(const Position& pos);
-  Value evaluate(const Position& pos);
+// The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
+// for the build process (profile-build and fishtest) to work. Do not change the
+// name of the macro or the location where this macro is defined, as it is used
+// in the Makefile/Fishtest.
+#define EvalFileDefaultNameBig "nn-1c0000000000.nnue"
+#define EvalFileDefaultNameSmall "nn-37f18f62d772.nnue"

-  extern bool useNNUE;
-  extern std::string eval_file_loaded;
+namespace NNUE {
+struct Networks;
+struct AccumulatorCaches;
+}

-  // The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
-  // for the build process (profile-build and fishtest) to work. Do not change the
-  // name of the macro, as it is used in the Makefile.
-  #define EvalFileDefaultName   "nn-62ef826d1a6d.nnue"
+std::string trace(Position& pos, const Eval::NNUE::Networks& networks);

-  namespace NNUE {
+int   simple_eval(const Position& pos, Color c);
+bool  use_smallnet(const Position& pos);
+Value evaluate(const NNUE::Networks&          networks,
+               const Position&                pos,
+               Eval::NNUE::AccumulatorCaches& caches,
+               int                            optimism);
+}  // namespace Eval

-    Value evaluate(const Position& pos);
-    bool load_eval(std::string name, std::istream& stream);
-    void init();
-    void verify();
+}  // namespace Stockfish

-  } // namespace NNUE
-
-} // namespace Eval
-
-#endif // #ifndef EVALUATE_H_INCLUDED
+#endif  // #ifndef EVALUATE_H_INCLUDED
@@ -0,0 +1,165 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef HISTORY_H_INCLUDED
+#define HISTORY_H_INCLUDED
+
+#include <algorithm>
+#include <array>
+#include <cassert>
+#include <cmath>
+#include <cstdint>
+#include <cstdlib>
+#include <limits>
+#include <type_traits>  // IWYU pragma: keep
+
+#include "misc.h"
+#include "position.h"
+
+namespace Stockfish {
+
+constexpr int PAWN_HISTORY_SIZE        = 512;    // has to be a power of 2
+constexpr int CORRECTION_HISTORY_SIZE  = 32768;  // has to be a power of 2
+constexpr int CORRECTION_HISTORY_LIMIT = 1024;
+constexpr int LOW_PLY_HISTORY_SIZE     = 4;
+
+static_assert((PAWN_HISTORY_SIZE & (PAWN_HISTORY_SIZE - 1)) == 0,
+              "PAWN_HISTORY_SIZE has to be a power of 2");
+
+static_assert((CORRECTION_HISTORY_SIZE & (CORRECTION_HISTORY_SIZE - 1)) == 0,
+              "CORRECTION_HISTORY_SIZE has to be a power of 2");
+
+enum PawnHistoryType {
+    Normal,
+    Correction
+};
+
+template<PawnHistoryType T = Normal>
+inline int pawn_structure_index(const Position& pos) {  // masks the pawn key into [0, size)
+    return pos.pawn_key() & ((T == Normal ? PAWN_HISTORY_SIZE : CORRECTION_HISTORY_SIZE) - 1);
+}
+
+inline int minor_piece_index(const Position& pos) {
+    return pos.minor_piece_key() & (CORRECTION_HISTORY_SIZE - 1);  // power-of-2 size: mask == modulo
+}
+
+template<Color c>
+inline int non_pawn_index(const Position& pos) {
+    return pos.non_pawn_key(c) & (CORRECTION_HISTORY_SIZE - 1);  // key for color c, masked
+}
+
+// StatsEntry is the container of various numerical statistics. We use a class
+// instead of a naked value to directly call history update operator<<() on
+// the entry. The first template parameter T is the base type of the array,
+// and the second template parameter D limits the range of updates in [-D, D]
+// when we update values with the << operator
+template<typename T, int D>
+class StatsEntry {
+
+    static_assert(std::is_arithmetic<T>::value, "Not an arithmetic type");
+    static_assert(D <= std::numeric_limits<T>::max(), "D overflows T");
+
+    T entry;  // the stored statistic; this class declares no constructor that initialises it
+
+   public:
+    StatsEntry& operator=(const T& v) {
+        entry = v;  // direct store, no clamping to [-D, D]
+        return *this;
+    }
+    operator const T&() const { return entry; }  // lets an entry be read as a plain T
+
+    void operator<<(int bonus) {
+        // Make sure that bonus is in range [-D, D]
+        int clampedBonus = std::clamp(bonus, -D, D);
+        entry += clampedBonus - entry * std::abs(clampedBonus) / D;  // damping term scales with entry
+
+        assert(std::abs(entry) <= D);
+    }
+};
+
+enum StatsType {
+    NoCaptures,
+    Captures
+};
+
+template<typename T, int D, std::size_t... Sizes>
+using Stats = MultiArray<StatsEntry<T, D>, Sizes...>;
+
+// ButterflyHistory records how often quiet moves have been successful or unsuccessful
+// during the current search, and is used for reduction and move ordering decisions.
+// It uses 2 tables (one for each color) indexed by the move's from and to squares,
+// see https://www.chessprogramming.org/Butterfly_Boards (~11 elo)
+using ButterflyHistory = Stats<std::int16_t, 7183, COLOR_NB, int(SQUARE_NB) * int(SQUARE_NB)>;
+
+// LowPlyHistory is addressed by ply and move's from and to squares, used
+// to improve move ordering near the root
+using LowPlyHistory =
+  Stats<std::int16_t, 7183, LOW_PLY_HISTORY_SIZE, int(SQUARE_NB) * int(SQUARE_NB)>;
+
+// CapturePieceToHistory is addressed by a move's [piece][to][captured piece type]
+using CapturePieceToHistory = Stats<std::int16_t, 10692, PIECE_NB, SQUARE_NB, PIECE_TYPE_NB>;
+
+// PieceToHistory is like ButterflyHistory but is addressed by a move's [piece][to]
+using PieceToHistory = Stats<std::int16_t, 30000, PIECE_NB, SQUARE_NB>;
+
+// ContinuationHistory is the combined history of a given pair of moves, usually
+// the current one given a previous one. The nested history table is based on
+// PieceToHistory instead of ButterflyBoards.
+// (~63 elo)
+using ContinuationHistory = MultiArray<PieceToHistory, PIECE_NB, SQUARE_NB>;
+
+// PawnHistory is addressed by the pawn structure and a move's [piece][to]
+using PawnHistory = Stats<std::int16_t, 8192, PAWN_HISTORY_SIZE, PIECE_NB, SQUARE_NB>;
+
+// Correction histories record differences between the static evaluation of
+// positions and their search score. It is used to improve the static evaluation
+// used by some search heuristics.
+// see https://www.chessprogramming.org/Static_Evaluation_Correction_History
+enum CorrHistType {
+    Pawn,          // By color and pawn structure
+    Minor,         // By color and positions of minor pieces (Knight, Bishop)
+    NonPawn,       // By non-pawn material positions and color
+    PieceTo,       // By [piece][to] move
+    Continuation,  // Combined history of move pairs
+};
+
+namespace Detail {
+
+template<CorrHistType>
+struct CorrHistTypedef {  // primary template: any CorrHistType not specialized below
+    using type = Stats<std::int16_t, CORRECTION_HISTORY_LIMIT, CORRECTION_HISTORY_SIZE, COLOR_NB>;
+};
+
+template<>
+struct CorrHistTypedef<PieceTo> {  // table dimensions are [PIECE_NB][SQUARE_NB]
+    using type = Stats<std::int16_t, CORRECTION_HISTORY_LIMIT, PIECE_NB, SQUARE_NB>;
+};
+
+template<>
+struct CorrHistTypedef<Continuation> {  // a [PIECE_NB][SQUARE_NB] array of PieceTo tables
+    using type = MultiArray<CorrHistTypedef<PieceTo>::type, PIECE_NB, SQUARE_NB>;
+};
+
+}  // namespace Detail
+
+template<CorrHistType T>
+using CorrectionHistory = typename Detail::CorrHistTypedef<T>::type;
+
+}  // namespace Stockfish
+
+#endif  // #ifndef HISTORY_H_INCLUDED
@@ -26,7 +26,9 @@
      defined(__SSSE3__)    || \
      defined(__SSE4_1__)   || \
      defined(__SSE4_2__)   || \
-      defined(__neon__)
+      defined(__neon__)     || \
+      defined(__ARM_NEON)   || \
+      defined(__ALTIVEC__)
 # define INCBIN_ALIGNMENT_INDEX 4
 #elif ULONG_MAX != 0xffffffffu
 # define INCBIN_ALIGNMENT_INDEX 3
@@ -64,6 +66,9 @@
    X
 #define INCBIN_INVOKE(N, ...) \
    INCBIN_EVAL(N(__VA_ARGS__))
+/* Variable argument count for overloading by arity */
+#define INCBIN_VA_ARG_COUNTER(_1, _2, _3, N, ...) N
+#define INCBIN_VA_ARGC(...) INCBIN_VA_ARG_COUNTER(__VA_ARGS__, 3, 2, 1, 0)

 /* Green Hills uses a different directive for including binary data */
 #if defined(__ghs__)
@@ -117,28 +122,49 @@
 #endif

 /**
- * @brief Optionally override the linker section into which data is emitted.
- *
- * @warning If you use this facility, you'll have to deal with platform-specific linker output
- * section naming on your own
- *
- * Overriding the default linker output section, e.g for esp8266/Arduino:
- * @code
- * #define INCBIN_OUTPUT_SECTION ".irom.text"
- * #include "incbin.h"
- * INCBIN(Foo, "foo.txt");
- * // Data is emitted into program memory that never gets copied to RAM
- * @endcode
+ * @brief Optionally override the linker section into which size and data is
+ * emitted.
+ * 
+ * @warning If you use this facility, you might have to deal with
+ * platform-specific linker output section naming on your own.
 */
 #if !defined(INCBIN_OUTPUT_SECTION)
 #  if defined(__APPLE__)
-#    define INCBIN_OUTPUT_SECTION         ".const_data"
+#    define INCBIN_OUTPUT_SECTION ".const_data"
 #  else
-#    define INCBIN_OUTPUT_SECTION         ".rodata"
+#    define INCBIN_OUTPUT_SECTION ".rodata"
 #  endif
 #endif

+/**
+ * @brief Optionally override the linker section into which data is emitted.
+ *
+ * @warning If you use this facility, you might have to deal with
+ * platform-specific linker output section naming on your own.
+ */
+#if !defined(INCBIN_OUTPUT_DATA_SECTION)
+#  define INCBIN_OUTPUT_DATA_SECTION INCBIN_OUTPUT_SECTION
+#endif
+
+/**
+ * @brief Optionally override the linker section into which size is emitted.
+ *
+ * @warning If you use this facility, you might have to deal with
+ * platform-specific linker output section naming on your own.
+ * 
+ * @note This is useful for Harvard architectures where program memory cannot
+ * be directly read from the program without special instructions. With this you
+ * can choose to put the size variable in RAM rather than ROM.
+ */
+#if !defined(INCBIN_OUTPUT_SIZE_SECTION)
+#  define INCBIN_OUTPUT_SIZE_SECTION INCBIN_OUTPUT_SECTION
+#endif
+
 #if defined(__APPLE__)
+#  include "TargetConditionals.h"
+#  if defined(TARGET_OS_IPHONE) && !defined(INCBIN_SILENCE_BITCODE_WARNING)
+#    warning "incbin is incompatible with bitcode. Using the library will break upload to App Store if you have bitcode enabled. Add `#define INCBIN_SILENCE_BITCODE_WARNING` before including this header to silence this warning."
+#  endif
 /* The directives are different for Apple branded compilers */
 #  define INCBIN_SECTION         INCBIN_OUTPUT_SECTION "\n"
 #  define INCBIN_GLOBAL(NAME)    ".globl " INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME "\n"
@@ -179,27 +205,17 @@
 /**
 * @brief Specify the prefix to use for symbol names.
 *
- * By default this is `g', producing symbols of the form:
- * @code
- * #include "incbin.h"
- * INCBIN(Foo, "foo.txt");
+ * @note By default this is "g".
 *
- * // Now you have the following symbols:
- * // const unsigned char gFooData[];
- * // const unsigned char *const gFooEnd;
- * // const unsigned int gFooSize;
- * @endcode
- *
- * If however you specify a prefix before including: e.g:
 * @code
 * #define INCBIN_PREFIX incbin
 * #include "incbin.h"
 * INCBIN(Foo, "foo.txt");
 *
 * // Now you have the following symbols instead:
- * // const unsigned char incbinFooData[];
- * // const unsigned char *const incbinFooEnd;
- * // const unsigned int incbinFooSize;
+ * // const unsigned char incbinFoo<data>[];
+ * // const unsigned char *const incbinFoo<end>;
+ * // const unsigned int incbinFoo<size>;
 * @endcode
 */
 #if !defined(INCBIN_PREFIX)
@@ -213,18 +229,8 @@
 * - INCBIN_STYLE_CAMEL "CamelCase"
 * - INCBIN_STYLE_SNAKE "snake_case"
 *
- * Default option is *INCBIN_STYLE_CAMEL* producing symbols of the form:
- * @code
- * #include "incbin.h"
- * INCBIN(Foo, "foo.txt");
+ * @note By default this is INCBIN_STYLE_CAMEL
 *
- * // Now you have the following symbols:
- * // const unsigned char <prefix>FooData[];
- * // const unsigned char *const <prefix>FooEnd;
- * // const unsigned int <prefix>FooSize;
- * @endcode
- *
- * If however you specify a style before including: e.g:
 * @code
 * #define INCBIN_STYLE INCBIN_STYLE_SNAKE
 * #include "incbin.h"
@@ -288,23 +294,38 @@
 * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
 * "Data", as well as "End" and "Size" after. An example is provided below.
 *
+ * @param TYPE Optional array type. Omitting this picks a default of `unsigned char`.
 * @param NAME The name given for the binary data
 *
 * @code
 * INCBIN_EXTERN(Foo);
 *
 * // Now you have the following symbols:
- * // extern const unsigned char <prefix>FooData[];
- * // extern const unsigned char *const <prefix>FooEnd;
- * // extern const unsigned int <prefix>FooSize;
+ * // extern const unsigned char <prefix>Foo<data>[];
+ * // extern const unsigned char *const <prefix>Foo<end>;
+ * // extern const unsigned int <prefix>Foo<size>;
+ * @endcode
+ * 
+ * You may specify a custom optional data type as well as the first argument.
+ * @code
+ * INCBIN_EXTERN(custom_type, Foo);
+ * 
+ * // Now you have the following symbols:
+ * // extern const custom_type <prefix>Foo<data>[];
+ * // extern const custom_type *const <prefix>Foo<end>;
+ * // extern const unsigned int <prefix>Foo<size>;
 * @endcode
 */
-#define INCBIN_EXTERN(NAME) \
-    INCBIN_EXTERNAL const INCBIN_ALIGN unsigned char \
+#define INCBIN_EXTERN(...) \
+    INCBIN_CONCATENATE(INCBIN_EXTERN_, INCBIN_VA_ARGC(__VA_ARGS__))(__VA_ARGS__)
+#define INCBIN_EXTERN_1(NAME, ...) \
+    INCBIN_EXTERN_2(unsigned char, NAME)
+#define INCBIN_EXTERN_2(TYPE, NAME) \
+    INCBIN_EXTERNAL const INCBIN_ALIGN TYPE \
        INCBIN_CONCATENATE( \
            INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
            INCBIN_STYLE_IDENT(DATA))[]; \
-    INCBIN_EXTERNAL const INCBIN_ALIGN unsigned char *const \
+    INCBIN_EXTERNAL const INCBIN_ALIGN TYPE *const \
    INCBIN_CONCATENATE( \
        INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
        INCBIN_STYLE_IDENT(END)); \
@@ -313,6 +334,29 @@
            INCBIN_CONCATENATE(INCBIN_PREFIX, NAME), \
            INCBIN_STYLE_IDENT(SIZE))

+/**
+ * @brief Externally reference textual data included in another translation unit.
+ *
+ * Produces three external symbols that reference the textual data included in
+ * another translation unit.
+ *
+ * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
+ * "Data", as well as "End" and "Size" after. An example is provided below.
+ *
+ * @param NAME The name given for the textual data
+ *
+ * @code
+ * INCTXT_EXTERN(Foo);
+ *
+ * // Now you have the following symbols:
+ * // extern const char <prefix>Foo<data>[];
+ * // extern const char *const <prefix>Foo<end>;
+ * // extern const unsigned int <prefix>Foo<size>;
+ * @endcode
+ */
+#define INCTXT_EXTERN(NAME) \
+    INCBIN_EXTERN_2(char, NAME)
+
 /**
 * @brief Include a binary file into the current translation unit.
 *
@@ -322,6 +366,7 @@
 * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
 * "Data", as well as "End" and "Size" after. An example is provided below.
 *
+ * @param TYPE Optional array type. Omitting this picks a default of `unsigned char`.
 * @param NAME The name to associate with this binary data (as an identifier.)
 * @param FILENAME The file to include (as a string literal.)
 *
@@ -329,9 +374,20 @@
 * INCBIN(Icon, "icon.png");
 *
 * // Now you have the following symbols:
- * // const unsigned char <prefix>IconData[];
- * // const unsigned char *const <prefix>IconEnd;
- * // const unsigned int <prefix>IconSize;
+ * // const unsigned char <prefix>Icon<data>[];
+ * // const unsigned char *const <prefix>Icon<end>;
+ * // const unsigned int <prefix>Icon<size>;
+ * @endcode
+ * 
+ * You may optionally specify a custom data type as the first argument.
+ * These macros are specialized by arity.
+ * @code
+ * INCBIN(custom_type, Icon, "icon.png");
+ *
+ * // Now you have the following symbols:
+ * // const custom_type <prefix>Icon<data>[];
+ * // const custom_type *const <prefix>Icon<end>;
+ * // const unsigned int <prefix>Icon<size>;
 * @endcode
 *
 * @warning This must be used in global scope
@@ -341,15 +397,28 @@
 * please @see INCBIN_EXTERN.
 */
 #ifdef _MSC_VER
-#define INCBIN(NAME, FILENAME) \
-    INCBIN_EXTERN(NAME)
+#  define INCBIN(NAME, FILENAME) \
+      INCBIN_EXTERN(NAME)
 #else
-#define INCBIN(NAME, FILENAME) \
+#  define INCBIN(...) \
+     INCBIN_CONCATENATE(INCBIN_, INCBIN_VA_ARGC(__VA_ARGS__))(__VA_ARGS__)
+#  if defined(__GNUC__)
+#    define INCBIN_1(...) _Pragma("GCC error \"Single argument INCBIN not allowed\"")
+#  elif defined(__clang__)
+#    define INCBIN_1(...) _Pragma("clang error \"Single argument INCBIN not allowed\"")
+#  else
+#    define INCBIN_1(...) /* Cannot do anything here */
+#  endif
+#  define INCBIN_2(NAME, FILENAME) \
+      INCBIN_3(unsigned char, NAME, FILENAME)
+#  define INCBIN_3(TYPE, NAME, FILENAME) INCBIN_COMMON(TYPE, NAME, FILENAME, /* No terminator for binary data */)
+#  define INCBIN_COMMON(TYPE, NAME, FILENAME, TERMINATOR) \
    __asm__(INCBIN_SECTION \
            INCBIN_GLOBAL_LABELS(NAME, DATA) \
            INCBIN_ALIGN_HOST \
            INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(DATA) ":\n" \
            INCBIN_MACRO " \"" FILENAME "\"\n" \
+                TERMINATOR \
            INCBIN_GLOBAL_LABELS(NAME, END) \
            INCBIN_ALIGN_BYTE \
            INCBIN_MANGLE INCBIN_STRINGIZE(INCBIN_PREFIX) #NAME INCBIN_STYLE_STRING(END) ":\n" \
@@ -362,7 +431,46 @@
            INCBIN_ALIGN_HOST \
            ".text\n" \
    ); \
-    INCBIN_EXTERN(NAME)
+    INCBIN_EXTERN(TYPE, NAME)
+#endif

+/**
+ * @brief Include a textual file into the current translation unit.
+ * 
+ * This behaves the same as INCBIN except it produces char compatible arrays
+ * and implicitly adds a null-terminator byte, thus the size of data included
+ * by this is one byte larger than that of INCBIN.
+ *
+ * Includes a textual file into the current translation unit, producing three
+ * symbols for objects that encode the data and size respectively.
+ *
+ * The symbol names are a concatenation of `INCBIN_PREFIX' before *NAME*; with
+ * "Data", as well as "End" and "Size" after. An example is provided below.
+ *
+ * @param NAME The name to associate with this textual data (as an identifier.)
+ * @param FILENAME The file to include (as a string literal.)
+ *
+ * @code
+ * INCTXT(Readme, "readme.txt");
+ *
+ * // Now you have the following symbols:
+ * // const char <prefix>Readme<data>[];
+ * // const char *const <prefix>Readme<end>;
+ * // const unsigned int <prefix>Readme<size>;
+ * @endcode
+ *
+ * @warning This must be used in global scope
+ * @warning The identifiers may be different if INCBIN_STYLE is not default
+ *
+ * To externally reference the data included by this in another translation unit
+ * please @see INCTXT_EXTERN.
+ */
+#if defined(_MSC_VER)
+#  define INCTXT(NAME, FILENAME) \
+     INCBIN_EXTERN(NAME)
+#else
+#  define INCTXT(NAME, FILENAME) \
+     INCBIN_COMMON(char, NAME, FILENAME, INCBIN_BYTE "0\n")
 #endif
-#endif
+
+#endif
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -19,33 +19,26 @@
 #include <iostream>

 #include "bitboard.h"
-#include "endgame.h"
+#include "misc.h"
 #include "position.h"
-#include "psqt.h"
-#include "search.h"
-#include "syzygy/tbprobe.h"
-#include "thread.h"
-#include "tt.h"
+#include "types.h"
 #include "uci.h"
+#include "tune.h"
+
+using namespace Stockfish;

 int main(int argc, char* argv[]) {

-  std::cout << engine_info() << std::endl;
+    std::cout << engine_info() << std::endl;

-  CommandLine::init(argc, argv);
-  UCI::init(Options);
-  Tune::init();
-  PSQT::init();
-  Bitboards::init();
-  Position::init();
-  Bitbases::init();
-  Endgames::init();
-  Threads.set(size_t(Options["Threads"]));
-  Search::clear(); // After threads are up
-  Eval::NNUE::init();
+    Bitboards::init();
+    Position::init();

-  UCI::loop(argc, argv);
+    UCIEngine uci(argc, argv);

-  Threads.set(0);
-  return 0;
+    Tune::init(uci.engine_options());
+
+    uci.loop();
+
+    return 0;
 }
@@ -1,225 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <cassert>
-#include <cstring>   // For std::memset
-
-#include "material.h"
-#include "thread.h"
-
-using namespace std;
-
-namespace {
-  #define S(mg, eg) make_score(mg, eg)
-
-  // Polynomial material imbalance parameters
-
-  // One Score parameter for each pair (our piece, another of our pieces)
-  constexpr Score QuadraticOurs[][PIECE_TYPE_NB] = {
-    // OUR PIECE 2
-    // bishop pair    pawn         knight       bishop       rook           queen
-    {S(1419, 1455)                                                                  }, // Bishop pair
-    {S( 101,   28), S( 37,  39)                                                     }, // Pawn
-    {S(  57,   64), S(249, 187), S(-49, -62)                                        }, // Knight      OUR PIECE 1
-    {S(   0,    0), S(118, 137), S( 10,  27), S(  0,   0)                           }, // Bishop
-    {S( -63,  -68), S( -5,   3), S(100,  81), S(132, 118), S(-246, -244)            }, // Rook
-    {S(-210, -211), S( 37,  14), S(147, 141), S(161, 105), S(-158, -174), S(-9,-31) }  // Queen
-  };
-
-  // One Score parameter for each pair (our piece, their piece)
-  constexpr Score QuadraticTheirs[][PIECE_TYPE_NB] = {
-    // THEIR PIECE
-    // bishop pair   pawn         knight       bishop       rook         queen
-    {                                                                               }, // Bishop pair
-    {S(  33,  30)                                                                   }, // Pawn
-    {S(  46,  18), S(106,  84)                                                      }, // Knight      OUR PIECE
-    {S(  75,  35), S( 59,  44), S( 60,  15)                                         }, // Bishop
-    {S(  26,  35), S(  6,  22), S( 38,  39), S(-12,  -2)                            }, // Rook
-    {S(  97,  93), S(100, 163), S(-58, -91), S(112, 192), S(276, 225)               }  // Queen
-  };
-
-  #undef S
-
-  // Endgame evaluation and scaling functions are accessed directly and not through
-  // the function maps because they correspond to more than one material hash key.
-  Endgame<KXK>    EvaluateKXK[] = { Endgame<KXK>(WHITE),    Endgame<KXK>(BLACK) };
-
-  Endgame<KBPsK>  ScaleKBPsK[]  = { Endgame<KBPsK>(WHITE),  Endgame<KBPsK>(BLACK) };
-  Endgame<KQKRPs> ScaleKQKRPs[] = { Endgame<KQKRPs>(WHITE), Endgame<KQKRPs>(BLACK) };
-  Endgame<KPsK>   ScaleKPsK[]   = { Endgame<KPsK>(WHITE),   Endgame<KPsK>(BLACK) };
-  Endgame<KPKP>   ScaleKPKP[]   = { Endgame<KPKP>(WHITE),   Endgame<KPKP>(BLACK) };
-
-  // Helper used to detect a given material distribution
-  bool is_KXK(const Position& pos, Color us) {
-    return  !more_than_one(pos.pieces(~us))
-          && pos.non_pawn_material(us) >= RookValueMg;
-  }
-
-  bool is_KBPsK(const Position& pos, Color us) {
-    return   pos.non_pawn_material(us) == BishopValueMg
-          && pos.count<PAWN  >(us) >= 1;
-  }
-
-  bool is_KQKRPs(const Position& pos, Color us) {
-    return  !pos.count<PAWN>(us)
-          && pos.non_pawn_material(us) == QueenValueMg
-          && pos.count<ROOK>(~us) == 1
-          && pos.count<PAWN>(~us) >= 1;
-  }
-
-
-  /// imbalance() calculates the imbalance by comparing the piece count of each
-  /// piece type for both colors.
-
-  template<Color Us>
-  Score imbalance(const int pieceCount[][PIECE_TYPE_NB]) {
-
-    constexpr Color Them = ~Us;
-
-    Score bonus = SCORE_ZERO;
-
-    // Second-degree polynomial material imbalance, by Tord Romstad
-    for (int pt1 = NO_PIECE_TYPE; pt1 <= QUEEN; ++pt1)
-    {
-        if (!pieceCount[Us][pt1])
-            continue;
-
-        int v = QuadraticOurs[pt1][pt1] * pieceCount[Us][pt1];
-
-        for (int pt2 = NO_PIECE_TYPE; pt2 < pt1; ++pt2)
-            v +=  QuadraticOurs[pt1][pt2] * pieceCount[Us][pt2]
-                + QuadraticTheirs[pt1][pt2] * pieceCount[Them][pt2];
-
-        bonus += pieceCount[Us][pt1] * v;
-    }
-
-    return bonus;
-  }
-
-} // namespace
-
-namespace Material {
-
-
-/// Material::probe() looks up the current position's material configuration in
-/// the material hash table. It returns a pointer to the Entry if the position
-/// is found. Otherwise a new Entry is computed and stored there, so we don't
-/// have to recompute all when the same material configuration occurs again.
-
-Entry* probe(const Position& pos) {
-
-  Key key = pos.material_key();
-  Entry* e = pos.this_thread()->materialTable[key];
-
-  if (e->key == key)
-      return e;
-
-  std::memset(e, 0, sizeof(Entry));
-  e->key = key;
-  e->factor[WHITE] = e->factor[BLACK] = (uint8_t)SCALE_FACTOR_NORMAL;
-
-  Value npm_w = pos.non_pawn_material(WHITE);
-  Value npm_b = pos.non_pawn_material(BLACK);
-  Value npm   = std::clamp(npm_w + npm_b, EndgameLimit, MidgameLimit);
-
-  // Map total non-pawn material into [PHASE_ENDGAME, PHASE_MIDGAME]
-  e->gamePhase = Phase(((npm - EndgameLimit) * PHASE_MIDGAME) / (MidgameLimit - EndgameLimit));
-
-  // Let's look if we have a specialized evaluation function for this particular
-  // material configuration. Firstly we look for a fixed configuration one, then
-  // for a generic one if the previous search failed.
-  if ((e->evaluationFunction = Endgames::probe<Value>(key)) != nullptr)
-      return e;
-
-  for (Color c : { WHITE, BLACK })
-      if (is_KXK(pos, c))
-      {
-          e->evaluationFunction = &EvaluateKXK[c];
-          return e;
-      }
-
-  // OK, we didn't find any special evaluation function for the current material
-  // configuration. Is there a suitable specialized scaling function?
-  const auto* sf = Endgames::probe<ScaleFactor>(key);
-
-  if (sf)
-  {
-      e->scalingFunction[sf->strongSide] = sf; // Only strong color assigned
-      return e;
-  }
-
-  // We didn't find any specialized scaling function, so fall back on generic
-  // ones that refer to more than one material distribution. Note that in this
-  // case we don't return after setting the function.
-  for (Color c : { WHITE, BLACK })
-  {
-    if (is_KBPsK(pos, c))
-        e->scalingFunction[c] = &ScaleKBPsK[c];
-
-    else if (is_KQKRPs(pos, c))
-        e->scalingFunction[c] = &ScaleKQKRPs[c];
-  }
-
-  if (npm_w + npm_b == VALUE_ZERO && pos.pieces(PAWN)) // Only pawns on the board
-  {
-      if (!pos.count<PAWN>(BLACK))
-      {
-          assert(pos.count<PAWN>(WHITE) >= 2);
-
-          e->scalingFunction[WHITE] = &ScaleKPsK[WHITE];
-      }
-      else if (!pos.count<PAWN>(WHITE))
-      {
-          assert(pos.count<PAWN>(BLACK) >= 2);
-
-          e->scalingFunction[BLACK] = &ScaleKPsK[BLACK];
-      }
-      else if (pos.count<PAWN>(WHITE) == 1 && pos.count<PAWN>(BLACK) == 1)
-      {
-          // This is a special case because we set scaling functions
-          // for both colors instead of only one.
-          e->scalingFunction[WHITE] = &ScaleKPKP[WHITE];
-          e->scalingFunction[BLACK] = &ScaleKPKP[BLACK];
-      }
-  }
-
-  // Zero or just one pawn makes it difficult to win, even with a small material
-  // advantage. This catches some trivial draws like KK, KBK and KNK and gives a
-  // drawish scale factor for cases such as KRKBP and KmmKm (except for KBBKN).
-  if (!pos.count<PAWN>(WHITE) && npm_w - npm_b <= BishopValueMg)
-      e->factor[WHITE] = uint8_t(npm_w <  RookValueMg   ? SCALE_FACTOR_DRAW :
-                                 npm_b <= BishopValueMg ? 4 : 14);
-
-  if (!pos.count<PAWN>(BLACK) && npm_b - npm_w <= BishopValueMg)
-      e->factor[BLACK] = uint8_t(npm_b <  RookValueMg   ? SCALE_FACTOR_DRAW :
-                                 npm_w <= BishopValueMg ? 4 : 14);
-
-  // Evaluate the material imbalance. We use PIECE_TYPE_NONE as a place holder
-  // for the bishop pair "extended piece", which allows us to be more flexible
-  // in defining bishop pair bonuses.
-  const int pieceCount[COLOR_NB][PIECE_TYPE_NB] = {
-  { pos.count<BISHOP>(WHITE) > 1, pos.count<PAWN>(WHITE), pos.count<KNIGHT>(WHITE),
-    pos.count<BISHOP>(WHITE)    , pos.count<ROOK>(WHITE), pos.count<QUEEN >(WHITE) },
-  { pos.count<BISHOP>(BLACK) > 1, pos.count<PAWN>(BLACK), pos.count<KNIGHT>(BLACK),
-    pos.count<BISHOP>(BLACK)    , pos.count<ROOK>(BLACK), pos.count<QUEEN >(BLACK) } };
-
-  e->score = (imbalance<WHITE>(pieceCount) - imbalance<BLACK>(pieceCount)) / 16;
-  return e;
-}
-
-} // namespace Material
@@ -1,71 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef MATERIAL_H_INCLUDED
-#define MATERIAL_H_INCLUDED
-
-#include "endgame.h"
-#include "misc.h"
-#include "position.h"
-#include "types.h"
-
-namespace Material {
-
-/// Material::Entry contains various information about a material configuration.
-/// It contains a material imbalance evaluation, a function pointer to a special
-/// endgame evaluation function (which in most cases is NULL, meaning that the
-/// standard evaluation function will be used), and scale factors.
-///
-/// The scale factors are used to scale the evaluation score up or down. For
-/// instance, in KRB vs KR endgames, the score is scaled down by a factor of 4,
-/// which will result in scores of absolute value less than one pawn.
-
-struct Entry {
-
-  Score imbalance() const { return score; }
-  Phase game_phase() const { return (Phase)gamePhase; }
-  bool specialized_eval_exists() const { return evaluationFunction != nullptr; }
-  Value evaluate(const Position& pos) const { return (*evaluationFunction)(pos); }
-
-  // scale_factor() takes a position and a color as input and returns a scale factor
-  // for the given color. We have to provide the position in addition to the color
-  // because the scale factor may also be a function which should be applied to
-  // the position. For instance, in KBP vs K endgames, the scaling function looks
-  // for rook pawns and wrong-colored bishops.
-  ScaleFactor scale_factor(const Position& pos, Color c) const {
-    ScaleFactor sf = scalingFunction[c] ? (*scalingFunction[c])(pos)
-                                        :  SCALE_FACTOR_NONE;
-    return sf != SCALE_FACTOR_NONE ? sf : ScaleFactor(factor[c]);
-  }
-
-  Key key;
-  const EndgameBase<Value>* evaluationFunction;
-  const EndgameBase<ScaleFactor>* scalingFunction[COLOR_NB]; // Could be one for each
-                                                             // side (e.g. KPKP, KBPsK)
-  Score score;
-  int16_t gamePhase;
-  uint8_t factor[COLOR_NB];
-};
-
-typedef HashTable<Entry, 8192> Table;
-
-Entry* probe(const Position& pos);
-
-} // namespace Material
-
-#endif // #ifndef MATERIAL_H_INCLUDED
@@ -0,0 +1,268 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "memory.h"
+
+#include <cstdlib>
+
+#if __has_include("features.h")
+    #include <features.h>
+#endif
+
+#if defined(__linux__) && !defined(__ANDROID__)
+    #include <sys/mman.h>
+#endif
+
+#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) \
+  || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) \
+  || defined(__e2k__)
+    #define POSIXALIGNEDALLOC
+    #include <stdlib.h>
+#endif
+
+#ifdef _WIN32
+    #if _WIN32_WINNT < 0x0601
+        #undef _WIN32_WINNT
+        #define _WIN32_WINNT 0x0601  // Force to include needed API prototypes
+    #endif
+
+    #ifndef NOMINMAX
+        #define NOMINMAX
+    #endif
+
+    #include <ios>       // std::hex, std::dec
+    #include <iostream>  // std::cerr
+    #include <ostream>   // std::endl
+    #include <windows.h>
+
+// The Windows API needed to adjust token privileges could be missing from old
+// Windows versions, so instead of calling it directly (forcing the linker to
+// resolve the calls at link time), try to load it at runtime. To do this we
+// first need to define the corresponding function pointer types.
+
+extern "C" {
+using OpenProcessToken_t      = bool (*)(HANDLE, DWORD, PHANDLE);
+using LookupPrivilegeValueA_t = bool (*)(LPCSTR, LPCSTR, PLUID);
+using AdjustTokenPrivileges_t =
+  bool (*)(HANDLE, BOOL, PTOKEN_PRIVILEGES, DWORD, PTOKEN_PRIVILEGES, PDWORD);
+}
+#endif
+
+
+namespace Stockfish {
+
+// Wrappers for systems where the c++17 implementation does not guarantee the
+// availability of aligned_alloc(). Memory allocated with std_aligned_alloc()
+// must be freed with std_aligned_free().
+
+void* std_aligned_alloc(size_t alignment, size_t size) {
+#if defined(_ISOC11_SOURCE)
+    return aligned_alloc(alignment, size);
+#elif defined(POSIXALIGNEDALLOC)
+    void* mem = nullptr;
+    posix_memalign(&mem, alignment, size);
+    return mem;
+#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64)
+    return _mm_malloc(size, alignment);
+#elif defined(_WIN32)
+    return _aligned_malloc(size, alignment);
+#else
+    return std::aligned_alloc(alignment, size);
+#endif
+}
+
+void std_aligned_free(void* ptr) {
+
+#if defined(POSIXALIGNEDALLOC)
+    free(ptr);
+#elif defined(_WIN32) && !defined(_M_ARM) && !defined(_M_ARM64)
+    _mm_free(ptr);
+#elif defined(_WIN32)
+    _aligned_free(ptr);
+#else
+    free(ptr);
+#endif
+}
+
+// aligned_large_pages_alloc() will return suitably aligned memory,
+// if possible using large pages.
+
+#if defined(_WIN32)
+
+static void* aligned_large_pages_alloc_windows([[maybe_unused]] size_t allocSize) {
+
+    #if !defined(_WIN64)
+    return nullptr;
+    #else
+
+    HANDLE hProcessToken{};
+    LUID   luid{};
+    void*  mem = nullptr;
+
+    const size_t largePageSize = GetLargePageMinimum();
+    if (!largePageSize)
+        return nullptr;
+
+    // Dynamically link OpenProcessToken, LookupPrivilegeValue and AdjustTokenPrivileges
+
+    HMODULE hAdvapi32 = GetModuleHandle(TEXT("advapi32.dll"));
+
+    if (!hAdvapi32)
+        hAdvapi32 = LoadLibrary(TEXT("advapi32.dll"));
+
+    auto OpenProcessToken_f =
+      OpenProcessToken_t((void (*)()) GetProcAddress(hAdvapi32, "OpenProcessToken"));
+    if (!OpenProcessToken_f)
+        return nullptr;
+    auto LookupPrivilegeValueA_f =
+      LookupPrivilegeValueA_t((void (*)()) GetProcAddress(hAdvapi32, "LookupPrivilegeValueA"));
+    if (!LookupPrivilegeValueA_f)
+        return nullptr;
+    auto AdjustTokenPrivileges_f =
+      AdjustTokenPrivileges_t((void (*)()) GetProcAddress(hAdvapi32, "AdjustTokenPrivileges"));
+    if (!AdjustTokenPrivileges_f)
+        return nullptr;
+
+    // We need SeLockMemoryPrivilege, so try to enable it for the process
+
+    if (!OpenProcessToken_f(  // OpenProcessToken()
+          GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken))
+        return nullptr;
+
+    if (LookupPrivilegeValueA_f(nullptr, "SeLockMemoryPrivilege", &luid))
+    {
+        TOKEN_PRIVILEGES tp{};
+        TOKEN_PRIVILEGES prevTp{};
+        DWORD            prevTpLen = 0;
+
+        tp.PrivilegeCount           = 1;
+        tp.Privileges[0].Luid       = luid;
+        tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
+
+        // Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges()
+        // succeeds, we still need to query GetLastError() to ensure that the privileges
+        // were actually obtained.
+
+        if (AdjustTokenPrivileges_f(hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp,
+                                    &prevTpLen)
+            && GetLastError() == ERROR_SUCCESS)
+        {
+            // Round up size to full pages and allocate
+            allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1);
+            mem       = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES,
+                                     PAGE_READWRITE);
+
+            // Privilege no longer needed, restore previous state
+            AdjustTokenPrivileges_f(hProcessToken, FALSE, &prevTp, 0, nullptr, nullptr);
+        }
+    }
+
+    CloseHandle(hProcessToken);
+
+    return mem;
+
+    #endif
+}
+
+void* aligned_large_pages_alloc(size_t allocSize) {
+
+    // Try to allocate large pages
+    void* mem = aligned_large_pages_alloc_windows(allocSize);
+
+    // Fall back to regular, page-aligned, allocation if necessary
+    if (!mem)
+        mem = VirtualAlloc(nullptr, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+
+    return mem;
+}
+
+#else
+
+void* aligned_large_pages_alloc(size_t allocSize) {
+
+    #if defined(__linux__)
+    constexpr size_t alignment = 2 * 1024 * 1024;  // 2MB page size assumed
+    #else
+    constexpr size_t alignment = 4096;  // small page size assumed
+    #endif
+
+    // Round up to multiples of alignment
+    size_t size = ((allocSize + alignment - 1) / alignment) * alignment;
+    void*  mem  = std_aligned_alloc(alignment, size);
+    #if defined(MADV_HUGEPAGE)
+    madvise(mem, size, MADV_HUGEPAGE);
+    #endif
+    return mem;
+}
+
+#endif
+
+bool has_large_pages() {
+
+#if defined(_WIN32)
+
+    constexpr size_t page_size = 2 * 1024 * 1024;  // 2MB page size assumed
+    void*            mem       = aligned_large_pages_alloc_windows(page_size);
+    if (mem == nullptr)
+    {
+        return false;
+    }
+    else
+    {
+        aligned_large_pages_free(mem);
+        return true;
+    }
+
+#elif defined(__linux__)
+
+    #if defined(MADV_HUGEPAGE)
+    return true;
+    #else
+    return false;
+    #endif
+
+#else
+
+    return false;
+
+#endif
+}
+
+
+// aligned_large_pages_free() will free the memory previously allocated
+// by aligned_large_pages_alloc(). The effect is a nop if mem == nullptr.
+
+#if defined(_WIN32)
+
+void aligned_large_pages_free(void* mem) {
+
+    if (mem && !VirtualFree(mem, 0, MEM_RELEASE))
+    {
+        DWORD err = GetLastError();
+        std::cerr << "Failed to free large page memory. Error code: 0x" << std::hex << err
+                  << std::dec << std::endl;
+        exit(EXIT_FAILURE);
+    }
+}
+
+#else
+
+void aligned_large_pages_free(void* mem) { std_aligned_free(mem); }
+
+#endif
+}  // namespace Stockfish
@@ -0,0 +1,218 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef MEMORY_H_INCLUDED
+#define MEMORY_H_INCLUDED
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <new>
+#include <type_traits>
+#include <utility>
+
+#include "types.h"
+
+namespace Stockfish {
+
+void* std_aligned_alloc(size_t alignment, size_t size);
+void  std_aligned_free(void* ptr);
+
+// Memory aligned by page size, min alignment: 4096 bytes
+void* aligned_large_pages_alloc(size_t size);
+void  aligned_large_pages_free(void* mem);
+
+bool has_large_pages();
+
+// Frees memory which was placed there with placement new.
+// Handles a single object only: destroys one T, then frees the pointer itself.
+template<typename T, typename FREE_FUNC>
+void memory_deleter(T* ptr, FREE_FUNC free_func) {
+    if (!ptr)
+        return;
+
+    // Explicitly needed to call the destructor
+    if constexpr (!std::is_trivially_destructible_v<T>)
+        ptr->~T();
+
+    free_func(ptr);
+    return;
+}
+
+// Frees an array which was placed there with placement new.
+// Uses the element count stored in front of the array to destroy each element.
+template<typename T, typename FREE_FUNC>
+void memory_deleter_array(T* ptr, FREE_FUNC free_func) {
+    if (!ptr)
+        return;
+
+
+    // Step the pointer back to where the array size is stored
+    const size_t array_offset = std::max(sizeof(size_t), alignof(T));
+    char*        raw_memory   = reinterpret_cast<char*>(ptr) - array_offset;
+
+    if constexpr (!std::is_trivially_destructible_v<T>)
+    {
+        const size_t size = *reinterpret_cast<size_t*>(raw_memory);
+
+        // Explicitly call the destructor for each element in reverse order
+        for (size_t i = size; i-- > 0;)
+            ptr[i].~T();
+    }
+
+    free_func(raw_memory);
+}
+
+// Allocates memory for a single object and places it there with placement new
+template<typename T, typename ALLOC_FUNC, typename... Args>
+inline std::enable_if_t<!std::is_array_v<T>, T*> memory_allocator(ALLOC_FUNC alloc_func,
+                                                                  Args&&... args) {
+    void* raw_memory = alloc_func(sizeof(T));
+    ASSERT_ALIGNED(raw_memory, alignof(T));
+    return new (raw_memory) T(std::forward<Args>(args)...);
+}
+
+// Allocates memory for an array of unknown bound and places it there with placement new
+template<typename T, typename ALLOC_FUNC>
+inline std::enable_if_t<std::is_array_v<T>, std::remove_extent_t<T>*>
+memory_allocator(ALLOC_FUNC alloc_func, size_t num) {
+    using ElementType = std::remove_extent_t<T>;
+
+    const size_t array_offset = std::max(sizeof(size_t), alignof(ElementType));
+
+    // Save the array size in the memory location
+    char* raw_memory =
+      reinterpret_cast<char*>(alloc_func(array_offset + num * sizeof(ElementType)));
+    ASSERT_ALIGNED(raw_memory, alignof(T));
+
+    new (raw_memory) size_t(num);
+
+    for (size_t i = 0; i < num; ++i)
+        new (raw_memory + array_offset + i * sizeof(ElementType)) ElementType();
+
+    // Need to return the pointer at the start of the array so that
+    // the indexing in unique_ptr<T[]> works.
+    return reinterpret_cast<ElementType*>(raw_memory + array_offset);
+}
+
+//
+//
+// aligned large page unique ptr
+//
+//
+
+template<typename T>
+struct LargePageDeleter {
+    void operator()(T* ptr) const { return memory_deleter<T>(ptr, aligned_large_pages_free); }
+};
+
+template<typename T>
+struct LargePageArrayDeleter {
+    void operator()(T* ptr) const { return memory_deleter_array<T>(ptr, aligned_large_pages_free); }
+};
+
+template<typename T>
+using LargePagePtr =
+  std::conditional_t<std::is_array_v<T>,
+                     std::unique_ptr<T, LargePageArrayDeleter<std::remove_extent_t<T>>>,
+                     std::unique_ptr<T, LargePageDeleter<T>>>;
+
+// make_unique_large_page for single objects
+template<typename T, typename... Args>
+std::enable_if_t<!std::is_array_v<T>, LargePagePtr<T>> make_unique_large_page(Args&&... args) {
+    static_assert(alignof(T) <= 4096,
+                  "aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
+
+    T* obj = memory_allocator<T>(aligned_large_pages_alloc, std::forward<Args>(args)...);
+
+    return LargePagePtr<T>(obj);
+}
+
+// make_unique_large_page for arrays of unknown bound
+template<typename T>
+std::enable_if_t<std::is_array_v<T>, LargePagePtr<T>> make_unique_large_page(size_t num) {
+    using ElementType = std::remove_extent_t<T>;
+
+    static_assert(alignof(ElementType) <= 4096,
+                  "aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
+
+    ElementType* memory = memory_allocator<T>(aligned_large_pages_alloc, num);
+
+    return LargePagePtr<T>(memory);
+}
+
+//
+//
+// aligned unique ptr
+//
+//
+
+template<typename T>
+struct AlignedDeleter {
+    void operator()(T* ptr) const { return memory_deleter<T>(ptr, std_aligned_free); }
+};
+
+template<typename T>
+struct AlignedArrayDeleter {
+    void operator()(T* ptr) const { return memory_deleter_array<T>(ptr, std_aligned_free); }
+};
+
+template<typename T>
+using AlignedPtr =
+  std::conditional_t<std::is_array_v<T>,
+                     std::unique_ptr<T, AlignedArrayDeleter<std::remove_extent_t<T>>>,
+                     std::unique_ptr<T, AlignedDeleter<T>>>;
+
+// make_unique_aligned for single objects
+template<typename T, typename... Args>
+std::enable_if_t<!std::is_array_v<T>, AlignedPtr<T>> make_unique_aligned(Args&&... args) {
+    const auto func = [](size_t size) { return std_aligned_alloc(alignof(T), size); };
+    T*         obj  = memory_allocator<T>(func, std::forward<Args>(args)...);
+
+    return AlignedPtr<T>(obj);
+}
+
+// make_unique_aligned for arrays of unknown bound
+template<typename T>
+std::enable_if_t<std::is_array_v<T>, AlignedPtr<T>> make_unique_aligned(size_t num) {
+    using ElementType = std::remove_extent_t<T>;
+
+    const auto   func   = [](size_t size) { return std_aligned_alloc(alignof(ElementType), size); };
+    ElementType* memory = memory_allocator<T>(func, num);
+
+    return AlignedPtr<T>(memory);
+}
+
+
+// Get the first aligned element of an array.
+// ptr must point to an array of size at least `sizeof(T) * N + alignment` bytes,
+// where N is the number of elements in the array.
+template<uintptr_t Alignment, typename T>
+T* align_ptr_up(T* ptr) {
+    static_assert(alignof(T) < Alignment);
+
+    const uintptr_t ptrint = reinterpret_cast<uintptr_t>(reinterpret_cast<char*>(ptr));
+    return reinterpret_cast<T*>(
+      reinterpret_cast<char*>((ptrint + (Alignment - 1)) / Alignment * Alignment));
+}
+
+
+}  // namespace Stockfish
+
+#endif  // #ifndef MEMORY_H_INCLUDED
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -16,613 +16,496 @@
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

-#ifdef _WIN32
-#if _WIN32_WINNT < 0x0601
-#undef  _WIN32_WINNT
-#define _WIN32_WINNT 0x0601 // Force to include needed API prototypes
-#endif
-
-#ifndef NOMINMAX
-#define NOMINMAX
-#endif
-
-#include <windows.h>
-// The needed Windows API for processor groups could be missed from old Windows
-// versions, so instead of calling them directly (forcing the linker to resolve
-// the calls at compile time), try to load them at runtime. To do this we need
-// first to define the corresponding function pointers.
-extern "C" {
-typedef bool(*fun1_t)(LOGICAL_PROCESSOR_RELATIONSHIP,
-                      PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, PDWORD);
-typedef bool(*fun2_t)(USHORT, PGROUP_AFFINITY);
-typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY);
-}
-#endif
+#include "misc.h"

+#include <array>
+#include <atomic>
+#include <cassert>
+#include <cctype>
+#include <cmath>
+#include <cstdlib>
 #include <fstream>
 #include <iomanip>
 #include <iostream>
+#include <iterator>
+#include <limits>
+#include <mutex>
 #include <sstream>
-#include <vector>
-#include <cstdlib>
+#include <string_view>

-#if defined(__linux__) && !defined(__ANDROID__)
-#include <stdlib.h>
-#include <sys/mman.h>
-#endif
+#include "types.h"

-#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32))
-#define POSIXALIGNEDALLOC
-#include <stdlib.h>
-#endif
-
-#include "misc.h"
-#include "thread.h"
-
-using namespace std;
+namespace Stockfish {

 namespace {

-/// Version number. If Version is left empty, then compile date in the format
-/// DD-MM-YY and show in engine_info.
-const string Version = "13";
+// Version number or dev.
+constexpr std::string_view version = "dev";

-/// Our fancy logging facility. The trick here is to replace cin.rdbuf() and
-/// cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We
-/// can toggle the logging of std::cout and std:cin at runtime whilst preserving
-/// usual I/O functionality, all without changing a single line of code!
-/// Idea from http://groups.google.com/group/comp.lang.c++/msg/1d941c0f26ea0d81
+// Our fancy logging facility. The trick here is to replace cin.rdbuf() and
+// cout.rdbuf() with two Tie objects that tie cin and cout to a file stream. We
+// can toggle the logging of std::cout and std::cin at runtime whilst preserving
+// usual I/O functionality, all without changing a single line of code!
+// Idea from http://groups.google.com/group/comp.lang.c++/msg/1d941c0f26ea0d81

-struct Tie: public streambuf { // MSVC requires split streambuf for cin and cout
+struct Tie: public std::streambuf {  // MSVC requires split streambuf for cin and cout

-  Tie(streambuf* b, streambuf* l) : buf(b), logBuf(l) {}
+    Tie(std::streambuf* b, std::streambuf* l) :
+        buf(b),
+        logBuf(l) {}

-  int sync() override { return logBuf->pubsync(), buf->pubsync(); }
-  int overflow(int c) override { return log(buf->sputc((char)c), "<< "); }
-  int underflow() override { return buf->sgetc(); }
-  int uflow() override { return log(buf->sbumpc(), ">> "); }
+    int sync() override { return logBuf->pubsync(), buf->pubsync(); }
+    int overflow(int c) override { return log(buf->sputc(char(c)), "<< "); }
+    int underflow() override { return buf->sgetc(); }
+    int uflow() override { return log(buf->sbumpc(), ">> "); }

-  streambuf *buf, *logBuf;
+    std::streambuf *buf, *logBuf;

-  int log(int c, const char* prefix) {
+    int log(int c, const char* prefix) {

-    static int last = '\n'; // Single log file
+        static int last = '\n';  // Single log file

-    if (last == '\n')
-        logBuf->sputn(prefix, 3);
+        if (last == '\n')
+            logBuf->sputn(prefix, 3);

-    return last = logBuf->sputc((char)c);
-  }
+        return last = logBuf->sputc(char(c));
+    }
 };

 class Logger {

-  Logger() : in(cin.rdbuf(), file.rdbuf()), out(cout.rdbuf(), file.rdbuf()) {}
- ~Logger() { start(""); }
+    Logger() :
+        in(std::cin.rdbuf(), file.rdbuf()),
+        out(std::cout.rdbuf(), file.rdbuf()) {}
+    ~Logger() { start(""); }

-  ofstream file;
-  Tie in, out;
+    std::ofstream file;
+    Tie           in, out;

-public:
-  static void start(const std::string& fname) {
+   public:
+    static void start(const std::string& fname) {

-    static Logger l;
+        static Logger l;

-    if (!fname.empty() && !l.file.is_open())
-    {
-        l.file.open(fname, ifstream::out);
-
-        if (!l.file.is_open())
+        if (l.file.is_open())
        {
-            cerr << "Unable to open debug log file " << fname << endl;
-            exit(EXIT_FAILURE);
+            std::cout.rdbuf(l.out.buf);
+            std::cin.rdbuf(l.in.buf);
+            l.file.close();
        }

-        cin.rdbuf(&l.in);
-        cout.rdbuf(&l.out);
+        if (!fname.empty())
+        {
+            l.file.open(fname, std::ifstream::out);
+
+            if (!l.file.is_open())
+            {
+                std::cerr << "Unable to open debug log file " << fname << std::endl;
+                exit(EXIT_FAILURE);
+            }
+
+            std::cin.rdbuf(&l.in);
+            std::cout.rdbuf(&l.out);
+        }
    }
-    else if (fname.empty() && l.file.is_open())
-    {
-        cout.rdbuf(l.out.buf);
-        cin.rdbuf(l.in.buf);
-        l.file.close();
-    }
-  }
 };

-} // namespace
+}  // namespace


-/// engine_info() returns the full name of the current Stockfish version. This
-/// will be either "Stockfish <Tag> DD-MM-YY" (where DD-MM-YY is the date when
-/// the program was compiled) or "Stockfish <Version>", depending on whether
-/// Version is empty.
+// Returns the full name of the current Stockfish version.
+//
+// For local dev compiles we try to append the commit SHA and
+// commit date from git. If that fails only the local compilation
+// date is set and "nogit" is specified:
+//      Stockfish dev-YYYYMMDD-SHA
+//      or
+//      Stockfish dev-YYYYMMDD-nogit
+//
+// For releases (non-dev builds) we only include the version number:
+//      Stockfish version
+std::string engine_version_info() {
+    std::stringstream ss;
+    ss << "Stockfish " << version << std::setfill('0');

-const string engine_info(bool to_uci) {
+    if constexpr (version == "dev")
+    {
+        ss << "-";
+#ifdef GIT_DATE
+        ss << stringify(GIT_DATE);
+#else
+        constexpr std::string_view months("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec");

-  const string months("Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec");
-  string month, day, year;
-  stringstream ss, date(__DATE__); // From compiler, format is "Sep 21 2008"
+        std::string       month, day, year;
+        std::stringstream date(__DATE__);  // From compiler, format is "Sep 21 2008"

-  ss << "Stockfish " << Version << setfill('0');
+        date >> month >> day >> year;
+        ss << year << std::setw(2) << std::setfill('0') << (1 + months.find(month) / 4)
+           << std::setw(2) << std::setfill('0') << day;
+#endif

-  if (Version.empty())
-  {
-      date >> month >> day >> year;
-      ss << setw(2) << day << setw(2) << (1 + months.find(month) / 4) << year.substr(2);
-  }
+        ss << "-";

-  ss << (to_uci  ? "\nid author ": " by ")
-     << "the Stockfish developers (see AUTHORS file)";
+#ifdef GIT_SHA
+        ss << stringify(GIT_SHA);
+#else
+        ss << "nogit";
+#endif
+    }

-  return ss.str();
+    return ss.str();
+}
+
+std::string engine_info(bool to_uci) {  // Version info + author credit; to_uci picks the separator
+    return engine_version_info() + (to_uci ? "\nid author " : " by ")
+         + "the Stockfish developers (see AUTHORS file)";
 }


-/// compiler_info() returns a string trying to describe the compiler we use
+// Returns a string trying to describe the compiler we use
+std::string compiler_info() {

-const std::string compiler_info() {
+#define make_version_string(major, minor, patch) \
+    stringify(major) "." stringify(minor) "." stringify(patch)

-  #define stringify2(x) #x
-  #define stringify(x) stringify2(x)
-  #define make_version_string(major, minor, patch) stringify(major) "." stringify(minor) "." stringify(patch)
+    // Predefined macros hell:
+    //
+    // __GNUC__                Compiler is GCC, Clang or ICX
+    // __clang__               Compiler is Clang or ICX
+    // __INTEL_LLVM_COMPILER   Compiler is ICX
+    // _MSC_VER                Compiler is MSVC
+    // _WIN32                  Building on Windows (any)
+    // _WIN64                  Building on Windows 64 bit

-/// Predefined macros hell:
-///
-/// __GNUC__           Compiler is gcc, Clang or Intel on Linux
-/// __INTEL_COMPILER   Compiler is Intel
-/// _MSC_VER           Compiler is MSVC or Intel on Windows
-/// _WIN32             Building on Windows (any)
-/// _WIN64             Building on Windows 64 bit
+    std::string compiler = "\nCompiled by                : ";

-  std::string compiler = "\nCompiled by ";
+#if defined(__INTEL_LLVM_COMPILER)
+    compiler += "ICX ";
+    compiler += stringify(__INTEL_LLVM_COMPILER);
+#elif defined(__clang__)
+    compiler += "clang++ ";
+    compiler += make_version_string(__clang_major__, __clang_minor__, __clang_patchlevel__);
+#elif _MSC_VER
+    compiler += "MSVC ";
+    compiler += "(version ";
+    compiler += stringify(_MSC_FULL_VER) "." stringify(_MSC_BUILD);
+    compiler += ")";
+#elif defined(__e2k__) && defined(__LCC__)
+    #define dot_ver2(n) \
+        compiler += char('.'); \
+        compiler += char('0' + (n) / 10); \
+        compiler += char('0' + (n) % 10);

-  #ifdef __clang__
-     compiler += "clang++ ";
-     compiler += make_version_string(__clang_major__, __clang_minor__, __clang_patchlevel__);
-  #elif __INTEL_COMPILER
-     compiler += "Intel compiler ";
-     compiler += "(version ";
-     compiler += stringify(__INTEL_COMPILER) " update " stringify(__INTEL_COMPILER_UPDATE);
-     compiler += ")";
-  #elif _MSC_VER
-     compiler += "MSVC ";
-     compiler += "(version ";
-     compiler += stringify(_MSC_FULL_VER) "." stringify(_MSC_BUILD);
-     compiler += ")";
-  #elif __GNUC__
-     compiler += "g++ (GNUC) ";
-     compiler += make_version_string(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
-  #else
-     compiler += "Unknown compiler ";
-     compiler += "(unknown version)";
-  #endif
+    compiler += "MCST LCC ";
+    compiler += "(version ";
+    compiler += std::to_string(__LCC__ / 100);
+    dot_ver2(__LCC__ % 100) dot_ver2(__LCC_MINOR__) compiler += ")";
+#elif __GNUC__
+    compiler += "g++ (GNUC) ";
+    compiler += make_version_string(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
+#else
+    compiler += "Unknown compiler ";
+    compiler += "(unknown version)";
+#endif

-  #if defined(__APPLE__)
-     compiler += " on Apple";
-  #elif defined(__CYGWIN__)
-     compiler += " on Cygwin";
-  #elif defined(__MINGW64__)
-     compiler += " on MinGW64";
-  #elif defined(__MINGW32__)
-     compiler += " on MinGW32";
-  #elif defined(__ANDROID__)
-     compiler += " on Android";
-  #elif defined(__linux__)
-     compiler += " on Linux";
-  #elif defined(_WIN64)
-     compiler += " on Microsoft Windows 64-bit";
-  #elif defined(_WIN32)
-     compiler += " on Microsoft Windows 32-bit";
-  #else
-     compiler += " on unknown system";
-  #endif
+#if defined(__APPLE__)
+    compiler += " on Apple";
+#elif defined(__CYGWIN__)
+    compiler += " on Cygwin";
+#elif defined(__MINGW64__)
+    compiler += " on MinGW64";
+#elif defined(__MINGW32__)
+    compiler += " on MinGW32";
+#elif defined(__ANDROID__)
+    compiler += " on Android";
+#elif defined(__linux__)
+    compiler += " on Linux";
+#elif defined(_WIN64)
+    compiler += " on Microsoft Windows 64-bit";
+#elif defined(_WIN32)
+    compiler += " on Microsoft Windows 32-bit";
+#else
+    compiler += " on unknown system";
+#endif

-  compiler += "\nCompilation settings include: ";
-  compiler += (Is64Bit ? " 64bit" : " 32bit");
-  #if defined(USE_VNNI)
+    compiler += "\nCompilation architecture   : ";
+#if defined(ARCH)
+    compiler += stringify(ARCH);
+#else
+    compiler += "(undefined architecture)";
+#endif
+
+    compiler += "\nCompilation settings       : ";
+    compiler += (Is64Bit ? "64bit" : "32bit");
+#if defined(USE_VNNI)
    compiler += " VNNI";
-  #endif
-  #if defined(USE_AVX512)
+#endif
+#if defined(USE_AVX512)
    compiler += " AVX512";
-  #endif
-  compiler += (HasPext ? " BMI2" : "");
-  #if defined(USE_AVX2)
+#endif
+    compiler += (HasPext ? " BMI2" : "");
+#if defined(USE_AVX2)
    compiler += " AVX2";
-  #endif
-  #if defined(USE_SSE41)
+#endif
+#if defined(USE_SSE41)
    compiler += " SSE41";
-  #endif
-  #if defined(USE_SSSE3)
+#endif
+#if defined(USE_SSSE3)
    compiler += " SSSE3";
-  #endif
-  #if defined(USE_SSE2)
+#endif
+#if defined(USE_SSE2)
    compiler += " SSE2";
-  #endif
-  compiler += (HasPopCnt ? " POPCNT" : "");
-  #if defined(USE_MMX)
-    compiler += " MMX";
-  #endif
-  #if defined(USE_NEON)
+#endif
+    compiler += (HasPopCnt ? " POPCNT" : "");
+#if defined(USE_NEON_DOTPROD)
+    compiler += " NEON_DOTPROD";
+#elif defined(USE_NEON)
    compiler += " NEON";
-  #endif
+#endif

-  #if !defined(NDEBUG)
+#if !defined(NDEBUG)
    compiler += " DEBUG";
-  #endif
+#endif

-  compiler += "\n__VERSION__ macro expands to: ";
-  #ifdef __VERSION__
-     compiler += __VERSION__;
-  #else
-     compiler += "(undefined macro)";
-  #endif
-  compiler += "\n";
+    compiler += "\nCompiler __VERSION__ macro : ";
+#ifdef __VERSION__
+    compiler += __VERSION__;
+#else
+    compiler += "(undefined macro)";
+#endif

-  return compiler;
+    compiler += "\n";
+
+    return compiler;
 }


-/// Debug functions used mainly to collect run-time statistics
-static std::atomic<int64_t> hits[2], means[2];
+// Debug functions used mainly to collect run-time statistics
+constexpr int MaxDebugSlots = 32;

-void dbg_hit_on(bool b) { ++hits[0]; if (b) ++hits[1]; }
-void dbg_hit_on(bool c, bool b) { if (c) dbg_hit_on(b); }
-void dbg_mean_of(int v) { ++means[0]; means[1] += v; }
+namespace {
+
+template<size_t N>
+struct DebugInfo {  // Fixed-size group of N atomic 64-bit counters, initialized from {0}
+    std::atomic<int64_t> data[N] = {0};
+    // Indexed access to one counter; the index is range-checked only by the assert.
+    [[nodiscard]] constexpr std::atomic<int64_t>& operator[](size_t index) {
+        assert(index < N);
+        return data[index];
+    }
+};
+
+struct DebugExtremes: public DebugInfo<3> {  // [0] count, [1] running max, [2] running min
+    DebugExtremes() {
+        data[1] = std::numeric_limits<int64_t>::min();  // max starts at the lowest int64_t
+        data[2] = std::numeric_limits<int64_t>::max();  // min starts at the highest int64_t
+    }
+};
+
+std::array<DebugInfo<2>, MaxDebugSlots>  hit;       // [0] calls, [1] calls with cond true
+std::array<DebugInfo<2>, MaxDebugSlots>  mean;      // [0] count, [1] sum of values
+std::array<DebugInfo<3>, MaxDebugSlots>  stdev;     // [0] count, [1] sum, [2] sum of squares
+std::array<DebugInfo<6>, MaxDebugSlots>  correl;    // [0] count, then sums x, x*x, y, y*y, x*y
+std::array<DebugExtremes, MaxDebugSlots> extremes;  // [0] count, [1] max seen, [2] min seen
+
+}  // namespace
+
+void dbg_hit_on(bool cond, int slot) {
+    // Counts the call in hit[slot][0] and, when cond holds, also in hit[slot][1].
+    ++hit.at(slot)[0];
+    if (cond)
+        ++hit.at(slot)[1];
+}
+
+void dbg_mean_of(int64_t value, int slot) {
+    // Adds one sample to slot: [0] is the sample count, [1] the running sum of values.
+    ++mean.at(slot)[0];
+    mean.at(slot)[1] += value;
+}
+
+void dbg_stdev_of(int64_t value, int slot) {
+    // Adds one sample to slot: [0] count, [1] sum of values, [2] sum of squared values.
+    ++stdev.at(slot)[0];
+    stdev.at(slot)[1] += value;
+    stdev.at(slot)[2] += value * value;
+}
+
+void dbg_extremes_of(int64_t value, int slot) {  // Updates slot's count, max and min with value
+    ++extremes.at(slot)[0];
+    // Raise the stored maximum to value; retry while the exchange fails and value is larger.
+    int64_t current_max = extremes.at(slot)[1].load();
+    while (current_max < value && !extremes.at(slot)[1].compare_exchange_weak(current_max, value))
+    {}
+    // Same retry loop, lowering the stored minimum to value.
+    int64_t current_min = extremes.at(slot)[2].load();
+    while (current_min > value && !extremes.at(slot)[2].compare_exchange_weak(current_min, value))
+    {}
+}
+
+void dbg_correl_of(int64_t value1, int64_t value2, int slot) {
+    // Adds one (value1, value2) pair to slot: count, then the sums dbg_print combines.
+    ++correl.at(slot)[0];
+    correl.at(slot)[1] += value1;
+    correl.at(slot)[2] += value1 * value1;
+    correl.at(slot)[3] += value2;
+    correl.at(slot)[4] += value2 * value2;
+    correl.at(slot)[5] += value1 * value2;
+}

 void dbg_print() {

-  if (hits[0])
-      cerr << "Total " << hits[0] << " Hits " << hits[1]
-           << " hit rate (%) " << 100 * hits[1] / hits[0] << endl;
+    int64_t n;
+    auto    E   = [&n](int64_t x) { return double(x) / n; };
+    auto    sqr = [](double x) { return x * x; };

-  if (means[0])
-      cerr << "Total " << means[0] << " Mean "
-           << (double)means[1] / means[0] << endl;
+    for (int i = 0; i < MaxDebugSlots; ++i)
+        if ((n = hit[i][0]))
+            std::cerr << "Hit #" << i << ": Total " << n << " Hits " << hit[i][1]
+                      << " Hit Rate (%) " << 100.0 * E(hit[i][1]) << std::endl;
+
+    for (int i = 0; i < MaxDebugSlots; ++i)
+        if ((n = mean[i][0]))
+        {
+            std::cerr << "Mean #" << i << ": Total " << n << " Mean " << E(mean[i][1]) << std::endl;
+        }
+
+    for (int i = 0; i < MaxDebugSlots; ++i)
+        if ((n = stdev[i][0]))
+        {
+            double r = sqrt(E(stdev[i][2]) - sqr(E(stdev[i][1])));
+            std::cerr << "Stdev #" << i << ": Total " << n << " Stdev " << r << std::endl;
+        }
+
+    for (int i = 0; i < MaxDebugSlots; ++i)
+        if ((n = extremes[i][0]))
+        {
+            std::cerr << "Extremity #" << i << ": Total " << n << " Min " << extremes[i][2]
+                      << " Max " << extremes[i][1] << std::endl;
+        }
+
+    for (int i = 0; i < MaxDebugSlots; ++i)
+        if ((n = correl[i][0]))
+        {
+            double r = (E(correl[i][5]) - E(correl[i][1]) * E(correl[i][3]))
+                     / (sqrt(E(correl[i][2]) - sqr(E(correl[i][1])))
+                        * sqrt(E(correl[i][4]) - sqr(E(correl[i][3]))));
+            std::cerr << "Correl. #" << i << ": Total " << n << " Coefficient " << r << std::endl;
+        }
 }


-/// Used to serialize access to std::cout to avoid multiple threads writing at
-/// the same time.
-
+// Used to serialize access to std::cout
+// to avoid multiple threads writing at the same time.
 std::ostream& operator<<(std::ostream& os, SyncCout sc) {

-  static std::mutex m;
+    static std::mutex m;

-  if (sc == IO_LOCK)
-      m.lock();
+    if (sc == IO_LOCK)
+        m.lock();

-  if (sc == IO_UNLOCK)
-      m.unlock();
+    if (sc == IO_UNLOCK)
+        m.unlock();

-  return os;
+    return os;
 }

+void sync_cout_start() { std::cout << IO_LOCK; }  // locks the mutex inside operator<<(SyncCout)
+void sync_cout_end() { std::cout << IO_UNLOCK; }  // unlocks the mutex inside operator<<(SyncCout)

-/// Trampoline helper to avoid moving Logger to misc.h
+// Trampoline helper to avoid moving Logger to misc.h
 void start_logger(const std::string& fname) { Logger::start(fname); }


-/// prefetch() preloads the given address in L1/L2 cache. This is a non-blocking
-/// function that doesn't stall the CPU waiting for data to be loaded from memory,
-/// which can be quite slow.
 #ifdef NO_PREFETCH

-void prefetch(void*) {}
+void prefetch(const void*) {}

 #else

-void prefetch(void* addr) {
+void prefetch(const void* addr) {

-#  if defined(__INTEL_COMPILER)
-   // This hack prevents prefetches from being optimized away by
-   // Intel compiler. Both MSVC and gcc seem not be affected by this.
-   __asm__ ("");
-#  endif
-
-#  if defined(__INTEL_COMPILER) || defined(_MSC_VER)
-  _mm_prefetch((char*)addr, _MM_HINT_T0);
-#  else
-  __builtin_prefetch(addr);
-#  endif
+    #if defined(_MSC_VER)
+    _mm_prefetch((char const*) addr, _MM_HINT_T0);
+    #else
+    __builtin_prefetch(addr);
+    #endif
 }

 #endif

-
-/// std_aligned_alloc() is our wrapper for systems where the c++17 implementation
-/// does not guarantee the availability of aligned_alloc(). Memory allocated with
-/// std_aligned_alloc() must be freed with std_aligned_free().
-
-void* std_aligned_alloc(size_t alignment, size_t size) {
-
-#if defined(POSIXALIGNEDALLOC)
-  void *mem;
-  return posix_memalign(&mem, alignment, size) ? nullptr : mem;
-#elif defined(_WIN32)
-  return _mm_malloc(size, alignment);
-#else
-  return std::aligned_alloc(alignment, size);
-#endif
-}
-
-void std_aligned_free(void* ptr) {
-
-#if defined(POSIXALIGNEDALLOC)
-  free(ptr);
-#elif defined(_WIN32)
-  _mm_free(ptr);
-#else
-  free(ptr);
-#endif
-}
-
-/// aligned_large_pages_alloc() will return suitably aligned memory, if possible using large pages.
-
-#if defined(_WIN32)
-
-static void* aligned_large_pages_alloc_win(size_t allocSize) {
-
-  HANDLE hProcessToken { };
-  LUID luid { };
-  void* mem = nullptr;
-
-  const size_t largePageSize = GetLargePageMinimum();
-  if (!largePageSize)
-      return nullptr;
-
-  // We need SeLockMemoryPrivilege, so try to enable it for the process
-  if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hProcessToken))
-      return nullptr;
-
-  if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &luid))
-  {
-      TOKEN_PRIVILEGES tp { };
-      TOKEN_PRIVILEGES prevTp { };
-      DWORD prevTpLen = 0;
-
-      tp.PrivilegeCount = 1;
-      tp.Privileges[0].Luid = luid;
-      tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
-
-      // Try to enable SeLockMemoryPrivilege. Note that even if AdjustTokenPrivileges() succeeds,
-      // we still need to query GetLastError() to ensure that the privileges were actually obtained.
-      if (AdjustTokenPrivileges(
-              hProcessToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), &prevTp, &prevTpLen) &&
-          GetLastError() == ERROR_SUCCESS)
-      {
-          // Round up size to full pages and allocate
-          allocSize = (allocSize + largePageSize - 1) & ~size_t(largePageSize - 1);
-          mem = VirtualAlloc(
-              NULL, allocSize, MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE);
-
-          // Privilege no longer needed, restore previous state
-          AdjustTokenPrivileges(hProcessToken, FALSE, &prevTp, 0, NULL, NULL);
-      }
-  }
-
-  CloseHandle(hProcessToken);
-
-  return mem;
-}
-
-void* aligned_large_pages_alloc(size_t allocSize) {
-
-  // Try to allocate large pages
-  void* mem = aligned_large_pages_alloc_win(allocSize);
-
-  // Fall back to regular, page aligned, allocation if necessary
-  if (!mem)
-      mem = VirtualAlloc(NULL, allocSize, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
-
-  return mem;
-}
-
-#else
-
-void* aligned_large_pages_alloc(size_t allocSize) {
-
-#if defined(__linux__)
-  constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page size
-#else
-  constexpr size_t alignment = 4096; // assumed small page size
-#endif
-
-  // round up to multiples of alignment
-  size_t size = ((allocSize + alignment - 1) / alignment) * alignment;
-  void *mem = std_aligned_alloc(alignment, size);
-#if defined(MADV_HUGEPAGE)
-  madvise(mem, size, MADV_HUGEPAGE);
-#endif
-  return mem;
-}
-
-#endif
-
-
-/// aligned_large_pages_free() will free the previously allocated ttmem
-
-#if defined(_WIN32)
-
-void aligned_large_pages_free(void* mem) {
-
-  if (mem && !VirtualFree(mem, 0, MEM_RELEASE))
-  {
-      DWORD err = GetLastError();
-      std::cerr << "Failed to free transposition table. Error code: 0x" <<
-          std::hex << err << std::dec << std::endl;
-      exit(EXIT_FAILURE);
-  }
-}
-
-#else
-
-void aligned_large_pages_free(void *mem) {
-  std_aligned_free(mem);
-}
-
-#endif
-
-
-namespace WinProcGroup {
-
-#ifndef _WIN32
-
-void bindThisThread(size_t) {}
-
-#else
-
-/// best_group() retrieves logical processor information using Windows specific
-/// API and returns the best group id for the thread with index idx. Original
-/// code from Texel by Peter Österlund.
-
-int best_group(size_t idx) {
-
-  int threads = 0;
-  int nodes = 0;
-  int cores = 0;
-  DWORD returnLength = 0;
-  DWORD byteOffset = 0;
-
-  // Early exit if the needed API is not available at runtime
-  HMODULE k32 = GetModuleHandle("Kernel32.dll");
-  auto fun1 = (fun1_t)(void(*)())GetProcAddress(k32, "GetLogicalProcessorInformationEx");
-  if (!fun1)
-      return -1;
-
-  // First call to get returnLength. We expect it to fail due to null buffer
-  if (fun1(RelationAll, nullptr, &returnLength))
-      return -1;
-
-  // Once we know returnLength, allocate the buffer
-  SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *buffer, *ptr;
-  ptr = buffer = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)malloc(returnLength);
-
-  // Second call, now we expect to succeed
-  if (!fun1(RelationAll, buffer, &returnLength))
-  {
-      free(buffer);
-      return -1;
-  }
-
-  while (byteOffset < returnLength)
-  {
-      if (ptr->Relationship == RelationNumaNode)
-          nodes++;
-
-      else if (ptr->Relationship == RelationProcessorCore)
-      {
-          cores++;
-          threads += (ptr->Processor.Flags == LTP_PC_SMT) ? 2 : 1;
-      }
-
-      assert(ptr->Size);
-      byteOffset += ptr->Size;
-      ptr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*)(((char*)ptr) + ptr->Size);
-  }
-
-  free(buffer);
-
-  std::vector<int> groups;
-
-  // Run as many threads as possible on the same node until core limit is
-  // reached, then move on filling the next node.
-  for (int n = 0; n < nodes; n++)
-      for (int i = 0; i < cores / nodes; i++)
-          groups.push_back(n);
-
-  // In case a core has more than one logical processor (we assume 2) and we
-  // have still threads to allocate, then spread them evenly across available
-  // nodes.
-  for (int t = 0; t < threads - cores; t++)
-      groups.push_back(t % nodes);
-
-  // If we still have more threads than the total number of logical processors
-  // then return -1 and let the OS to decide what to do.
-  return idx < groups.size() ? groups[idx] : -1;
-}
-
-
-/// bindThisThread() set the group affinity of the current thread
-
-void bindThisThread(size_t idx) {
-
-  // Use only local variables to be thread-safe
-  int group = best_group(idx);
-
-  if (group == -1)
-      return;
-
-  // Early exit if the needed API are not available at runtime
-  HMODULE k32 = GetModuleHandle("Kernel32.dll");
-  auto fun2 = (fun2_t)(void(*)())GetProcAddress(k32, "GetNumaNodeProcessorMaskEx");
-  auto fun3 = (fun3_t)(void(*)())GetProcAddress(k32, "SetThreadGroupAffinity");
-
-  if (!fun2 || !fun3)
-      return;
-
-  GROUP_AFFINITY affinity;
-  if (fun2(group, &affinity))
-      fun3(GetCurrentThread(), &affinity, nullptr);
-}
-
-#endif
-
-} // namespace WinProcGroup
-
 #ifdef _WIN32
-#include <direct.h>
-#define GETCWD _getcwd
+    #include <direct.h>
+    #define GETCWD _getcwd
 #else
-#include <unistd.h>
-#define GETCWD getcwd
+    #include <unistd.h>
+    #define GETCWD getcwd
 #endif

-namespace CommandLine {
+size_t str_to_size_t(const std::string& s) {  // Parses s with std::stoull and narrows to size_t
+    unsigned long long value = std::stoull(s);
+    if (value > std::numeric_limits<size_t>::max())
+        std::exit(EXIT_FAILURE);  // value does not fit in size_t: terminate the process
+    return static_cast<size_t>(value);
+}

-string argv0;            // path+name of the executable binary, as given by argv[0]
-string binaryDirectory;  // path of the executable directory
-string workingDirectory; // path of the working directory
+std::optional<std::string> read_file_to_string(const std::string& path) {
+    std::ifstream f(path, std::ios_base::binary);  // opened in binary mode
+    if (!f)
+        return std::nullopt;  // the stream is not usable (e.g. the file could not be opened)
+    return std::string(std::istreambuf_iterator<char>(f), std::istreambuf_iterator<char>());
+}

-void init(int argc, char* argv[]) {
-    (void)argc;
-    string pathSeparator;
+// Erases every character classified as whitespace by std::isspace from s, in place.
+void remove_whitespace(std::string& s) {
+    // std::isspace has undefined behavior for negative arguments other than EOF, so the
+    // predicate takes unsigned char: plain char may be signed and negative for non-ASCII bytes.
+    const auto isSpace = [](unsigned char c) { return std::isspace(c) != 0; };
+    s.erase(std::remove_if(s.begin(), s.end(), isSpace), s.end());
+}

-    // extract the path+name of the executable binary
-    argv0 = argv[0];
+// Returns true when every character of s is whitespace per std::isspace (true for an empty s).
+bool is_whitespace(std::string_view s) {
+    // unsigned char parameter: passing a negative plain char to std::isspace is undefined.
+    const auto isSpace = [](unsigned char c) { return std::isspace(c) != 0; };
+    return std::all_of(s.begin(), s.end(), isSpace);
+}
+
+std::string CommandLine::get_binary_directory(std::string argv0) {
+    std::string pathSeparator;

 #ifdef _WIN32
    pathSeparator = "\\";
-  #ifdef _MSC_VER
+    #ifdef _MSC_VER
    // Under windows argv[0] may not have the extension. Also _get_pgmptr() had
-    // issues in some windows 10 versions, so check returned values carefully.
+    // issues in some Windows 10 versions, so check returned values carefully.
    char* pgmptr = nullptr;
    if (!_get_pgmptr(&pgmptr) && pgmptr != nullptr && *pgmptr)
        argv0 = pgmptr;
-  #endif
+    #endif
 #else
    pathSeparator = "/";
 #endif

-    // extract the working directory
-    workingDirectory = "";
-    char buff[40000];
-    char* cwd = GETCWD(buff, 40000);
-    if (cwd)
-        workingDirectory = cwd;
+    // Extract the working directory
+    auto workingDirectory = CommandLine::get_working_directory();

-    // extract the binary directory path from argv0
-    binaryDirectory = argv0;
-    size_t pos = binaryDirectory.find_last_of("\\/");
+    // Extract the binary directory path from argv0
+    auto   binaryDirectory = argv0;
+    size_t pos             = binaryDirectory.find_last_of("\\/");
    if (pos == std::string::npos)
        binaryDirectory = "." + pathSeparator;
    else
        binaryDirectory.resize(pos + 1);

-    // pattern replacement: "./" at the start of path is replaced by the working directory
+    // Pattern replacement: "./" at the start of path is replaced by the working directory
    if (binaryDirectory.find("." + pathSeparator) == 0)
        binaryDirectory.replace(0, 1, workingDirectory);
+
+    return binaryDirectory;
+}
+
+std::string CommandLine::get_working_directory() {  // Result of GETCWD, or "" if it returns null
+    std::string workingDirectory = "";
+    char        buff[40000];
+    char*       cwd = GETCWD(buff, 40000);
+    if (cwd)
+        workingDirectory = cwd;
+
+    return workingDirectory;
 }


-} // namespace CommandLine
+}  // namespace Stockfish
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -19,128 +19,298 @@
 #ifndef MISC_H_INCLUDED
 #define MISC_H_INCLUDED

+#include <algorithm>
+#include <array>
 #include <cassert>
 #include <chrono>
-#include <ostream>
-#include <string>
-#include <vector>
+#include <cstddef>
 #include <cstdint>
+#include <cstdio>
+#include <iosfwd>
+#include <optional>
+#include <string>
+#include <string_view>
+#include <vector>

-#include "types.h"
+// Two-step stringification: stringify(x) lets the preprocessor expand x before
+// stringify2 applies the # operator, so a macro argument is turned into the
+// string of its expansion rather than the string of its own name.
+#define stringify2(x) #x
+#define stringify(x) stringify2(x)
+
+namespace Stockfish {
+
+std::string engine_version_info();
+std::string engine_info(bool to_uci = false);
+std::string compiler_info();
+
+// Preloads the given address in L1/L2 cache. This is a non-blocking
+// function that doesn't stall the CPU waiting for data to be loaded from memory,
+// which can be quite slow.
+void prefetch(const void* addr);

-const std::string engine_info(bool to_uci = false);
-const std::string compiler_info();
-void prefetch(void* addr);
 void start_logger(const std::string& fname);
-void* std_aligned_alloc(size_t alignment, size_t size);
-void std_aligned_free(void* ptr);
-void* aligned_large_pages_alloc(size_t size); // memory aligned by page size, min alignment: 4096 bytes
-void aligned_large_pages_free(void* mem); // nop if mem == nullptr

-void dbg_hit_on(bool b);
-void dbg_hit_on(bool c, bool b);
-void dbg_mean_of(int v);
-void dbg_print();
+size_t str_to_size_t(const std::string& s);

-typedef std::chrono::milliseconds::rep TimePoint; // A value in milliseconds
-static_assert(sizeof(TimePoint) == sizeof(int64_t), "TimePoint should be 64 bits");
-inline TimePoint now() {
-  return std::chrono::duration_cast<std::chrono::milliseconds>
-        (std::chrono::steady_clock::now().time_since_epoch()).count();
-}
+#if defined(__linux__)

-template<class Entry, int Size>
-struct HashTable {
-  Entry* operator[](Key key) { return &table[(uint32_t)key & (Size - 1)]; }
-
-private:
-  std::vector<Entry> table = std::vector<Entry>(Size); // Allocate on the heap
+// Deleter functor that closes a FILE* with pclose(); null handles are ignored.
+struct PipeDeleter {
+    void operator()(FILE* file) const {
+        if (!file)
+            return;
+
+        pclose(file);
+    }
 };

+#endif

-enum SyncCout { IO_LOCK, IO_UNLOCK };
+// Reads the file as bytes.
+// Returns std::nullopt if the file does not exist.
+std::optional<std::string> read_file_to_string(const std::string& path);
+
+void dbg_hit_on(bool cond, int slot = 0);
+void dbg_mean_of(int64_t value, int slot = 0);
+void dbg_stdev_of(int64_t value, int slot = 0);
+void dbg_extremes_of(int64_t value, int slot = 0);
+void dbg_correl_of(int64_t value1, int64_t value2, int slot = 0);
+void dbg_print();
+
+// Integer tick count of std::chrono::milliseconds, i.e. a value in milliseconds
+using TimePoint = std::chrono::milliseconds::rep;
+static_assert(sizeof(TimePoint) == sizeof(int64_t), "TimePoint should be 64 bits");
+
+// Returns the steady clock's time since its epoch, expressed in milliseconds.
+inline TimePoint now() {
+    const auto sinceEpoch = std::chrono::steady_clock::now().time_since_epoch();
+    return std::chrono::duration_cast<std::chrono::milliseconds>(sinceEpoch).count();
+}
+
+// Splits s at every occurrence of delimiter and returns views of the pieces
+// in order. The views refer to the characters behind s, so they are valid only
+// as long as those characters are. An empty s yields an empty vector.
+// Adjacent, leading or trailing delimiters yield empty pieces. An empty
+// delimiter separates nothing, so s is returned as a single piece.
+inline std::vector<std::string_view> split(std::string_view s, std::string_view delimiter) {
+    std::vector<std::string_view> res;
+
+    if (s.empty())
+        return res;
+
+    // find() reports an empty needle at the starting position without ever
+    // advancing, which would make the loop below append empty pieces forever.
+    if (delimiter.empty())
+    {
+        res.emplace_back(s);
+        return res;
+    }
+
+    size_t begin = 0;
+    for (;;)
+    {
+        const size_t end = s.find(delimiter, begin);
+        if (end == std::string_view::npos)
+            break;
+
+        res.emplace_back(s.substr(begin, end - begin));
+        begin = end + delimiter.size();
+    }
+
+    // Remainder after the last delimiter (the whole string if none was found)
+    res.emplace_back(s.substr(begin));
+
+    return res;
+}
+
+void remove_whitespace(std::string& s);
+bool is_whitespace(std::string_view s);
+
+enum SyncCout {
+    IO_LOCK,
+    IO_UNLOCK
+};
 std::ostream& operator<<(std::ostream&, SyncCout);

 #define sync_cout std::cout << IO_LOCK
 #define sync_endl std::endl << IO_UNLOCK

-// `ptr` must point to an array of size at least
-// `sizeof(T) * N + alignment` bytes, where `N` is the
-// number of elements in the array.
-template <uintptr_t Alignment, typename T>
-T* align_ptr_up(T* ptr)
-{
-  static_assert(alignof(T) < Alignment);
+void sync_cout_start();
+void sync_cout_end();
+
+// True if and only if the binary is compiled on a little-endian machine
+// The check reads the first byte in memory of the 16-bit value 1, which is 1
+// only when the least significant byte is stored first.
+static inline const std::uint16_t Le             = 1;
+static inline const bool          IsLittleEndian = *reinterpret_cast<const char*>(&Le) == 1;
+
+
+// Fixed-capacity list stored inline: holds up to MaxSize elements of type T
+// in a plain array and grows only through push_back().
+template<typename T, std::size_t MaxSize>
+class ValueList {
+
+   public:
+    std::size_t size() const { return size_; }
+    // Appends value. The capacity is fixed, so pushing onto a full list would
+    // write past the end of the storage; the assert catches that in debug builds.
+    void push_back(const T& value) {
+        assert(size_ < MaxSize);
+        values_[size_++] = value;
+    }
+    const T* begin() const { return values_; }
+    const T* end() const { return values_ + size_; }
+    // No bounds checking is performed on index
+    const T& operator[](int index) const { return values_[index]; }
+
+   private:
+    T           values_[MaxSize];
+    std::size_t size_ = 0;
+};
+
+
+template<typename T, std::size_t Size, std::size_t... Sizes>
+class MultiArray;
+
+namespace Detail {
+
+template<typename T, std::size_t Size, std::size_t... Sizes>
+struct MultiArrayHelper {
+    using ChildType = MultiArray<T, Sizes...>;
+};
+
+template<typename T, std::size_t Size>
+struct MultiArrayHelper<T, Size> {
+    using ChildType = T;
+};

-  const uintptr_t ptrint = reinterpret_cast<uintptr_t>(reinterpret_cast<char*>(ptr));
-  return reinterpret_cast<T*>(reinterpret_cast<char*>((ptrint + (Alignment - 1)) / Alignment * Alignment));
 }

-/// xorshift64star Pseudo-Random Number Generator
-/// This class is based on original code written and dedicated
-/// to the public domain by Sebastiano Vigna (2014).
-/// It has the following characteristics:
-///
-///  -  Outputs 64-bit numbers
-///  -  Passes Dieharder and SmallCrush test batteries
-///  -  Does not require warm-up, no zeroland to escape
-///  -  Internal state is a single 64-bit integer
-///  -  Period is 2^64 - 1
-///  -  Speed: 1.60 ns/call (Core i7 @3.40GHz)
-///
-/// For further analysis see
-///   <http://vigna.di.unimi.it/ftp/papers/xorshift.pdf>
+// MultiArray is a generic N-dimensional array.
+// The template parameters (Size and Sizes) encode the dimensions of the array.
+// Each level wraps a std::array of Size elements whose element type is T for
+// the last dimension and a MultiArray over the remaining Sizes otherwise, and
+// forwards the std::array interface to it.
+template<typename T, std::size_t Size, std::size_t... Sizes>
+class MultiArray {
+    using ChildType = typename Detail::MultiArrayHelper<T, Size, Sizes...>::ChildType;
+    using ArrayType = std::array<ChildType, Size>;
+    ArrayType data_;
+
+   public:
+    using value_type             = typename ArrayType::value_type;
+    using size_type              = typename ArrayType::size_type;
+    using difference_type        = typename ArrayType::difference_type;
+    using reference              = typename ArrayType::reference;
+    using const_reference        = typename ArrayType::const_reference;
+    using pointer                = typename ArrayType::pointer;
+    using const_pointer          = typename ArrayType::const_pointer;
+    using iterator               = typename ArrayType::iterator;
+    using const_iterator         = typename ArrayType::const_iterator;
+    using reverse_iterator       = typename ArrayType::reverse_iterator;
+    using const_reverse_iterator = typename ArrayType::const_reverse_iterator;
+
+    // Note: std::array::at() throws std::out_of_range for a bad index; since
+    // these wrappers are noexcept, such an index terminates the program.
+    constexpr auto&       at(size_type index) noexcept { return data_.at(index); }
+    constexpr const auto& at(size_type index) const noexcept { return data_.at(index); }
+
+    constexpr auto&       operator[](size_type index) noexcept { return data_[index]; }
+    constexpr const auto& operator[](size_type index) const noexcept { return data_[index]; }
+
+    constexpr auto&       front() noexcept { return data_.front(); }
+    constexpr const auto& front() const noexcept { return data_.front(); }
+    constexpr auto&       back() noexcept { return data_.back(); }
+    constexpr const auto& back() const noexcept { return data_.back(); }
+
+    auto*       data() { return data_.data(); }
+    const auto* data() const { return data_.data(); }
+
+    constexpr auto begin() noexcept { return data_.begin(); }
+    constexpr auto end() noexcept { return data_.end(); }
+    constexpr auto begin() const noexcept { return data_.begin(); }
+    constexpr auto end() const noexcept { return data_.end(); }
+    constexpr auto cbegin() const noexcept { return data_.cbegin(); }
+    constexpr auto cend() const noexcept { return data_.cend(); }
+
+    constexpr auto rbegin() noexcept { return data_.rbegin(); }
+    constexpr auto rend() noexcept { return data_.rend(); }
+    constexpr auto rbegin() const noexcept { return data_.rbegin(); }
+    constexpr auto rend() const noexcept { return data_.rend(); }
+    constexpr auto crbegin() const noexcept { return data_.crbegin(); }
+    constexpr auto crend() const noexcept { return data_.crend(); }
+
+    constexpr bool      empty() const noexcept { return data_.empty(); }
+    constexpr size_type size() const noexcept { return data_.size(); }
+    constexpr size_type max_size() const noexcept { return data_.max_size(); }
+
+    // Assigns v to every entry of type T, recursing through all dimensions.
+    template<typename U>
+    void fill(const U& v) {
+        // The assignment performed below has an lvalue T on the left and a
+        // const U lvalue on the right, so that is what must be checked.
+        // is_assignable<T, U> would ask about assigning to an rvalue T, which
+        // is never possible for scalar types and would reject, for example,
+        // MultiArray<int, N>::fill(0).
+        static_assert(std::is_assignable_v<T&, const U&>,
+                      "Cannot assign fill value to entry type");
+        for (auto& ele : data_)
+        {
+            if constexpr (sizeof...(Sizes) == 0)
+                ele = v;
+            else
+                ele.fill(v);
+        }
+    }
+
+    constexpr void swap(MultiArray<T, Size, Sizes...>& other) noexcept { data_.swap(other.data_); }
+};
+
+
+// xorshift64star Pseudo-Random Number Generator
+// This class is based on original code written and dedicated
+// to the public domain by Sebastiano Vigna (2014).
+// It has the following characteristics:
+//
+//  -  Outputs 64-bit numbers
+//  -  Passes Dieharder and SmallCrush test batteries
+//  -  Does not require warm-up, no zeroland to escape
+//  -  Internal state is a single 64-bit integer
+//  -  Period is 2^64 - 1
+//  -  Speed: 1.60 ns/call (Core i7 @3.40GHz)
+//
+// For further analysis see
+//   <http://vigna.di.unimi.it/ftp/papers/xorshift.pdf>

 class PRNG {

-  uint64_t s;
+    // Generator state, initialised from the seed passed to the constructor
+    uint64_t s;

-  uint64_t rand64() {
+    // Advances the state with three xorshift steps and returns the new state
+    // multiplied by a fixed constant
+    uint64_t rand64() {

-    s ^= s >> 12, s ^= s << 25, s ^= s >> 27;
-    return s * 2685821657736338717LL;
-  }
+        s ^= s >> 12, s ^= s << 25, s ^= s >> 27;
+        return s * 2685821657736338717LL;
+    }

-public:
-  PRNG(uint64_t seed) : s(seed) { assert(seed); }
+   public:
+    // The seed must be non-zero (checked by the assert)
+    PRNG(uint64_t seed) :
+        s(seed) {
+        assert(seed);
+    }

-  template<typename T> T rand() { return T(rand64()); }
+    // Returns the next 64-bit output converted to T
+    template<typename T>
+    T rand() {
+        return T(rand64());
+    }

-  /// Special generator used to fast init magic numbers.
-  /// Output values only have 1/8th of their bits set on average.
-  template<typename T> T sparse_rand()
-  { return T(rand64() & rand64() & rand64()); }
+    // Special generator used to fast init magic numbers.
+    // Output values only have 1/8th of their bits set on average.
+    template<typename T>
+    T sparse_rand() {
+        return T(rand64() & rand64() & rand64());
+    }
 };

+// Returns the upper 64 bits of the full 128-bit product a * b. Uses the
+// compiler's 128-bit integer type when __GNUC__ and IS_64BIT are defined;
+// otherwise the result is assembled from 32-bit partial products.
 inline uint64_t mul_hi64(uint64_t a, uint64_t b) {
 #if defined(__GNUC__) && defined(IS_64BIT)
-    __extension__ typedef unsigned __int128 uint128;
-    return ((uint128)a * (uint128)b) >> 64;
+    __extension__ using uint128 = unsigned __int128;
+    return (uint128(a) * uint128(b)) >> 64;
 #else
-    uint64_t aL = (uint32_t)a, aH = a >> 32;
-    uint64_t bL = (uint32_t)b, bH = b >> 32;
+    // Split both operands into their low and high 32-bit halves
+    uint64_t aL = uint32_t(a), aH = a >> 32;
+    uint64_t bL = uint32_t(b), bH = b >> 32;
    uint64_t c1 = (aL * bL) >> 32;
    uint64_t c2 = aH * bL + c1;
-    uint64_t c3 = aL * bH + (uint32_t)c2;
+    uint64_t c3 = aL * bH + uint32_t(c2);
    return aH * bH + (c2 >> 32) + (c3 >> 32);
 #endif
 }

-/// Under Windows it is not possible for a process to run on more than one
-/// logical processor group. This usually means to be limited to use max 64
-/// cores. To overcome this, some special platform specific API should be
-/// called to set group affinity for each thread. Original code from Texel by
-/// Peter Österlund.

-namespace WinProcGroup {
-  void bindThisThread(size_t idx);
+// Holds the argument count and argument vector handed to the constructor and
+// declares the static helpers that return the binary and working directories.
+struct CommandLine {
+    int    argc;
+    char** argv;
+
+    CommandLine(int _argc, char** _argv) :
+        argc(_argc),
+        argv(_argv) {}
+
+    static std::string get_binary_directory(std::string argv0);
+    static std::string get_working_directory();
+};
+
+namespace Utility {
+
+// Moves the first element of vec satisfying pred to the front, keeping the
+// relative order of all other elements. Does nothing if no element matches.
+template<typename T, typename Predicate>
+void move_to_front(std::vector<T>& vec, Predicate pred) {
+    const auto match = std::find_if(vec.begin(), vec.end(), pred);
+
+    if (match == vec.end())
+        return;
+
+    std::rotate(vec.begin(), match, match + 1);
+}
 }

-namespace CommandLine {
-  void init(int argc, char* argv[]);
+}  // namespace Stockfish

-  extern std::string binaryDirectory;  // path of the executable directory
-  extern std::string workingDirectory; // path of the working directory
-}
-
-#endif // #ifndef MISC_H_INCLUDED
+#endif  // #ifndef MISC_H_INCLUDED
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -16,147 +16,121 @@
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

-#include <cassert>
-
 #include "movegen.h"
+
+#include <cassert>
+#include <initializer_list>
+
+#include "bitboard.h"
 #include "position.h"

+namespace Stockfish {
+
 namespace {

-  template<GenType Type, Direction D>
-  ExtMove* make_promotions(ExtMove* moveList, Square to, Square ksq) {
+// Appends to moveList the promotion moves from 'to - D' to 'to' and returns
+// the new end of the list. Which pieces are emitted is decided at compile
+// time: the queen promotion for CAPTURES, EVASIONS and NON_EVASIONS; rook,
+// bishop and knight for EVASIONS and NON_EVASIONS, for CAPTURES when Enemy is
+// true, and for QUIETS when Enemy is false.
+template<GenType Type, Direction D, bool Enemy>
+ExtMove* make_promotions(ExtMove* moveList, [[maybe_unused]] Square to) {

-    if (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS)
-    {
-        *moveList++ = make<PROMOTION>(to - D, to, QUEEN);
-        if (attacks_bb<KNIGHT>(to) & ksq)
-            *moveList++ = make<PROMOTION>(to - D, to, KNIGHT);
-    }
+    constexpr bool all = Type == EVASIONS || Type == NON_EVASIONS;

-    if (Type == QUIETS || Type == EVASIONS || Type == NON_EVASIONS)
+    if constexpr (Type == CAPTURES || all)
+        *moveList++ = Move::make<PROMOTION>(to - D, to, QUEEN);
+
+    if constexpr ((Type == CAPTURES && Enemy) || (Type == QUIETS && !Enemy) || all)
    {
-        *moveList++ = make<PROMOTION>(to - D, to, ROOK);
-        *moveList++ = make<PROMOTION>(to - D, to, BISHOP);
-        if (!(attacks_bb<KNIGHT>(to) & ksq))
-            *moveList++ = make<PROMOTION>(to - D, to, KNIGHT);
+        *moveList++ = Move::make<PROMOTION>(to - D, to, ROOK);
+        *moveList++ = Move::make<PROMOTION>(to - D, to, BISHOP);
+        *moveList++ = Move::make<PROMOTION>(to - D, to, KNIGHT);
    }

    return moveList;
-  }
+}


-  template<Color Us, GenType Type>
-  ExtMove* generate_pawn_moves(const Position& pos, ExtMove* moveList, Bitboard target) {
+template<Color Us, GenType Type>
+ExtMove* generate_pawn_moves(const Position& pos, ExtMove* moveList, Bitboard target) {

    constexpr Color     Them     = ~Us;
-    constexpr Bitboard  TRank7BB = (Us == WHITE ? Rank7BB    : Rank2BB);
-    constexpr Bitboard  TRank3BB = (Us == WHITE ? Rank3BB    : Rank6BB);
+    constexpr Bitboard  TRank7BB = (Us == WHITE ? Rank7BB : Rank2BB);
+    constexpr Bitboard  TRank3BB = (Us == WHITE ? Rank3BB : Rank6BB);
    constexpr Direction Up       = pawn_push(Us);
    constexpr Direction UpRight  = (Us == WHITE ? NORTH_EAST : SOUTH_WEST);
    constexpr Direction UpLeft   = (Us == WHITE ? NORTH_WEST : SOUTH_EAST);

-    const Square ksq = pos.square<KING>(Them);
-    Bitboard emptySquares;
+    const Bitboard emptySquares = ~pos.pieces();
+    const Bitboard enemies      = Type == EVASIONS ? pos.checkers() : pos.pieces(Them);

-    Bitboard pawnsOn7    = pos.pieces(Us, PAWN) &  TRank7BB;
+    Bitboard pawnsOn7    = pos.pieces(Us, PAWN) & TRank7BB;
    Bitboard pawnsNotOn7 = pos.pieces(Us, PAWN) & ~TRank7BB;

-    Bitboard enemies = (Type == EVASIONS ? pos.pieces(Them) & target:
-                        Type == CAPTURES ? target : pos.pieces(Them));
-
    // Single and double pawn pushes, no promotions
-    if (Type != CAPTURES)
+    if constexpr (Type != CAPTURES)
    {
-        emptySquares = (Type == QUIETS || Type == QUIET_CHECKS ? target : ~pos.pieces());
-
-        Bitboard b1 = shift<Up>(pawnsNotOn7)   & emptySquares;
+        Bitboard b1 = shift<Up>(pawnsNotOn7) & emptySquares;
        Bitboard b2 = shift<Up>(b1 & TRank3BB) & emptySquares;

-        if (Type == EVASIONS) // Consider only blocking squares
+        if constexpr (Type == EVASIONS)  // Consider only blocking squares
        {
            b1 &= target;
            b2 &= target;
        }

-        if (Type == QUIET_CHECKS)
-        {
-            b1 &= pawn_attacks_bb(Them, ksq);
-            b2 &= pawn_attacks_bb(Them, ksq);
-
-            // Add pawn pushes which give discovered check. This is possible only
-            // if the pawn is not on the same file as the enemy king, because we
-            // don't generate captures. Note that a possible discovered check
-            // promotion has been already generated amongst the captures.
-            Bitboard dcCandidateQuiets = pos.blockers_for_king(Them) & pawnsNotOn7;
-            if (dcCandidateQuiets)
-            {
-                Bitboard dc1 = shift<Up>(dcCandidateQuiets) & emptySquares & ~file_bb(ksq);
-                Bitboard dc2 = shift<Up>(dc1 & TRank3BB) & emptySquares;
-
-                b1 |= dc1;
-                b2 |= dc2;
-            }
-        }
-
        while (b1)
        {
-            Square to = pop_lsb(&b1);
-            *moveList++ = make_move(to - Up, to);
+            Square to   = pop_lsb(b1);
+            *moveList++ = Move(to - Up, to);
        }

        while (b2)
        {
-            Square to = pop_lsb(&b2);
-            *moveList++ = make_move(to - Up - Up, to);
+            Square to   = pop_lsb(b2);
+            *moveList++ = Move(to - Up - Up, to);
        }
    }

    // Promotions and underpromotions
    if (pawnsOn7)
    {
-        if (Type == CAPTURES)
-            emptySquares = ~pos.pieces();
-
-        if (Type == EVASIONS)
-            emptySquares &= target;
-
        Bitboard b1 = shift<UpRight>(pawnsOn7) & enemies;
-        Bitboard b2 = shift<UpLeft >(pawnsOn7) & enemies;
-        Bitboard b3 = shift<Up     >(pawnsOn7) & emptySquares;
+        Bitboard b2 = shift<UpLeft>(pawnsOn7) & enemies;
+        Bitboard b3 = shift<Up>(pawnsOn7) & emptySquares;
+
+        if constexpr (Type == EVASIONS)
+            b3 &= target;

        while (b1)
-            moveList = make_promotions<Type, UpRight>(moveList, pop_lsb(&b1), ksq);
+            moveList = make_promotions<Type, UpRight, true>(moveList, pop_lsb(b1));

        while (b2)
-            moveList = make_promotions<Type, UpLeft >(moveList, pop_lsb(&b2), ksq);
+            moveList = make_promotions<Type, UpLeft, true>(moveList, pop_lsb(b2));

        while (b3)
-            moveList = make_promotions<Type, Up     >(moveList, pop_lsb(&b3), ksq);
+            moveList = make_promotions<Type, Up, false>(moveList, pop_lsb(b3));
    }

    // Standard and en passant captures
-    if (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS)
+    if constexpr (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS)
    {
        Bitboard b1 = shift<UpRight>(pawnsNotOn7) & enemies;
-        Bitboard b2 = shift<UpLeft >(pawnsNotOn7) & enemies;
+        Bitboard b2 = shift<UpLeft>(pawnsNotOn7) & enemies;

        while (b1)
        {
-            Square to = pop_lsb(&b1);
-            *moveList++ = make_move(to - UpRight, to);
+            Square to   = pop_lsb(b1);
+            *moveList++ = Move(to - UpRight, to);
        }

        while (b2)
        {
-            Square to = pop_lsb(&b2);
-            *moveList++ = make_move(to - UpLeft, to);
+            Square to   = pop_lsb(b2);
+            *moveList++ = Move(to - UpLeft, to);
        }

        if (pos.ep_square() != SQ_NONE)
        {
            assert(rank_of(pos.ep_square()) == relative_rank(Us, RANK_6));

-            // An en passant capture cannot resolve a discovered check.
+            // An en passant capture cannot resolve a discovered check
            if (Type == EVASIONS && (target & (pos.ep_square() + Up)))
                return moveList;

@@ -165,200 +139,118 @@ namespace {
            assert(b1);

            while (b1)
-                *moveList++ = make<EN_PASSANT>(pop_lsb(&b1), pos.ep_square());
+                *moveList++ = Move::make<EN_PASSANT>(pop_lsb(b1), pos.ep_square());
        }
    }

    return moveList;
-  }
+}


-  template<PieceType Pt, bool Checks>
-  ExtMove* generate_moves(const Position& pos, ExtMove* moveList, Bitboard piecesToMove, Bitboard target) {
+// Appends the moves of every piece of type Pt belonging to Us whose
+// destination lies in target, and returns the new end of the list.
+// Kings and pawns are rejected by the static_assert below.
+template<Color Us, PieceType Pt>
+ExtMove* generate_moves(const Position& pos, ExtMove* moveList, Bitboard target) {

    static_assert(Pt != KING && Pt != PAWN, "Unsupported piece type in generate_moves()");

-    Bitboard bb = piecesToMove & pos.pieces(Pt);
+    Bitboard bb = pos.pieces(Us, Pt);

-    if (!bb)
-        return moveList;
-
-    [[maybe_unused]] const Bitboard checkSquares = pos.check_squares(Pt);
-
-    while (bb) {
-        Square from = pop_lsb(&bb);
-
-        Bitboard b = attacks_bb<Pt>(from, pos.pieces()) & target;
-        if constexpr (Checks)
-            b &= checkSquares;
+    while (bb)
+    {
+        Square   from = pop_lsb(bb);
+        Bitboard b    = attacks_bb<Pt>(from, pos.pieces()) & target;

        while (b)
-            *moveList++ = make_move(from, pop_lsb(&b));
+            *moveList++ = Move(from, pop_lsb(b));
    }

    return moveList;
-  }
+}


-  template<Color Us, GenType Type>
-  ExtMove* generate_all(const Position& pos, ExtMove* moveList) {
+// Appends the moves of the requested Type for side Us and returns the new end
+// of the list. Moves of pawns, knights, bishops, rooks and queens are limited
+// to a target set chosen by Type and are skipped entirely for EVASIONS with
+// more than one checker. King moves are always generated; castling moves only
+// for QUIETS and NON_EVASIONS.
+template<Color Us, GenType Type>
+ExtMove* generate_all(const Position& pos, ExtMove* moveList) {

    static_assert(Type != LEGAL, "Unsupported type in generate_all()");

-    constexpr bool Checks = Type == QUIET_CHECKS; // Reduce template instantiations
-    Bitboard target, piecesToMove = pos.pieces(Us);
+    const Square ksq = pos.square<KING>(Us);
+    Bitboard     target;

-    if(Type == QUIET_CHECKS)
-        piecesToMove &= ~pos.blockers_for_king(~Us);
-
-    switch (Type)
+    // Skip generating non-king moves when in double check
+    if (Type != EVASIONS || !more_than_one(pos.checkers()))
    {
-        case CAPTURES:
-            target =  pos.pieces(~Us);
-            break;
-        case QUIETS:
-        case QUIET_CHECKS:
-            target = ~pos.pieces();
-            break;
-        case EVASIONS:
-        {
-            Square checksq = lsb(pos.checkers());
-            target = between_bb(pos.square<KING>(Us), checksq) | checksq;
-            break;
-        }
-        case NON_EVASIONS:
-            target = ~pos.pieces(Us);
-            break;
+        target = Type == EVASIONS     ? between_bb(ksq, lsb(pos.checkers()))
+               : Type == NON_EVASIONS ? ~pos.pieces(Us)
+               : Type == CAPTURES     ? pos.pieces(~Us)
+                                      : ~pos.pieces();  // QUIETS
+
+        moveList = generate_pawn_moves<Us, Type>(pos, moveList, target);
+        moveList = generate_moves<Us, KNIGHT>(pos, moveList, target);
+        moveList = generate_moves<Us, BISHOP>(pos, moveList, target);
+        moveList = generate_moves<Us, ROOK>(pos, moveList, target);
+        moveList = generate_moves<Us, QUEEN>(pos, moveList, target);
    }

-    moveList = generate_pawn_moves<Us, Type>(pos, moveList, target);
-    moveList = generate_moves<KNIGHT, Checks>(pos, moveList, piecesToMove, target);
-    moveList = generate_moves<BISHOP, Checks>(pos, moveList, piecesToMove, target);
-    moveList = generate_moves<  ROOK, Checks>(pos, moveList, piecesToMove, target);
-    moveList = generate_moves< QUEEN, Checks>(pos, moveList, piecesToMove, target);
+    // For EVASIONS the king destinations are not limited to target (which is
+    // left unset in double check) but to every square without a piece of Us
+    Bitboard b = attacks_bb<KING>(ksq) & (Type == EVASIONS ? ~pos.pieces(Us) : target);

-    if (Type != QUIET_CHECKS && Type != EVASIONS)
-    {
-        Square ksq = pos.square<KING>(Us);
-        Bitboard b = attacks_bb<KING>(ksq) & target;
-        while (b)
-            *moveList++ = make_move(ksq, pop_lsb(&b));
+    while (b)
+        *moveList++ = Move(ksq, pop_lsb(b));

-        if ((Type != CAPTURES) && pos.can_castle(Us & ANY_CASTLING))
-            for (CastlingRights cr : { Us & KING_SIDE, Us & QUEEN_SIDE } )
-                if (!pos.castling_impeded(cr) && pos.can_castle(cr))
-                    *moveList++ = make<CASTLING>(ksq, pos.castling_rook_square(cr));
-    }
+    if ((Type == QUIETS || Type == NON_EVASIONS) && pos.can_castle(Us & ANY_CASTLING))
+        for (CastlingRights cr : {Us & KING_SIDE, Us & QUEEN_SIDE})
+            if (!pos.castling_impeded(cr) && pos.can_castle(cr))
+                *moveList++ = Move::make<CASTLING>(ksq, pos.castling_rook_square(cr));

    return moveList;
-  }
+}

-} // namespace
+}  // namespace


-/// <CAPTURES>     Generates all pseudo-legal captures plus queen and checking knight promotions
-/// <QUIETS>       Generates all pseudo-legal non-captures and underpromotions (except checking knight)
-/// <NON_EVASIONS> Generates all pseudo-legal captures and non-captures
-///
-/// Returns a pointer to the end of the move list.
-
+// <CAPTURES>     Generates all pseudo-legal captures (including capturing
+//                underpromotions) plus queen promotions
+// <QUIETS>       Generates all pseudo-legal non-captures and non-capturing
+//                underpromotions
+// <EVASIONS>     Generates all pseudo-legal check evasions
+// <NON_EVASIONS> Generates all pseudo-legal captures and non-captures
+//
+// Returns a pointer to the end of the move list.
 template<GenType Type>
 ExtMove* generate(const Position& pos, ExtMove* moveList) {

-  static_assert(Type == CAPTURES || Type == QUIETS || Type == NON_EVASIONS, "Unsupported type in generate()");
-  assert(!pos.checkers());
+    static_assert(Type != LEGAL, "Unsupported type in generate()");
+    // EVASIONS must be requested exactly when pos.checkers() is non-empty
+    assert((Type == EVASIONS) == bool(pos.checkers()));

-  Color us = pos.side_to_move();
+    Color us = pos.side_to_move();

-  return us == WHITE ? generate_all<WHITE, Type>(pos, moveList)
-                     : generate_all<BLACK, Type>(pos, moveList);
+    // Dispatch to the instantiation for the side to move
+    return us == WHITE ? generate_all<WHITE, Type>(pos, moveList)
+                       : generate_all<BLACK, Type>(pos, moveList);
 }

 // Explicit template instantiations
 template ExtMove* generate<CAPTURES>(const Position&, ExtMove*);
 template ExtMove* generate<QUIETS>(const Position&, ExtMove*);
+template ExtMove* generate<EVASIONS>(const Position&, ExtMove*);
 template ExtMove* generate<NON_EVASIONS>(const Position&, ExtMove*);


-/// generate<QUIET_CHECKS> generates all pseudo-legal non-captures giving check,
-/// except castling. Returns a pointer to the end of the move list.
-template<>
-ExtMove* generate<QUIET_CHECKS>(const Position& pos, ExtMove* moveList) {
-
-  assert(!pos.checkers());
-
-  Color us = pos.side_to_move();
-  Bitboard dc = pos.blockers_for_king(~us) & pos.pieces(us) & ~pos.pieces(PAWN);
-
-  while (dc)
-  {
-     Square from = pop_lsb(&dc);
-     PieceType pt = type_of(pos.piece_on(from));
-
-     Bitboard b = attacks_bb(pt, from, pos.pieces()) & ~pos.pieces();
-
-     if (pt == KING)
-         b &= ~attacks_bb<QUEEN>(pos.square<KING>(~us));
-
-     while (b)
-         *moveList++ = make_move(from, pop_lsb(&b));
-  }
-
-  return us == WHITE ? generate_all<WHITE, QUIET_CHECKS>(pos, moveList)
-                     : generate_all<BLACK, QUIET_CHECKS>(pos, moveList);
-}
-
-
-/// generate<EVASIONS> generates all pseudo-legal check evasions when the side
-/// to move is in check. Returns a pointer to the end of the move list.
-template<>
-ExtMove* generate<EVASIONS>(const Position& pos, ExtMove* moveList) {
-
-  assert(pos.checkers());
-
-  Color us = pos.side_to_move();
-  Square ksq = pos.square<KING>(us);
-  Bitboard sliderAttacks = 0;
-  Bitboard sliders = pos.checkers() & ~pos.pieces(KNIGHT, PAWN);
-
-  // Find all the squares attacked by slider checkers. We will remove them from
-  // the king evasions in order to skip known illegal moves, which avoids any
-  // useless legality checks later on.
-  while (sliders)
-      sliderAttacks |= line_bb(ksq, pop_lsb(&sliders)) & ~pos.checkers();
-
-  // Generate evasions for king, capture and non capture moves
-  Bitboard b = attacks_bb<KING>(ksq) & ~pos.pieces(us) & ~sliderAttacks;
-  while (b)
-      *moveList++ = make_move(ksq, pop_lsb(&b));
-
-  if (more_than_one(pos.checkers()))
-      return moveList; // Double check, only a king move can save the day
-
-  // Generate blocking evasions or captures of the checking piece
-  return us == WHITE ? generate_all<WHITE, EVASIONS>(pos, moveList)
-                     : generate_all<BLACK, EVASIONS>(pos, moveList);
-}
-
-
-/// generate<LEGAL> generates all the legal moves in the given position
+// generate<LEGAL> generates all the legal moves in the given position.
+// It first generates EVASIONS or NON_EVASIONS, depending on pos.checkers(),
+// and then removes the moves rejected by pos.legal(). Returns a pointer to the
+// end of the filtered list.

 template<>
 ExtMove* generate<LEGAL>(const Position& pos, ExtMove* moveList) {

-  Color us = pos.side_to_move();
-  Bitboard pinned = pos.blockers_for_king(us) & pos.pieces(us);
-  Square ksq = pos.square<KING>(us);
-  ExtMove* cur = moveList;
+    Color    us     = pos.side_to_move();
+    Bitboard pinned = pos.blockers_for_king(us) & pos.pieces(us);
+    Square   ksq    = pos.square<KING>(us);
+    ExtMove* cur    = moveList;

-  moveList = pos.checkers() ? generate<EVASIONS    >(pos, moveList)
-                            : generate<NON_EVASIONS>(pos, moveList);
-  while (cur != moveList)
-      if (  ((pinned && pinned & from_sq(*cur)) || from_sq(*cur) == ksq || type_of(*cur) == EN_PASSANT)
-          && !pos.legal(*cur))
-          *cur = (--moveList)->move;
-      else
-          ++cur;
+    moveList =
+      pos.checkers() ? generate<EVASIONS>(pos, moveList) : generate<NON_EVASIONS>(pos, moveList);
+    // Only moves starting from a square in 'pinned', king moves and en passant
+    // captures are passed to pos.legal(). A rejected move is overwritten with
+    // the last move of the list, which shrinks by one; cur is not advanced in
+    // that case so the move copied into its place is examined next.
+    while (cur != moveList)
+        if (((pinned & cur->from_sq()) || cur->from_sq() == ksq || cur->type_of() == EN_PASSANT)
+            && !pos.legal(*cur))
+            *cur = *(--moveList);
+        else
+            ++cur;

-  return moveList;
+    return moveList;
 }
+
+}  // namespace Stockfish
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -19,55 +19,55 @@
 #ifndef MOVEGEN_H_INCLUDED
 #define MOVEGEN_H_INCLUDED

-#include <algorithm>
+#include <algorithm>  // IWYU pragma: keep
+#include <cstddef>

 #include "types.h"

+namespace Stockfish {
+
 class Position;

+// Selects which class of moves the generate() template produces
 enum GenType {
-  CAPTURES,
-  QUIETS,
-  QUIET_CHECKS,
-  EVASIONS,
-  NON_EVASIONS,
-  LEGAL
+    CAPTURES,
+    QUIETS,
+    EVASIONS,
+    NON_EVASIONS,
+    LEGAL
 };

-struct ExtMove {
-  Move move;
-  int value;
+// A Move extended with an integer value
+struct ExtMove: public Move {
+    int value;

-  operator Move() const { return move; }
-  void operator=(Move m) { move = m; }
+    // Takes over only the raw move data of m; value is left unchanged
+    void operator=(Move m) { data = m.raw(); }

-  // Inhibit unwanted implicit conversions to Move
-  // with an ambiguity that yields to a compile error.
-  operator float() const = delete;
+    // Inhibit unwanted implicit conversions to Move
+    // with an ambiguity that yields a compile error.
+    operator float() const = delete;
 };

-inline bool operator<(const ExtMove& f, const ExtMove& s) {
-  return f.value < s.value;
-}
+// Compares two ExtMoves by their value member only
+inline bool operator<(const ExtMove& f, const ExtMove& s) { return f.value < s.value; }

 template<GenType>
 ExtMove* generate(const Position& pos, ExtMove* moveList);

-/// The MoveList struct is a simple wrapper around generate(). It sometimes comes
-/// in handy to use this class instead of the low level generate() function.
+// The MoveList struct wraps the generate() function and returns a convenient
+// list of moves. Using MoveList is sometimes preferable to directly calling
+// the lower level generate() function.
 template<GenType T>
 struct MoveList {

-  explicit MoveList(const Position& pos) : last(generate<T>(pos, moveList)) {}
-  const ExtMove* begin() const { return moveList; }
-  const ExtMove* end() const { return last; }
-  size_t size() const { return last - moveList; }
-  bool contains(Move move) const {
-    return std::find(begin(), end(), move) != end();
-  }
+    // Generates the moves of type T for pos into the internal buffer and
+    // remembers the end pointer returned by generate()
+    explicit MoveList(const Position& pos) :
+        last(generate<T>(pos, moveList)) {}
+    const ExtMove* begin() const { return moveList; }
+    const ExtMove* end() const { return last; }
+    size_t         size() const { return last - moveList; }
+    // Linear search of the generated moves for move
+    bool           contains(Move move) const { return std::find(begin(), end(), move) != end(); }

-private:
-  ExtMove moveList[MAX_MOVES], *last;
+   private:
+    // Buffer of MAX_MOVES entries and the end of the generated range
+    ExtMove moveList[MAX_MOVES], *last;
 };

-#endif // #ifndef MOVEGEN_H_INCLUDED
+}  // namespace Stockfish
+
+#endif  // #ifndef MOVEGEN_H_INCLUDED
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -16,250 +16,305 @@
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

-#include <cassert>
-
 #include "movepick.h"

+#include <cassert>
+#include <limits>
+
+#include "bitboard.h"
+#include "misc.h"
+#include "position.h"
+
+namespace Stockfish {
+
 namespace {

-  enum Stages {
-    MAIN_TT, CAPTURE_INIT, GOOD_CAPTURE, REFUTATION, QUIET_INIT, QUIET, BAD_CAPTURE,
-    EVASION_TT, EVASION_INIT, EVASION,
-    PROBCUT_TT, PROBCUT_INIT, PROBCUT,
-    QSEARCH_TT, QCAPTURE_INIT, QCAPTURE, QCHECK_INIT, QCHECK
-  };
+enum Stages {  // order matters: next_move() steps with ++stage; constructors add 1 to a *_TT stage to skip it
+    // generate main search moves (TT move, good captures, good quiets, bad captures, bad quiets)
+    MAIN_TT,
+    CAPTURE_INIT,
+    GOOD_CAPTURE,
+    QUIET_INIT,
+    GOOD_QUIET,
+    BAD_CAPTURE,
+    BAD_QUIET,

-  // partial_insertion_sort() sorts moves in descending order up to and including
-  // a given limit. The order of moves smaller than the limit is left unspecified.
-  void partial_insertion_sort(ExtMove* begin, ExtMove* end, int limit) {
+    // generate evasion moves
+    EVASION_TT,
+    EVASION_INIT,
+    EVASION,
+
+    // generate probcut moves
+    PROBCUT_TT,
+    PROBCUT_INIT,
+    PROBCUT,
+
+    // generate qsearch moves (TT move, then captures only)
+    QSEARCH_TT,
+    QCAPTURE_INIT,
+    QCAPTURE
+};
+
+// Sort moves in descending order up to and including a given limit.
+// The order of moves smaller than the limit is left unspecified.
+void partial_insertion_sort(ExtMove* begin, ExtMove* end, int limit) {

     for (ExtMove *sortedEnd = begin, *p = begin + 1; p < end; ++p)
         if (p->value >= limit)
         {
             ExtMove tmp = *p, *q;
-            *p = *++sortedEnd;
+            *p          = *++sortedEnd;  // grow the sorted prefix by one slot; its old occupant moves to p
             for (q = sortedEnd; q != begin && *(q - 1) < tmp; --q)
                 *q = *(q - 1);
             *q = tmp;
         }
-  }
-
-} // namespace
-
-
-/// Constructors of the MovePicker class. As arguments we pass information
-/// to help it to return the (presumably) good moves first, to decide which
-/// moves to return (in the quiescence search, for instance, we only want to
-/// search captures, promotions, and some checks) and how important good move
-/// ordering is at the current node.
-
-/// MovePicker constructor for the main search
-MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHistory* mh, const LowPlyHistory* lp,
-                       const CapturePieceToHistory* cph, const PieceToHistory** ch, Move cm, const Move* killers, int pl)
-           : pos(p), mainHistory(mh), lowPlyHistory(lp), captureHistory(cph), continuationHistory(ch),
-             ttMove(ttm), refutations{{killers[0], 0}, {killers[1], 0}, {cm, 0}}, depth(d), ply(pl) {
-
-  assert(d > 0);
-
-  stage = (pos.checkers() ? EVASION_TT : MAIN_TT) +
-          !(ttm && pos.pseudo_legal(ttm));
 }

-/// MovePicker constructor for quiescence search
-MovePicker::MovePicker(const Position& p, Move ttm, Depth d, const ButterflyHistory* mh,
-                       const CapturePieceToHistory* cph, const PieceToHistory** ch, Square rs)
-           : pos(p), mainHistory(mh), captureHistory(cph), continuationHistory(ch), ttMove(ttm), recaptureSquare(rs), depth(d) {
+}  // namespace

-  assert(d <= 0);

-  stage = (pos.checkers() ? EVASION_TT : QSEARCH_TT) +
-          !(   ttm
-            && (pos.checkers() || depth > DEPTH_QS_RECAPTURES || to_sq(ttm) == recaptureSquare)
-            && pos.pseudo_legal(ttm));
+// Constructors of the MovePicker class. As arguments, we pass information
+// to decide which class of moves to emit, to help sorting the (presumably)
+// good moves first, and how important move ordering is at the current node.
+
+// MovePicker constructor for the main search and for the quiescence search
+MovePicker::MovePicker(const Position&              p,
+                       Move                         ttm,
+                       Depth                        d,
+                       const ButterflyHistory*      mh,
+                       const LowPlyHistory*         lph,
+                       const CapturePieceToHistory* cph,
+                       const PieceToHistory**       ch,
+                       const PawnHistory*           ph,
+                       int                          pl) :
+    pos(p),
+    mainHistory(mh),
+    lowPlyHistory(lph),
+    captureHistory(cph),
+    continuationHistory(ch),
+    pawnHistory(ph),
+    ttMove(ttm),
+    depth(d),
+    ply(pl) {
+
+    // Start at the TT stage of the matching phase (evasion, main search or qsearch);
+    // adding 1 skips that stage when there is no pseudo-legal TT move to emit.
+    const int ttStage = pos.checkers() ? EVASION_TT : depth > 0 ? MAIN_TT : QSEARCH_TT;
+
+    stage = ttStage + !(ttm && pos.pseudo_legal(ttm));
 }

-/// MovePicker constructor for ProbCut: we generate captures with SEE greater
-/// than or equal to the given threshold.
-MovePicker::MovePicker(const Position& p, Move ttm, Value th, const CapturePieceToHistory* cph)
-           : pos(p), captureHistory(cph), ttMove(ttm), threshold(th) {
+// MovePicker constructor for ProbCut: we generate captures with Static Exchange
+// Evaluation (SEE) greater than or equal to the given threshold.
+MovePicker::MovePicker(const Position& p, Move ttm, int th, const CapturePieceToHistory* cph) :
+    pos(p),
+    captureHistory(cph),
+    ttMove(ttm),
+    threshold(th) {
+    assert(!pos.checkers());  // the PROBCUT stages only generate captures, never evasions

-  assert(!pos.checkers());
-
-  stage = PROBCUT_TT + !(ttm && pos.capture(ttm)
-                             && pos.pseudo_legal(ttm)
-                             && pos.see_ge(ttm, threshold));
+    stage = PROBCUT_TT  // +1 skips the TT stage unless ttm is a pseudo-legal capture-stage move passing see_ge
+          + !(ttm && pos.capture_stage(ttm) && pos.pseudo_legal(ttm) && pos.see_ge(ttm, threshold));
 }

-/// MovePicker::score() assigns a numerical value to each move in a list, used
-/// for sorting. Captures are ordered by Most Valuable Victim (MVV), preferring
-/// captures with a good history. Quiets moves are ordered using the histories.
+// Assigns a numerical value to each move in a list, used for sorting.
+// Captures are ordered by Most Valuable Victim (MVV), preferring captures
+// with a good history. Quiet moves are ordered using the history tables.
 template<GenType Type>
 void MovePicker::score() {

-  static_assert(Type == CAPTURES || Type == QUIETS || Type == EVASIONS, "Wrong type");
+    static_assert(Type == CAPTURES || Type == QUIETS || Type == EVASIONS, "Wrong type");

-  for (auto& m : *this)
-      if constexpr (Type == CAPTURES)
-          m.value =  int(PieceValue[MG][pos.piece_on(to_sq(m))]) * 6
-                   + (*captureHistory)[pos.moved_piece(m)][to_sq(m)][type_of(pos.piece_on(to_sq(m)))];
+    [[maybe_unused]] Bitboard threatenedByPawn, threatenedByMinor, threatenedByRook,
+      threatenedPieces;  // only assigned and read when Type == QUIETS
+    if constexpr (Type == QUIETS)
+    {
+        Color us = pos.side_to_move();

-      else if constexpr (Type == QUIETS)
-          m.value =      (*mainHistory)[pos.side_to_move()][from_to(m)]
-                   + 2 * (*continuationHistory[0])[pos.moved_piece(m)][to_sq(m)]
-                   +     (*continuationHistory[1])[pos.moved_piece(m)][to_sq(m)]
-                   +     (*continuationHistory[3])[pos.moved_piece(m)][to_sq(m)]
-                   +     (*continuationHistory[5])[pos.moved_piece(m)][to_sq(m)]
-                   + (ply < MAX_LPH ? std::min(4, depth / 3) * (*lowPlyHistory)[ply][from_to(m)] : 0);
+        threatenedByPawn = pos.attacks_by<PAWN>(~us);
+        threatenedByMinor =
+          pos.attacks_by<KNIGHT>(~us) | pos.attacks_by<BISHOP>(~us) | threatenedByPawn;
+        threatenedByRook = pos.attacks_by<ROOK>(~us) | threatenedByMinor;  // sets are cumulative

-      else // Type == EVASIONS
-      {
-          if (pos.capture(m))
-              m.value =  PieceValue[MG][pos.piece_on(to_sq(m))]
-                       - Value(type_of(pos.moved_piece(m)));
-          else
-              m.value =      (*mainHistory)[pos.side_to_move()][from_to(m)]
-                       + 2 * (*continuationHistory[0])[pos.moved_piece(m)][to_sq(m)]
-                       - (1 << 28);
-      }
+        // Pieces threatened by pieces of lesser material value
+        threatenedPieces = (pos.pieces(us, QUEEN) & threatenedByRook)
+                         | (pos.pieces(us, ROOK) & threatenedByMinor)
+                         | (pos.pieces(us, KNIGHT, BISHOP) & threatenedByPawn);
+    }
+
+    for (auto& m : *this)  // iterates [cur, endMoves) through the class's begin()/end()
+        if constexpr (Type == CAPTURES)
+            m.value =
+              7 * int(PieceValue[pos.piece_on(m.to_sq())])
+              + (*captureHistory)[pos.moved_piece(m)][m.to_sq()][type_of(pos.piece_on(m.to_sq()))];
+
+        else if constexpr (Type == QUIETS)
+        {
+            Piece     pc   = pos.moved_piece(m);
+            PieceType pt   = type_of(pc);
+            Square    from = m.from_sq();
+            Square    to   = m.to_sq();
+
+            // histories: main and pawn history count double, continuationHistory[4] counts one third
+            m.value = 2 * (*mainHistory)[pos.side_to_move()][m.from_to()];
+            m.value += 2 * (*pawnHistory)[pawn_structure_index(pos)][pc][to];
+            m.value += (*continuationHistory[0])[pc][to];
+            m.value += (*continuationHistory[1])[pc][to];
+            m.value += (*continuationHistory[2])[pc][to];
+            m.value += (*continuationHistory[3])[pc][to];
+            m.value += (*continuationHistory[4])[pc][to] / 3;
+            m.value += (*continuationHistory[5])[pc][to];
+
+            // bonus for checks
+            m.value += bool(pos.check_squares(pt) & to) * 16384;
+
+            // bonus for escaping from capture (applies only when `from` holds one of threatenedPieces)
+            m.value += threatenedPieces & from ? (pt == QUEEN && !(to & threatenedByRook)   ? 51700
+                                                  : pt == ROOK && !(to & threatenedByMinor) ? 25600
+                                                  : !(to & threatenedByPawn)                ? 14450
+                                                                                            : 0)
+                                               : 0;
+
+            // malus for putting piece en prise (queen onto threatenedByRook, rook onto threatenedByMinor)
+            m.value -= (pt == QUEEN ? bool(to & threatenedByRook) * 49000
+                        : pt == ROOK && bool(to & threatenedByMinor) ? 24335
+                                                                     : 0);
+
+            if (ply < LOW_PLY_HISTORY_SIZE)
+                m.value += 8 * (*lowPlyHistory)[ply][m.from_to()] / (1 + 2 * ply);  // weight shrinks as ply grows
+        }
+
+        else  // Type == EVASIONS
+        {
+            if (pos.capture_stage(m))
+                m.value = PieceValue[pos.piece_on(m.to_sq())] + (1 << 28);  // presumably keeps captures ahead of quiet evasions - TODO confirm history ranges
+            else
+                m.value = (*mainHistory)[pos.side_to_move()][m.from_to()]
+                        + (*continuationHistory[0])[pos.moved_piece(m)][m.to_sq()]
+                        + (*pawnHistory)[pawn_structure_index(pos)][pos.moved_piece(m)][m.to_sq()];
+        }
 }

-/// MovePicker::select() returns the next move satisfying a predicate function.
-/// It never returns the TT move.
-template<MovePicker::PickType T, typename Pred>
+// Returns the next move satisfying a predicate function.
+// This never returns the TT move, as it was emitted before.
+template<typename Pred>
 Move MovePicker::select(Pred filter) {

-  while (cur < endMoves)
-  {
-      if (T == Best)
-          std::swap(*cur, *std::max_element(cur, endMoves));
+    for (; cur < endMoves; ++cur)
+        if (*cur != ttMove && filter())  // filter() runs only for non-TT moves and may inspect *cur
+            return *cur++;  // leaves cur one past the returned move; callers re-read it as *(cur - 1)

-      if (*cur != ttMove && filter())
-          return *cur++;
-
-      cur++;
-  }
-  return MOVE_NONE;
+    return Move::none();
 }

-/// MovePicker::next_move() is the most important method of the MovePicker class. It
-/// returns a new pseudo-legal move every time it is called until there are no more
-/// moves left, picking the move with the highest score from a list of generated moves.
-Move MovePicker::next_move(bool skipQuiets) {
+// This is the most important method of the MovePicker class. We emit one
+// new pseudo-legal move on every call until there are no more moves left,
+// picking the move with the highest score from a list of generated moves.
+Move MovePicker::next_move() {
+
+    auto quiet_threshold = [](Depth d) { return -3560 * d; };  // sort limit for quiets; drops by 3560 per unit of depth

 top:
-  switch (stage) {
+    switch (stage)
+    {

-  case MAIN_TT:
-  case EVASION_TT:
-  case QSEARCH_TT:
-  case PROBCUT_TT:
-      ++stage;
-      return ttMove;
+    case MAIN_TT :
+    case EVASION_TT :
+    case QSEARCH_TT :
+    case PROBCUT_TT :
+        ++stage;
+        return ttMove;  // the TT move is emitted before any move generation happens

-  case CAPTURE_INIT:
-  case PROBCUT_INIT:
-  case QCAPTURE_INIT:
-      cur = endBadCaptures = moves;
-      endMoves = generate<CAPTURES>(pos, cur);
+    case CAPTURE_INIT :
+    case PROBCUT_INIT :
+    case QCAPTURE_INIT :
+        cur = endBadCaptures = moves;
+        endMoves             = generate<CAPTURES>(pos, cur);

-      score<CAPTURES>();
-      ++stage;
-      goto top;
+        score<CAPTURES>();
+        partial_insertion_sort(cur, endMoves, std::numeric_limits<int>::min());  // limit is INT_MIN: all captures get sorted
+        ++stage;
+        goto top;

-  case GOOD_CAPTURE:
-      if (select<Best>([&](){
-                       return pos.see_ge(*cur, Value(-69 * cur->value / 1024)) ?
-                              // Move losing capture to endBadCaptures to be tried later
-                              true : (*endBadCaptures++ = *cur, false); }))
-          return *(cur - 1);
+    case GOOD_CAPTURE :
+        if (select([&]() {
+                // Move losing capture to endBadCaptures to be tried later
+                return pos.see_ge(*cur, -cur->value / 18) ? true
+                                                          : (*endBadCaptures++ = *cur, false);
+            }))
+            return *(cur - 1);

-      // Prepare the pointers to loop over the refutations array
-      cur = std::begin(refutations);
-      endMoves = std::end(refutations);
+        ++stage;
+        [[fallthrough]];

-      // If the countermove is the same as a killer, skip it
-      if (   refutations[0].move == refutations[2].move
-          || refutations[1].move == refutations[2].move)
-          --endMoves;
+    case QUIET_INIT :
+        if (!skipQuiets)
+        {
+            cur      = endBadCaptures;
+            endMoves = beginBadQuiets = endBadQuiets = generate<QUIETS>(pos, cur);  // bad-quiet range starts out empty

-      ++stage;
-      [[fallthrough]];
+            score<QUIETS>();
+            partial_insertion_sort(cur, endMoves, quiet_threshold(depth));  // only quiets scoring >= the limit get sorted
+        }

-  case REFUTATION:
-      if (select<Next>([&](){ return    *cur != MOVE_NONE
-                                    && !pos.capture(*cur)
-                                    &&  pos.pseudo_legal(*cur); }))
-          return *(cur - 1);
-      ++stage;
-      [[fallthrough]];
+        ++stage;
+        [[fallthrough]];

-  case QUIET_INIT:
-      if (!skipQuiets)
-      {
-          cur = endBadCaptures;
-          endMoves = generate<QUIETS>(pos, cur);
+    case GOOD_QUIET :
+        if (!skipQuiets && select([]() { return true; }))
+        {
+            if ((cur - 1)->value > -7998 || (cur - 1)->value <= quiet_threshold(depth))
+                return *(cur - 1);

-          score<QUIETS>();
-          partial_insertion_sort(cur, endMoves, -3000 * depth);
-      }
+            // Score is <= -7998 but above the sort limit: this and all later quiets are left for BAD_QUIET
+            beginBadQuiets = cur - 1;
+        }

-      ++stage;
-      [[fallthrough]];
+        // Prepare the pointers to loop over the bad captures
+        cur      = moves;
+        endMoves = endBadCaptures;

-  case QUIET:
-      if (   !skipQuiets
-          && select<Next>([&](){return   *cur != refutations[0].move
-                                      && *cur != refutations[1].move
-                                      && *cur != refutations[2].move;}))
-          return *(cur - 1);
+        ++stage;
+        [[fallthrough]];

-      // Prepare the pointers to loop over the bad captures
-      cur = moves;
-      endMoves = endBadCaptures;
+    case BAD_CAPTURE :
+        if (select([]() { return true; }))
+            return *(cur - 1);

-      ++stage;
-      [[fallthrough]];
+        // Prepare the pointers to loop over the bad quiets
+        cur      = beginBadQuiets;
+        endMoves = endBadQuiets;

-  case BAD_CAPTURE:
-      return select<Next>([](){ return true; });
+        ++stage;
+        [[fallthrough]];

-  case EVASION_INIT:
-      cur = moves;
-      endMoves = generate<EVASIONS>(pos, cur);
+    case BAD_QUIET :
+        if (!skipQuiets)
+            return select([]() { return true; });  // the quiets deferred by GOOD_QUIET

-      score<EVASIONS>();
-      ++stage;
-      [[fallthrough]];
+        return Move::none();

-  case EVASION:
-      return select<Best>([](){ return true; });
+    case EVASION_INIT :
+        cur      = moves;
+        endMoves = generate<EVASIONS>(pos, cur);

-  case PROBCUT:
-      return select<Best>([&](){ return pos.see_ge(*cur, threshold); });
+        score<EVASIONS>();
+        partial_insertion_sort(cur, endMoves, std::numeric_limits<int>::min());  // limit is INT_MIN: all evasions get sorted
+        ++stage;
+        [[fallthrough]];

-  case QCAPTURE:
-      if (select<Best>([&](){ return   depth > DEPTH_QS_RECAPTURES
-                                    || to_sq(*cur) == recaptureSquare; }))
-          return *(cur - 1);
+    case EVASION :
+    case QCAPTURE :
+        return select([]() { return true; });  // list was already sorted by the matching *_INIT stage

-      // If we did not find any move and we do not try checks, we have finished
-      if (depth != DEPTH_QS_CHECKS)
-          return MOVE_NONE;
+    case PROBCUT :
+        return select([&]() { return pos.see_ge(*cur, threshold); });  // emit only captures that pass see_ge
+    }

-      ++stage;
-      [[fallthrough]];
-
-  case QCHECK_INIT:
-      cur = moves;
-      endMoves = generate<QUIET_CHECKS>(pos, cur);
-
-      ++stage;
-      [[fallthrough]];
-
-  case QCHECK:
-      return select<Next>([](){ return true; });
-  }
-
-  assert(false);
-  return MOVE_NONE; // Silence warning
+    assert(false);
+    return Move::none();  // Silence warning
 }
+
+void MovePicker::skip_quiet_moves() { skipQuiets = true; }  // read by QUIET_INIT, GOOD_QUIET and BAD_QUIET in next_move()
+
+}  // namespace Stockfish
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -19,141 +19,62 @@
 #ifndef MOVEPICK_H_INCLUDED
 #define MOVEPICK_H_INCLUDED

-#include <array>
-#include <limits>
-#include <type_traits>
-
+#include "history.h"
 #include "movegen.h"
-#include "position.h"
 #include "types.h"

-/// StatsEntry stores the stat table value. It is usually a number but could
-/// be a move or even a nested history. We use a class instead of naked value
-/// to directly call history update operator<<() on the entry so to use stats
-/// tables at caller sites as simple multi-dim arrays.
-template<typename T, int D>
-class StatsEntry {
+namespace Stockfish {

-  T entry;
+class Position;

-public:
-  void operator=(const T& v) { entry = v; }
-  T* operator&() { return &entry; }
-  T* operator->() { return &entry; }
-  operator const T&() const { return entry; }
-
-  void operator<<(int bonus) {
-    assert(abs(bonus) <= D); // Ensure range is [-D, D]
-    static_assert(D <= std::numeric_limits<T>::max(), "D overflows T");
-
-    entry += bonus - entry * abs(bonus) / D;
-
-    assert(abs(entry) <= D);
-  }
-};
-
-/// Stats is a generic N-dimensional array used to store various statistics.
-/// The first template parameter T is the base type of the array, the second
-/// template parameter D limits the range of updates in [-D, D] when we update
-/// values with the << operator, while the last parameters (Size and Sizes)
-/// encode the dimensions of the array.
-template <typename T, int D, int Size, int... Sizes>
-struct Stats : public std::array<Stats<T, D, Sizes...>, Size>
-{
-  typedef Stats<T, D, Size, Sizes...> stats;
-
-  void fill(const T& v) {
-
-    // For standard-layout 'this' points to first struct member
-    assert(std::is_standard_layout<stats>::value);
-
-    typedef StatsEntry<T, D> entry;
-    entry* p = reinterpret_cast<entry*>(this);
-    std::fill(p, p + sizeof(*this) / sizeof(entry), v);
-  }
-};
-
-template <typename T, int D, int Size>
-struct Stats<T, D, Size> : public std::array<StatsEntry<T, D>, Size> {};
-
-/// In stats table, D=0 means that the template parameter is not used
-enum StatsParams { NOT_USED = 0 };
-enum StatsType { NoCaptures, Captures };
-
-/// ButterflyHistory records how often quiet moves have been successful or
-/// unsuccessful during the current search, and is used for reduction and move
-/// ordering decisions. It uses 2 tables (one for each color) indexed by
-/// the move's from and to squares, see www.chessprogramming.org/Butterfly_Boards
-typedef Stats<int16_t, 13365, COLOR_NB, int(SQUARE_NB) * int(SQUARE_NB)> ButterflyHistory;
-
-/// At higher depths LowPlyHistory records successful quiet moves near the root
-/// and quiet moves which are/were in the PV (ttPv). It is cleared with each new
-/// search and filled during iterative deepening.
-constexpr int MAX_LPH = 4;
-typedef Stats<int16_t, 10692, MAX_LPH, int(SQUARE_NB) * int(SQUARE_NB)> LowPlyHistory;
-
-/// CounterMoveHistory stores counter moves indexed by [piece][to] of the previous
-/// move, see www.chessprogramming.org/Countermove_Heuristic
-typedef Stats<Move, NOT_USED, PIECE_NB, SQUARE_NB> CounterMoveHistory;
-
-/// CapturePieceToHistory is addressed by a move's [piece][to][captured piece type]
-typedef Stats<int16_t, 10692, PIECE_NB, SQUARE_NB, PIECE_TYPE_NB> CapturePieceToHistory;
-
-/// PieceToHistory is like ButterflyHistory but is addressed by a move's [piece][to]
-typedef Stats<int16_t, 29952, PIECE_NB, SQUARE_NB> PieceToHistory;
-
-/// ContinuationHistory is the combined history of a given pair of moves, usually
-/// the current one given a previous one. The nested history table is based on
-/// PieceToHistory instead of ButterflyBoards.
-typedef Stats<PieceToHistory, NOT_USED, PIECE_NB, SQUARE_NB> ContinuationHistory;
-
-
-/// MovePicker class is used to pick one pseudo-legal move at a time from the
-/// current position. The most important method is next_move(), which returns a
-/// new pseudo-legal move each time it is called, until there are no moves left,
-/// when MOVE_NONE is returned. In order to improve the efficiency of the
-/// alpha-beta algorithm, MovePicker attempts to return the moves which are most
-/// likely to get a cut-off first.
+// The MovePicker class is used to pick one pseudo-legal move at a time from the
+// current position. The most important method is next_move(), which emits one
+// new pseudo-legal move on every call, until there are no moves left, when
+// Move::none() is returned. In order to improve the efficiency of the alpha-beta
+// algorithm, MovePicker attempts to return the moves which are most likely to get
+// a cut-off first.
 class MovePicker {

-  enum PickType { Next, Best };
+   public:
+    MovePicker(const MovePicker&)            = delete;
+    MovePicker& operator=(const MovePicker&) = delete;
+    MovePicker(const Position&,
+               Move,
+               Depth,
+               const ButterflyHistory*,
+               const LowPlyHistory*,
+               const CapturePieceToHistory*,
+               const PieceToHistory**,
+               const PawnHistory*,
+               int);  // main search and quiescence search; the final int is stored as ply
+    MovePicker(const Position&, Move, int, const CapturePieceToHistory*);  // ProbCut; the int is the SEE threshold
+    Move next_move();  // returns Move::none() once no moves are left
+    void skip_quiet_moves();  // sets skipQuiets, which the quiet stages of next_move() check

-public:
-  MovePicker(const MovePicker&) = delete;
-  MovePicker& operator=(const MovePicker&) = delete;
-  MovePicker(const Position&, Move, Value, const CapturePieceToHistory*);
-  MovePicker(const Position&, Move, Depth, const ButterflyHistory*,
-                                           const CapturePieceToHistory*,
-                                           const PieceToHistory**,
-                                           Square);
-  MovePicker(const Position&, Move, Depth, const ButterflyHistory*,
-                                           const LowPlyHistory*,
-                                           const CapturePieceToHistory*,
-                                           const PieceToHistory**,
-                                           Move,
-                                           const Move*,
-                                           int);
-  Move next_move(bool skipQuiets = false);
+   private:
+    template<typename Pred>
+    Move select(Pred);
+    template<GenType>
+    void     score();
+    ExtMove* begin() { return cur; }
+    ExtMove* end() { return endMoves; }  // begin()/end() let score() range-for over [cur, endMoves)

-private:
-  template<PickType T, typename Pred> Move select(Pred);
-  template<GenType> void score();
-  ExtMove* begin() { return cur; }
-  ExtMove* end() { return endMoves; }
-
-  const Position& pos;
-  const ButterflyHistory* mainHistory;
-  const LowPlyHistory* lowPlyHistory;
-  const CapturePieceToHistory* captureHistory;
-  const PieceToHistory** continuationHistory;
-  Move ttMove;
-  ExtMove refutations[3], *cur, *endMoves, *endBadCaptures;
-  int stage;
-  Square recaptureSquare;
-  Value threshold;
-  Depth depth;
-  int ply;
-  ExtMove moves[MAX_MOVES];
+    const Position&              pos;
+    const ButterflyHistory*      mainHistory;
+    const LowPlyHistory*         lowPlyHistory;
+    const CapturePieceToHistory* captureHistory;
+    const PieceToHistory**       continuationHistory;
+    const PawnHistory*           pawnHistory;
+    Move                         ttMove;
+    ExtMove *                    cur, *endMoves, *endBadCaptures, *beginBadQuiets, *endBadQuiets;
+    int                          stage;  // current Stages value, advanced by next_move()
+    int                          threshold;  // SEE threshold; set by the ProbCut constructor only
+    Depth                        depth;
+    int                          ply;
+    bool                         skipQuiets = false;
+    ExtMove                      moves[MAX_MOVES];  // storage that cur, endMoves and the bad-move pointers point into
 };

-#endif // #ifndef MOVEPICK_H_INCLUDED
+}  // namespace Stockfish
+
+#endif  // #ifndef MOVEPICK_H_INCLUDED
@@ -1,54 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-// Definition of input features and network structure used in NNUE evaluation function
-
-#ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED
-#define NNUE_HALFKP_256X2_32_32_H_INCLUDED
-
-#include "../features/feature_set.h"
-#include "../features/half_kp.h"
-
-#include "../layers/input_slice.h"
-#include "../layers/affine_transform.h"
-#include "../layers/clipped_relu.h"
-
-namespace Eval::NNUE {
-
-// Input features used in evaluation function
-using RawFeatures = Features::FeatureSet<
-    Features::HalfKP<Features::Side::kFriend>>;
-
-// Number of input feature dimensions after conversion
-constexpr IndexType kTransformedFeatureDimensions = 256;
-
-namespace Layers {
-
-// Define network structure
-using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>;
-using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>;
-using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>;
-using OutputLayer = AffineTransform<HiddenLayer2, 1>;
-
-}  // namespace Layers
-
-using Network = Layers::OutputLayer;
-
-}  // namespace Eval::NNUE
-
-#endif // #ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED
@@ -1,144 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-// Code for calculating NNUE evaluation function
-
-#include <iostream>
-#include <set>
-
-#include "../evaluate.h"
-#include "../position.h"
-#include "../misc.h"
-#include "../uci.h"
-#include "../types.h"
-
-#include "evaluate_nnue.h"
-
-namespace Eval::NNUE {
-
-  // Input feature converter
-  LargePagePtr<FeatureTransformer> feature_transformer;
-
-  // Evaluation function
-  AlignedPtr<Network> network;
-
-  // Evaluation function file name
-  std::string fileName;
-
-  namespace Detail {
-
-  // Initialize the evaluation function parameters
-  template <typename T>
-  void Initialize(AlignedPtr<T>& pointer) {
-
-    pointer.reset(reinterpret_cast<T*>(std_aligned_alloc(alignof(T), sizeof(T))));
-    std::memset(pointer.get(), 0, sizeof(T));
-  }
-
-  template <typename T>
-  void Initialize(LargePagePtr<T>& pointer) {
-
-    static_assert(alignof(T) <= 4096, "aligned_large_pages_alloc() may fail for such a big alignment requirement of T");
-    pointer.reset(reinterpret_cast<T*>(aligned_large_pages_alloc(sizeof(T))));
-    std::memset(pointer.get(), 0, sizeof(T));
-  }
-
-  // Read evaluation function parameters
-  template <typename T>
-  bool ReadParameters(std::istream& stream, T& reference) {
-
-    std::uint32_t header;
-    header = read_little_endian<std::uint32_t>(stream);
-    if (!stream || header != T::GetHashValue()) return false;
-    return reference.ReadParameters(stream);
-  }
-
-  }  // namespace Detail
-
-  // Initialize the evaluation function parameters
-  void Initialize() {
-
-    Detail::Initialize(feature_transformer);
-    Detail::Initialize(network);
-  }
-
-  // Read network header
-  bool ReadHeader(std::istream& stream, std::uint32_t* hash_value, std::string* architecture)
-  {
-    std::uint32_t version, size;
-
-    version     = read_little_endian<std::uint32_t>(stream);
-    *hash_value = read_little_endian<std::uint32_t>(stream);
-    size        = read_little_endian<std::uint32_t>(stream);
-    if (!stream || version != kVersion) return false;
-    architecture->resize(size);
-    stream.read(&(*architecture)[0], size);
-    return !stream.fail();
-  }
-
-  // Read network parameters
-  bool ReadParameters(std::istream& stream) {
-
-    std::uint32_t hash_value;
-    std::string architecture;
-    if (!ReadHeader(stream, &hash_value, &architecture)) return false;
-    if (hash_value != kHashValue) return false;
-    if (!Detail::ReadParameters(stream, *feature_transformer)) return false;
-    if (!Detail::ReadParameters(stream, *network)) return false;
-    return stream && stream.peek() == std::ios::traits_type::eof();
-  }
-
-  // Evaluation function. Perform differential calculation.
-  Value evaluate(const Position& pos) {
-
-    // We manually align the arrays on the stack because with gcc < 9.3
-    // overaligning stack variables with alignas() doesn't work correctly.
-
-    constexpr uint64_t alignment = kCacheLineSize;
-
-#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
-    TransformedFeatureType transformed_features_unaligned[
-      FeatureTransformer::kBufferSize + alignment / sizeof(TransformedFeatureType)];
-    char buffer_unaligned[Network::kBufferSize + alignment];
-
-    auto* transformed_features = align_ptr_up<alignment>(&transformed_features_unaligned[0]);
-    auto* buffer = align_ptr_up<alignment>(&buffer_unaligned[0]);
-#else
-    alignas(alignment)
-      TransformedFeatureType transformed_features[FeatureTransformer::kBufferSize];
-    alignas(alignment) char buffer[Network::kBufferSize];
-#endif
-
-    ASSERT_ALIGNED(transformed_features, alignment);
-    ASSERT_ALIGNED(buffer, alignment);
-
-    feature_transformer->Transform(pos, transformed_features);
-    const auto output = network->Propagate(transformed_features, buffer);
-
-    return static_cast<Value>(output[0] / FV_SCALE);
-  }
-
-  // Load eval, from a file stream or a memory stream
-  bool load_eval(std::string name, std::istream& stream) {
-
-    Initialize();
-    fileName = name;
-    return ReadParameters(stream);
-  }
-
-} // namespace Eval::NNUE
@@ -1,59 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-// header used in NNUE evaluation function
-
-#ifndef NNUE_EVALUATE_NNUE_H_INCLUDED
-#define NNUE_EVALUATE_NNUE_H_INCLUDED
-
-#include "nnue_feature_transformer.h"
-
-#include <memory>
-
-namespace Eval::NNUE {
-
-  // Hash value of evaluation function structure
-  constexpr std::uint32_t kHashValue =
-      FeatureTransformer::GetHashValue() ^ Network::GetHashValue();
-
-  // Deleter for automating release of memory area
-  template <typename T>
-  struct AlignedDeleter {
-    void operator()(T* ptr) const {
-      ptr->~T();
-      std_aligned_free(ptr);
-    }
-  };
-
-  template <typename T>
-  struct LargePageDeleter {
-    void operator()(T* ptr) const {
-      ptr->~T();
-      aligned_large_pages_free(ptr);
-    }
-  };
-
-  template <typename T>
-  using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>;
-
-  template <typename T>
-  using LargePagePtr = std::unique_ptr<T, LargePageDeleter<T>>;
-
-}  // namespace Eval::NNUE
-
-#endif // #ifndef NNUE_EVALUATE_NNUE_H_INCLUDED
@@ -1,69 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-// A class template that represents the input feature set of the NNUE evaluation function
-
-#ifndef NNUE_FEATURE_SET_H_INCLUDED
-#define NNUE_FEATURE_SET_H_INCLUDED
-
-#include "features_common.h"
-#include <array>
-
-namespace Eval::NNUE::Features {
-
-  // Class template that represents a list of values
-  template <typename T, T... Values>
-  struct CompileTimeList;
-
-  template <typename T, T First, T... Remaining>
-  struct CompileTimeList<T, First, Remaining...> {
-    static constexpr bool Contains(T value) {
-      return value == First || CompileTimeList<T, Remaining...>::Contains(value);
-    }
-    static constexpr std::array<T, sizeof...(Remaining) + 1>
-        kValues = {{First, Remaining...}};
-  };
-
-  // Base class of feature set
-  template <typename Derived>
-  class FeatureSetBase {
-
-  };
-
-  // Class template that represents the feature set
-  template <typename FeatureType>
-  class FeatureSet<FeatureType> : public FeatureSetBase<FeatureSet<FeatureType>> {
-
-   public:
-    // Hash value embedded in the evaluation file
-    static constexpr std::uint32_t kHashValue = FeatureType::kHashValue;
-    // Number of feature dimensions
-    static constexpr IndexType kDimensions = FeatureType::kDimensions;
-    // Maximum number of simultaneously active features
-    static constexpr IndexType kMaxActiveDimensions =
-        FeatureType::kMaxActiveDimensions;
-    // Trigger for full calculation instead of difference calculation
-    using SortedTriggerSet =
-        CompileTimeList<TriggerEvent, FeatureType::kRefreshTrigger>;
-    static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues;
-
-  };
-
-}  // namespace Eval::NNUE::Features
-
-#endif // #ifndef NNUE_FEATURE_SET_H_INCLUDED
@@ -0,0 +1,88 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Definition of input features HalfKAv2_hm of NNUE evaluation function
+
+#include "half_ka_v2_hm.h"
+
+#include "../../bitboard.h"
+#include "../../position.h"
+#include "../../types.h"
+#include "../nnue_accumulator.h"
+
+namespace Stockfish::Eval::NNUE::Features {
+
+// Index of a feature for a given king position and another piece on some square
+template<Color Perspective>
+inline IndexType HalfKAv2_hm::make_index(Square s, Piece pc, Square ksq) {
+    return IndexType((int(s) ^ OrientTBL[Perspective][ksq]) + PieceSquareIndex[Perspective][pc]
+                     + KingBuckets[Perspective][ksq]);
+}
+
+// Get a list of indices for active features
+template<Color Perspective>
+void HalfKAv2_hm::append_active_indices(const Position& pos, IndexList& active) {
+    Square   ksq = pos.square<KING>(Perspective);
+    Bitboard bb  = pos.pieces();
+    while (bb)
+    {
+        Square s = pop_lsb(bb);
+        active.push_back(make_index<Perspective>(s, pos.piece_on(s), ksq));
+    }
+}
+
+// Explicit template instantiations
+template void HalfKAv2_hm::append_active_indices<WHITE>(const Position& pos, IndexList& active);
+template void HalfKAv2_hm::append_active_indices<BLACK>(const Position& pos, IndexList& active);
+template IndexType HalfKAv2_hm::make_index<WHITE>(Square s, Piece pc, Square ksq);
+template IndexType HalfKAv2_hm::make_index<BLACK>(Square s, Piece pc, Square ksq);
+
+// Get a list of indices for recently changed features
+template<Color Perspective>
+void HalfKAv2_hm::append_changed_indices(Square            ksq,
+                                         const DirtyPiece& dp,
+                                         IndexList&        removed,
+                                         IndexList&        added) {
+    for (int i = 0; i < dp.dirty_num; ++i)
+    {
+        if (dp.from[i] != SQ_NONE)
+            removed.push_back(make_index<Perspective>(dp.from[i], dp.piece[i], ksq));
+        if (dp.to[i] != SQ_NONE)
+            added.push_back(make_index<Perspective>(dp.to[i], dp.piece[i], ksq));
+    }
+}
+
+// Explicit template instantiations
+template void HalfKAv2_hm::append_changed_indices<WHITE>(Square            ksq,
+                                                         const DirtyPiece& dp,
+                                                         IndexList&        removed,
+                                                         IndexList&        added);
+template void HalfKAv2_hm::append_changed_indices<BLACK>(Square            ksq,
+                                                         const DirtyPiece& dp,
+                                                         IndexList&        removed,
+                                                         IndexList&        added);
+
+int HalfKAv2_hm::update_cost(const StateInfo* st) { return st->dirtyPiece.dirty_num; }
+
+int HalfKAv2_hm::refresh_cost(const Position& pos) { return pos.count<ALL_PIECES>(); }
+
+bool HalfKAv2_hm::requires_refresh(const StateInfo* st, Color perspective) {
+    return st->dirtyPiece.piece[0] == make_piece(perspective, KING);
+}
+
+}  // namespace Stockfish::Eval::NNUE::Features
@@ -0,0 +1,150 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Definition of input features HalfKAv2_hm of NNUE evaluation function
+
+#ifndef NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED
+#define NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED
+
+#include <cstdint>
+
+#include "../../misc.h"
+#include "../../types.h"
+#include "../nnue_common.h"
+
+namespace Stockfish {
+struct StateInfo;
+class Position;
+}
+
+namespace Stockfish::Eval::NNUE::Features {
+
+// Feature HalfKAv2_hm: Combination of the position of own king and the
+// position of pieces. Position mirrored such that king is always on e..h files.
+class HalfKAv2_hm {
+
+    // Unique number for each piece type on each square
+    enum {
+        PS_NONE     = 0,
+        PS_W_PAWN   = 0,
+        PS_B_PAWN   = 1 * SQUARE_NB,
+        PS_W_KNIGHT = 2 * SQUARE_NB,
+        PS_B_KNIGHT = 3 * SQUARE_NB,
+        PS_W_BISHOP = 4 * SQUARE_NB,
+        PS_B_BISHOP = 5 * SQUARE_NB,
+        PS_W_ROOK   = 6 * SQUARE_NB,
+        PS_B_ROOK   = 7 * SQUARE_NB,
+        PS_W_QUEEN  = 8 * SQUARE_NB,
+        PS_B_QUEEN  = 9 * SQUARE_NB,
+        PS_KING     = 10 * SQUARE_NB,
+        PS_NB       = 11 * SQUARE_NB
+    };
+
+    static constexpr IndexType PieceSquareIndex[COLOR_NB][PIECE_NB] = {
+      // Convention: W - us, B - them
+      // Viewed from other side, W and B are reversed
+      {PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE,
+       PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE},
+      {PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE,
+       PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE}};
+
+   public:
+    // Feature name
+    static constexpr const char* Name = "HalfKAv2_hm(Friend)";
+
+    // Hash value embedded in the evaluation file
+    static constexpr std::uint32_t HashValue = 0x7f234cb8u;
+
+    // Number of feature dimensions
+    static constexpr IndexType Dimensions =
+      static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_NB) / 2;
+
+#define B(v) (v * PS_NB)
+    // clang-format off
+    static constexpr int KingBuckets[COLOR_NB][SQUARE_NB] = {
+      { B(28), B(29), B(30), B(31), B(31), B(30), B(29), B(28),
+        B(24), B(25), B(26), B(27), B(27), B(26), B(25), B(24),
+        B(20), B(21), B(22), B(23), B(23), B(22), B(21), B(20),
+        B(16), B(17), B(18), B(19), B(19), B(18), B(17), B(16),
+        B(12), B(13), B(14), B(15), B(15), B(14), B(13), B(12),
+        B( 8), B( 9), B(10), B(11), B(11), B(10), B( 9), B( 8),
+        B( 4), B( 5), B( 6), B( 7), B( 7), B( 6), B( 5), B( 4),
+        B( 0), B( 1), B( 2), B( 3), B( 3), B( 2), B( 1), B( 0) },
+      { B( 0), B( 1), B( 2), B( 3), B( 3), B( 2), B( 1), B( 0),
+        B( 4), B( 5), B( 6), B( 7), B( 7), B( 6), B( 5), B( 4),
+        B( 8), B( 9), B(10), B(11), B(11), B(10), B( 9), B( 8),
+        B(12), B(13), B(14), B(15), B(15), B(14), B(13), B(12),
+        B(16), B(17), B(18), B(19), B(19), B(18), B(17), B(16),
+        B(20), B(21), B(22), B(23), B(23), B(22), B(21), B(20),
+        B(24), B(25), B(26), B(27), B(27), B(26), B(25), B(24),
+        B(28), B(29), B(30), B(31), B(31), B(30), B(29), B(28) }
+    };
+    // clang-format on
+#undef B
+    // clang-format off
+    // XOR mask to orient a square: flips ranks for black, mirrors files when the king is on a..d
+    static constexpr int OrientTBL[COLOR_NB][SQUARE_NB] = {
+      { SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1,
+        SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1,
+        SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1,
+        SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1,
+        SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1,
+        SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1,
+        SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1,
+        SQ_H1, SQ_H1, SQ_H1, SQ_H1, SQ_A1, SQ_A1, SQ_A1, SQ_A1 },
+      { SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8,
+        SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8,
+        SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8,
+        SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8,
+        SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8,
+        SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8,
+        SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8,
+        SQ_H8, SQ_H8, SQ_H8, SQ_H8, SQ_A8, SQ_A8, SQ_A8, SQ_A8 }
+    };
+    // clang-format on
+
+    // Maximum number of simultaneously active features.
+    static constexpr IndexType MaxActiveDimensions = 32;
+    using IndexList                                = ValueList<IndexType, MaxActiveDimensions>;
+
+    // Index of a feature for a given king position and another piece on some square
+    template<Color Perspective>
+    static IndexType make_index(Square s, Piece pc, Square ksq);
+
+    // Get a list of indices for active features
+    template<Color Perspective>
+    static void append_active_indices(const Position& pos, IndexList& active);
+
+    // Get a list of indices for recently changed features
+    template<Color Perspective>
+    static void
+    append_changed_indices(Square ksq, const DirtyPiece& dp, IndexList& removed, IndexList& added);
+
+    // Returns the cost of updating one perspective, the most costly one.
+    // Assumes no refresh needed.
+    static int update_cost(const StateInfo* st);
+    static int refresh_cost(const Position& pos);
+
+    // Returns whether the change stored in this StateInfo means
+    // that a full accumulator refresh is required.
+    static bool requires_refresh(const StateInfo* st, Color perspective);
+};
+
+}  // namespace Stockfish::Eval::NNUE::Features
+
+#endif  // #ifndef NNUE_FEATURES_HALF_KA_V2_HM_H_INCLUDED
@@ -1,68 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-//Definition of input features HalfKP of NNUE evaluation function
-
-#include "half_kp.h"
-#include "index_list.h"
-
-namespace Eval::NNUE::Features {
-
-  // Orient a square according to perspective (rotates by 180 for black)
-  inline Square orient(Color perspective, Square s) {
-    return Square(int(s) ^ (bool(perspective) * 63));
-  }
-
-  // Index of a feature for a given king position and another piece on some square
-  inline IndexType make_index(Color perspective, Square s, Piece pc, Square ksq) {
-    return IndexType(orient(perspective, s) + kpp_board_index[perspective][pc] + PS_END * ksq);
-  }
-
-  // Get a list of indices for active features
-  template <Side AssociatedKing>
-  void HalfKP<AssociatedKing>::AppendActiveIndices(
-      const Position& pos, Color perspective, IndexList* active) {
-
-    Square ksq = orient(perspective, pos.square<KING>(perspective));
-    Bitboard bb = pos.pieces() & ~pos.pieces(KING);
-    while (bb) {
-      Square s = pop_lsb(&bb);
-      active->push_back(make_index(perspective, s, pos.piece_on(s), ksq));
-    }
-  }
-
-  // Get a list of indices for recently changed features
-  template <Side AssociatedKing>
-  void HalfKP<AssociatedKing>::AppendChangedIndices(
-      const Position& pos, const DirtyPiece& dp, Color perspective,
-      IndexList* removed, IndexList* added) {
-
-    Square ksq = orient(perspective, pos.square<KING>(perspective));
-    for (int i = 0; i < dp.dirty_num; ++i) {
-      Piece pc = dp.piece[i];
-      if (type_of(pc) == KING) continue;
-      if (dp.from[i] != SQ_NONE)
-        removed->push_back(make_index(perspective, dp.from[i], pc, ksq));
-      if (dp.to[i] != SQ_NONE)
-        added->push_back(make_index(perspective, dp.to[i], pc, ksq));
-    }
-  }
-
-  template class HalfKP<Side::kFriend>;
-
-}  // namespace Eval::NNUE::Features
@@ -1,59 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-//Definition of input features HalfKP of NNUE evaluation function
-
-#ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED
-#define NNUE_FEATURES_HALF_KP_H_INCLUDED
-
-#include "../../evaluate.h"
-#include "features_common.h"
-
-namespace Eval::NNUE::Features {
-
-  // Feature HalfKP: Combination of the position of own king
-  // and the position of pieces other than kings
-  template <Side AssociatedKing>
-  class HalfKP {
-
-   public:
-    // Feature name
-    static constexpr const char* kName = "HalfKP(Friend)";
-    // Hash value embedded in the evaluation file
-    static constexpr std::uint32_t kHashValue =
-        0x5D69D5B9u ^ (AssociatedKing == Side::kFriend);
-    // Number of feature dimensions
-    static constexpr IndexType kDimensions =
-        static_cast<IndexType>(SQUARE_NB) * static_cast<IndexType>(PS_END);
-    // Maximum number of simultaneously active features
-    static constexpr IndexType kMaxActiveDimensions = 30; // Kings don't count
-    // Trigger for full calculation instead of difference calculation
-    static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kFriendKingMoved;
-
-    // Get a list of indices for active features
-    static void AppendActiveIndices(const Position& pos, Color perspective,
-                                    IndexList* active);
-
-    // Get a list of indices for recently changed features
-    static void AppendChangedIndices(const Position& pos, const DirtyPiece& dp, Color perspective,
-                                     IndexList* removed, IndexList* added);
-  };
-
-}  // namespace Eval::NNUE::Features
-
-#endif // #ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED
@@ -1,64 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-// Definition of index list of input features
-
-#ifndef NNUE_FEATURES_INDEX_LIST_H_INCLUDED
-#define NNUE_FEATURES_INDEX_LIST_H_INCLUDED
-
-#include "../../position.h"
-#include "../nnue_architecture.h"
-
-namespace Eval::NNUE::Features {
-
-  // Class template used for feature index list
-  template <typename T, std::size_t MaxSize>
-  class ValueList {
-
-   public:
-    std::size_t size() const { return size_; }
-    void resize(std::size_t size) { size_ = size; }
-    void push_back(const T& value) { values_[size_++] = value; }
-    T& operator[](std::size_t index) { return values_[index]; }
-    T* begin() { return values_; }
-    T* end() { return values_ + size_; }
-    const T& operator[](std::size_t index) const { return values_[index]; }
-    const T* begin() const { return values_; }
-    const T* end() const { return values_ + size_; }
-
-    void swap(ValueList& other) {
-      const std::size_t max_size = std::max(size_, other.size_);
-      for (std::size_t i = 0; i < max_size; ++i) {
-        std::swap(values_[i], other.values_[i]);
-      }
-      std::swap(size_, other.size_);
-    }
-
-   private:
-    T values_[MaxSize];
-    std::size_t size_ = 0;
-  };
-
-  //Type of feature index list
-  class IndexList
-      : public ValueList<IndexType, RawFeatures::kMaxActiveDimensions> {
-  };
-
-}  // namespace Eval::NNUE::Features
-
-#endif // NNUE_FEATURES_INDEX_LIST_H_INCLUDED
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -21,444 +21,286 @@
 #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
 #define NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED

+#include <cstdint>
 #include <iostream>
+
 #include "../nnue_common.h"
+#include "simd.h"

-namespace Eval::NNUE::Layers {
+/*
+  This file contains the definition for a fully connected layer (aka affine transform).

-  // Affine transformation layer
-  template <typename PreviousLayer, IndexType OutputDimensions>
-  class AffineTransform {
-   public:
-    // Input/output type
-    using InputType = typename PreviousLayer::OutputType;
-    using OutputType = std::int32_t;
-    static_assert(std::is_same<InputType, std::uint8_t>::value, "");
+    - expected use-case is for when PaddedInputDimensions == 32 and InputDimensions <= 32.
+      - that's why AVX512 is hard to implement
+    - expected use-case is small layers
+    - inputs are processed in chunks of 4, and the weights are transposed to match
+    - accumulation happens directly to int32s
+*/

-    // Number of input/output dimensions
-    static constexpr IndexType kInputDimensions =
-        PreviousLayer::kOutputDimensions;
-    static constexpr IndexType kOutputDimensions = OutputDimensions;
-    static constexpr IndexType kPaddedInputDimensions =
-        CeilToMultiple<IndexType>(kInputDimensions, kMaxSimdWidth);
-#if defined (USE_AVX512)
-    static constexpr const IndexType kOutputSimdWidth = kSimdWidth / 2;
-#elif defined (USE_SSSE3)
-    static constexpr const IndexType kOutputSimdWidth = kSimdWidth / 4;
+namespace Stockfish::Eval::NNUE::Layers {
+
+#if defined(USE_SSSE3) || defined(USE_NEON_DOTPROD)
+    #define ENABLE_SEQ_OPT
 #endif

-    // Size of forward propagation buffer used in this layer
-    static constexpr std::size_t kSelfBufferSize =
-        CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
+// Fallback implementation for older/other architectures.
+// Requires the input to be padded to at least 16 values.
+#ifndef ENABLE_SEQ_OPT

-    // Size of the forward propagation buffer used from the input layer to this layer
-    static constexpr std::size_t kBufferSize =
-        PreviousLayer::kBufferSize + kSelfBufferSize;
+template<IndexType InputDimensions, IndexType PaddedInputDimensions, IndexType OutputDimensions>
+static void affine_transform_non_ssse3(std::int32_t*       output,
+                                       const std::int8_t*  weights,
+                                       const std::int32_t* biases,
+                                       const std::uint8_t* input) {
+    #if defined(USE_SSE2) || defined(USE_NEON)
+        #if defined(USE_SSE2)
+    // At least a multiple of 16, with SSE2.
+    constexpr IndexType NumChunks   = ceil_to_multiple<IndexType>(InputDimensions, 16) / 16;
+    const __m128i       Zeros       = _mm_setzero_si128();
+    const auto          inputVector = reinterpret_cast<const __m128i*>(input);

-    // Hash value embedded in the evaluation file
-    static constexpr std::uint32_t GetHashValue() {
-      std::uint32_t hash_value = 0xCC03DAE4u;
-      hash_value += kOutputDimensions;
-      hash_value ^= PreviousLayer::GetHashValue() >> 1;
-      hash_value ^= PreviousLayer::GetHashValue() << 31;
-      return hash_value;
-    }
+        #elif defined(USE_NEON)
+    constexpr IndexType NumChunks   = ceil_to_multiple<IndexType>(InputDimensions, 16) / 16;
+    const auto          inputVector = reinterpret_cast<const int8x8_t*>(input);
+        #endif

-   // Read network parameters
-    bool ReadParameters(std::istream& stream) {
-      if (!previous_layer_.ReadParameters(stream)) return false;
-      for (std::size_t i = 0; i < kOutputDimensions; ++i)
-        biases_[i] = read_little_endian<BiasType>(stream);
-      for (std::size_t i = 0; i < kOutputDimensions * kPaddedInputDimensions; ++i)
-#if !defined (USE_SSSE3)
-        weights_[i] = read_little_endian<WeightType>(stream);
-#else
-        weights_[
-          (i / 4) % (kPaddedInputDimensions / 4) * kOutputDimensions * 4 +
-          i / kPaddedInputDimensions * 4 +
-          i % 4
-        ] = read_little_endian<WeightType>(stream);
+    for (IndexType i = 0; i < OutputDimensions; ++i)
+    {
+        const IndexType offset = i * PaddedInputDimensions;

-      // Determine if eights of weight and input products can be summed using 16bits
-      // without saturation. We assume worst case combinations of 0 and 127 for all inputs.
-      if (kOutputDimensions > 1 && !stream.fail())
-      {
-          canSaturate16.count = 0;
-#if !defined(USE_VNNI)
-          for (IndexType i = 0; i < kPaddedInputDimensions; i += 16)
-              for (IndexType j = 0; j < kOutputDimensions; ++j)
-                  for (int x = 0; x < 2; ++x)
-                  {
-                      WeightType* w = &weights_[i * kOutputDimensions + j * 4 + x * 2];
-                      int sum[2] = {0, 0};
-                      for (int k = 0; k < 8; ++k)
-                      {
-                          IndexType idx = k / 2 * kOutputDimensions * 4 + k % 2;
-                          sum[w[idx] < 0] += w[idx];
-                      }
-                      for (int sign : {-1, 1})
-                          while (sign * sum[sign == -1] > 258)
-                          {
-                              int maxK = 0, maxW = 0;
-                              for (int k = 0; k < 8; ++k)
-                              {
-                                  IndexType idx = k / 2 * kOutputDimensions * 4 + k % 2;
-                                  if (maxW < sign * w[idx])
-                                      maxK = k, maxW = sign * w[idx];
-                              }
-
-                              IndexType idx = maxK / 2 * kOutputDimensions * 4 + maxK % 2;
-                              sum[sign == -1] -= w[idx];
-                              canSaturate16.add(j, i + maxK / 2 * 4 + maxK % 2 + x * 2, w[idx]);
-                              w[idx] = 0;
-                          }
-                  }
-
-          // Non functional optimization for faster more linear access
-          std::sort(canSaturate16.ids, canSaturate16.ids + canSaturate16.count,
-                    [](const typename CanSaturate::Entry& e1, const typename CanSaturate::Entry& e2)
-                    { return e1.in == e2.in ? e1.out < e2.out : e1.in < e2.in; });
-#endif
-      }
-#endif
-
-      return !stream.fail();
-    }
-
-    // Forward propagation
-    const OutputType* Propagate(
-        const TransformedFeatureType* transformed_features, char* buffer) const {
-      const auto input = previous_layer_.Propagate(
-          transformed_features, buffer + kSelfBufferSize);
-
-#if defined (USE_AVX512)
-
-      [[maybe_unused]] const __m512i kOnes512 = _mm512_set1_epi16(1);
-
-      [[maybe_unused]] auto m512_hadd = [](__m512i sum, int bias) -> int {
-        return _mm512_reduce_add_epi32(sum) + bias;
-      };
-
-      [[maybe_unused]] auto m512_add_dpbusd_epi32 = [=](__m512i& acc, __m512i a, __m512i b) {
-#if defined (USE_VNNI)
-        acc = _mm512_dpbusd_epi32(acc, a, b);
-#else
-        __m512i product0 = _mm512_maddubs_epi16(a, b);
-        product0 = _mm512_madd_epi16(product0, kOnes512);
-        acc = _mm512_add_epi32(acc, product0);
-#endif
-      };
-
-      [[maybe_unused]] auto m512_add_dpbusd_epi32x4 = [=](__m512i& acc, __m512i a0, __m512i b0, __m512i a1, __m512i b1,
-                                                                        __m512i a2, __m512i b2, __m512i a3, __m512i b3) {
-#if defined (USE_VNNI)
-        acc = _mm512_dpbusd_epi32(acc, a0, b0);
-        acc = _mm512_dpbusd_epi32(acc, a1, b1);
-        acc = _mm512_dpbusd_epi32(acc, a2, b2);
-        acc = _mm512_dpbusd_epi32(acc, a3, b3);
-#else
-        __m512i product0 = _mm512_maddubs_epi16(a0, b0);
-        __m512i product1 = _mm512_maddubs_epi16(a1, b1);
-        __m512i product2 = _mm512_maddubs_epi16(a2, b2);
-        __m512i product3 = _mm512_maddubs_epi16(a3, b3);
-        product0 = _mm512_add_epi16(product0, product1);
-        product2 = _mm512_add_epi16(product2, product3);
-        product0 = _mm512_add_epi16(product0, product2);
-        product0 = _mm512_madd_epi16(product0, kOnes512);
-        acc = _mm512_add_epi32(acc, product0);
-#endif
-      };
-
-#endif
-#if defined (USE_AVX2)
-
-      [[maybe_unused]] const __m256i kOnes256 = _mm256_set1_epi16(1);
-
-      [[maybe_unused]] auto m256_hadd = [](__m256i sum, int bias) -> int {
-        __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1));
-        sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC));
-        sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB));
-        return _mm_cvtsi128_si32(sum128) + bias;
-      };
-
-      [[maybe_unused]] auto m256_add_dpbusd_epi32 = [=](__m256i& acc, __m256i a, __m256i b) {
-#if defined (USE_VNNI)
-        acc = _mm256_dpbusd_epi32(acc, a, b);
-#else
-        __m256i product0 = _mm256_maddubs_epi16(a, b);
-        product0 = _mm256_madd_epi16(product0, kOnes256);
-        acc = _mm256_add_epi32(acc, product0);
-#endif
-      };
-
-      [[maybe_unused]] auto m256_add_dpbusd_epi32x4 = [=](__m256i& acc, __m256i a0, __m256i b0, __m256i a1, __m256i b1,
-                                                                        __m256i a2, __m256i b2, __m256i a3, __m256i b3) {
-#if defined (USE_VNNI)
-        acc = _mm256_dpbusd_epi32(acc, a0, b0);
-        acc = _mm256_dpbusd_epi32(acc, a1, b1);
-        acc = _mm256_dpbusd_epi32(acc, a2, b2);
-        acc = _mm256_dpbusd_epi32(acc, a3, b3);
-#else
-        __m256i product0 = _mm256_maddubs_epi16(a0, b0);
-        __m256i product1 = _mm256_maddubs_epi16(a1, b1);
-        __m256i product2 = _mm256_maddubs_epi16(a2, b2);
-        __m256i product3 = _mm256_maddubs_epi16(a3, b3);
-        product0 = _mm256_add_epi16(product0, product1);
-        product2 = _mm256_add_epi16(product2, product3);
-        product0 = _mm256_add_epi16(product0, product2);
-        product0 = _mm256_madd_epi16(product0, kOnes256);
-        acc = _mm256_add_epi32(acc, product0);
-#endif
-      };
-
-#endif
-#if defined (USE_SSSE3)
-
-      [[maybe_unused]] const __m128i kOnes128 = _mm_set1_epi16(1);
-
-      [[maybe_unused]] auto m128_hadd = [](__m128i sum, int bias) -> int {
-        sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC
-        sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB
-        return _mm_cvtsi128_si32(sum) + bias;
-      };
-
-      [[maybe_unused]] auto m128_add_dpbusd_epi32 = [=](__m128i& acc, __m128i a, __m128i b) {
-        __m128i product0 = _mm_maddubs_epi16(a, b);
-        product0 = _mm_madd_epi16(product0, kOnes128);
-        acc = _mm_add_epi32(acc, product0);
-      };
-
-      [[maybe_unused]] auto m128_add_dpbusd_epi32x4 = [=](__m128i& acc, __m128i a0, __m128i b0, __m128i a1, __m128i b1,
-                                                                        __m128i a2, __m128i b2, __m128i a3, __m128i b3) {
-        __m128i product0 = _mm_maddubs_epi16(a0, b0);
-        __m128i product1 = _mm_maddubs_epi16(a1, b1);
-        __m128i product2 = _mm_maddubs_epi16(a2, b2);
-        __m128i product3 = _mm_maddubs_epi16(a3, b3);
-        product0 = _mm_adds_epi16(product0, product1);
-        product2 = _mm_adds_epi16(product2, product3);
-        product0 = _mm_adds_epi16(product0, product2);
-        product0 = _mm_madd_epi16(product0, kOnes128);
-        acc = _mm_add_epi32(acc, product0);
-      };
-
-#endif
-
-#if defined (USE_AVX512)
-      using vec_t = __m512i;
-      #define vec_setzero _mm512_setzero_si512
-      #define vec_set_32 _mm512_set1_epi32
-      auto& vec_add_dpbusd_32 = m512_add_dpbusd_epi32;
-      auto& vec_add_dpbusd_32x4 = m512_add_dpbusd_epi32x4;
-      auto& vec_hadd = m512_hadd;
-#elif defined (USE_AVX2)
-      using vec_t = __m256i;
-      #define vec_setzero _mm256_setzero_si256
-      #define vec_set_32 _mm256_set1_epi32
-      auto& vec_add_dpbusd_32 = m256_add_dpbusd_epi32;
-      auto& vec_add_dpbusd_32x4 = m256_add_dpbusd_epi32x4;
-      auto& vec_hadd = m256_hadd;
-#elif defined (USE_SSSE3)
-      using vec_t = __m128i;
-      #define vec_setzero _mm_setzero_si128
-      #define vec_set_32 _mm_set1_epi32
-      auto& vec_add_dpbusd_32 = m128_add_dpbusd_epi32;
-      auto& vec_add_dpbusd_32x4 = m128_add_dpbusd_epi32x4;
-      auto& vec_hadd = m128_hadd;
-#endif
-
-#if defined (USE_SSSE3)
-
-      const auto output = reinterpret_cast<OutputType*>(buffer);
-      const auto input_vector = reinterpret_cast<const vec_t*>(input);
-
-      static_assert(kOutputDimensions % kOutputSimdWidth == 0 || kOutputDimensions == 1);
-
-      // kOutputDimensions is either 1 or a multiple of kSimdWidth
-      // because then it is also an input dimension.
-      if constexpr (kOutputDimensions % kOutputSimdWidth == 0)
-      {
-          constexpr IndexType kNumChunks = kPaddedInputDimensions / 4;
-
-          const auto input32 = reinterpret_cast<const std::int32_t*>(input);
-          vec_t* outptr = reinterpret_cast<vec_t*>(output);
-          std::memcpy(output, biases_, kOutputDimensions * sizeof(OutputType));
-
-          for (int i = 0; i < (int)kNumChunks - 3; i += 4)
-          {
-              const vec_t in0 = vec_set_32(input32[i + 0]);
-              const vec_t in1 = vec_set_32(input32[i + 1]);
-              const vec_t in2 = vec_set_32(input32[i + 2]);
-              const vec_t in3 = vec_set_32(input32[i + 3]);
-              const auto col0 = reinterpret_cast<const vec_t*>(&weights_[(i + 0) * kOutputDimensions * 4]);
-              const auto col1 = reinterpret_cast<const vec_t*>(&weights_[(i + 1) * kOutputDimensions * 4]);
-              const auto col2 = reinterpret_cast<const vec_t*>(&weights_[(i + 2) * kOutputDimensions * 4]);
-              const auto col3 = reinterpret_cast<const vec_t*>(&weights_[(i + 3) * kOutputDimensions * 4]);
-              for (int j = 0; j * kOutputSimdWidth < kOutputDimensions; ++j)
-                  vec_add_dpbusd_32x4(outptr[j], in0, col0[j], in1, col1[j], in2, col2[j], in3, col3[j]);
-          }
-          for (int i = 0; i < canSaturate16.count; ++i)
-              output[canSaturate16.ids[i].out] += input[canSaturate16.ids[i].in] * canSaturate16.ids[i].w;
-      }
-      else if constexpr (kOutputDimensions == 1)
-      {
-#if defined (USE_AVX512)
-          if constexpr (kPaddedInputDimensions % (kSimdWidth * 2) != 0)
-          {
-              constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
-              const auto input_vector256 = reinterpret_cast<const __m256i*>(input);
-
-              __m256i sum0 = _mm256_setzero_si256();
-              const auto row0 = reinterpret_cast<const __m256i*>(&weights_[0]);
-
-              for (int j = 0; j < (int)kNumChunks; ++j)
-              {
-                  const __m256i in = input_vector256[j];
-                  m256_add_dpbusd_epi32(sum0, in, row0[j]);
-              }
-              output[0] = m256_hadd(sum0, biases_[0]);
-          }
-          else
-#endif
-          {
-#if defined (USE_AVX512)
-              constexpr IndexType kNumChunks = kPaddedInputDimensions / (kSimdWidth * 2);
-#else
-              constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
-#endif
-              vec_t sum0 = vec_setzero();
-              const auto row0 = reinterpret_cast<const vec_t*>(&weights_[0]);
-
-              for (int j = 0; j < (int)kNumChunks; ++j)
-              {
-                  const vec_t in = input_vector[j];
-                  vec_add_dpbusd_32(sum0, in, row0[j]);
-              }
-              output[0] = vec_hadd(sum0, biases_[0]);
-          }
-      }
-
-#else
-
-// Use old implementation for the other architectures.
-
-      auto output = reinterpret_cast<OutputType*>(buffer);
-
-#if defined(USE_SSE2)
-      constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
-      const __m128i kZeros = _mm_setzero_si128();
-      const auto input_vector = reinterpret_cast<const __m128i*>(input);
-
-#elif defined(USE_MMX)
-      constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
-      const __m64 kZeros = _mm_setzero_si64();
-      const auto input_vector = reinterpret_cast<const __m64*>(input);
-
-#elif defined(USE_NEON)
-      constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth;
-      const auto input_vector = reinterpret_cast<const int8x8_t*>(input);
-#endif
-
-      for (IndexType i = 0; i < kOutputDimensions; ++i) {
-        const IndexType offset = i * kPaddedInputDimensions;
-
-#if defined(USE_SSE2)
-        __m128i sum_lo = _mm_cvtsi32_si128(biases_[i]);
-        __m128i sum_hi = kZeros;
-        const auto row = reinterpret_cast<const __m128i*>(&weights_[offset]);
-        for (IndexType j = 0; j < kNumChunks; ++j) {
-          __m128i row_j = _mm_load_si128(&row[j]);
-          __m128i input_j = _mm_load_si128(&input_vector[j]);
-          __m128i extended_row_lo = _mm_srai_epi16(_mm_unpacklo_epi8(row_j, row_j), 8);
-          __m128i extended_row_hi = _mm_srai_epi16(_mm_unpackhi_epi8(row_j, row_j), 8);
-          __m128i extended_input_lo = _mm_unpacklo_epi8(input_j, kZeros);
-          __m128i extended_input_hi = _mm_unpackhi_epi8(input_j, kZeros);
-          __m128i product_lo = _mm_madd_epi16(extended_row_lo, extended_input_lo);
-          __m128i product_hi = _mm_madd_epi16(extended_row_hi, extended_input_hi);
-          sum_lo = _mm_add_epi32(sum_lo, product_lo);
-          sum_hi = _mm_add_epi32(sum_hi, product_hi);
+        #if defined(USE_SSE2)
+        __m128i    sumLo = _mm_cvtsi32_si128(biases[i]);
+        __m128i    sumHi = Zeros;
+        const auto row   = reinterpret_cast<const __m128i*>(&weights[offset]);
+        for (IndexType j = 0; j < NumChunks; ++j)
+        {
+            __m128i row_j           = _mm_load_si128(&row[j]);
+            __m128i input_j         = _mm_load_si128(&inputVector[j]);
+            __m128i extendedRowLo   = _mm_srai_epi16(_mm_unpacklo_epi8(row_j, row_j), 8);
+            __m128i extendedRowHi   = _mm_srai_epi16(_mm_unpackhi_epi8(row_j, row_j), 8);
+            __m128i extendedInputLo = _mm_unpacklo_epi8(input_j, Zeros);
+            __m128i extendedInputHi = _mm_unpackhi_epi8(input_j, Zeros);
+            __m128i productLo       = _mm_madd_epi16(extendedRowLo, extendedInputLo);
+            __m128i productHi       = _mm_madd_epi16(extendedRowHi, extendedInputHi);
+            sumLo                   = _mm_add_epi32(sumLo, productLo);
+            sumHi                   = _mm_add_epi32(sumHi, productHi);
        }
-        __m128i sum = _mm_add_epi32(sum_lo, sum_hi);
-        __m128i sum_high_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2));
-        sum = _mm_add_epi32(sum, sum_high_64);
+        __m128i sum           = _mm_add_epi32(sumLo, sumHi);
+        __m128i sumHigh_64    = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2));
+        sum                   = _mm_add_epi32(sum, sumHigh_64);
        __m128i sum_second_32 = _mm_shufflelo_epi16(sum, _MM_SHUFFLE(1, 0, 3, 2));
-        sum = _mm_add_epi32(sum, sum_second_32);
-        output[i] = _mm_cvtsi128_si32(sum);
+        sum                   = _mm_add_epi32(sum, sum_second_32);
+        output[i]             = _mm_cvtsi128_si32(sum);

-#elif defined(USE_MMX)
-        __m64 sum_lo = _mm_cvtsi32_si64(biases_[i]);
-        __m64 sum_hi = kZeros;
-        const auto row = reinterpret_cast<const __m64*>(&weights_[offset]);
-        for (IndexType j = 0; j < kNumChunks; ++j) {
-          __m64 row_j = row[j];
-          __m64 input_j = input_vector[j];
-          __m64 extended_row_lo = _mm_srai_pi16(_mm_unpacklo_pi8(row_j, row_j), 8);
-          __m64 extended_row_hi = _mm_srai_pi16(_mm_unpackhi_pi8(row_j, row_j), 8);
-          __m64 extended_input_lo = _mm_unpacklo_pi8(input_j, kZeros);
-          __m64 extended_input_hi = _mm_unpackhi_pi8(input_j, kZeros);
-          __m64 product_lo = _mm_madd_pi16(extended_row_lo, extended_input_lo);
-          __m64 product_hi = _mm_madd_pi16(extended_row_hi, extended_input_hi);
-          sum_lo = _mm_add_pi32(sum_lo, product_lo);
-          sum_hi = _mm_add_pi32(sum_hi, product_hi);
-        }
-        __m64 sum = _mm_add_pi32(sum_lo, sum_hi);
-        sum = _mm_add_pi32(sum, _mm_unpackhi_pi32(sum, sum));
-        output[i] = _mm_cvtsi64_si32(sum);
+        #elif defined(USE_NEON)

-#elif defined(USE_NEON)
-        int32x4_t sum = {biases_[i]};
-        const auto row = reinterpret_cast<const int8x8_t*>(&weights_[offset]);
-        for (IndexType j = 0; j < kNumChunks; ++j) {
-          int16x8_t product = vmull_s8(input_vector[j * 2], row[j * 2]);
-          product = vmlal_s8(product, input_vector[j * 2 + 1], row[j * 2 + 1]);
-          sum = vpadalq_s16(sum, product);
+        int32x4_t  sum = {biases[i]};
+        const auto row = reinterpret_cast<const int8x8_t*>(&weights[offset]);
+        for (IndexType j = 0; j < NumChunks; ++j)
+        {
+            int16x8_t product = vmull_s8(inputVector[j * 2], row[j * 2]);
+            product           = vmlal_s8(product, inputVector[j * 2 + 1], row[j * 2 + 1]);
+            sum               = vpadalq_s16(sum, product);
        }
        output[i] = sum[0] + sum[1] + sum[2] + sum[3];

-#else
-        OutputType sum = biases_[i];
-        for (IndexType j = 0; j < kInputDimensions; ++j) {
-          sum += weights_[offset + j] * input[j];
+        #endif
+    }
+    #else
+    std::memcpy(output, biases, sizeof(std::int32_t) * OutputDimensions);
+
+    // Traverse weights in transpose order to take advantage of input sparsity
+    for (IndexType i = 0; i < InputDimensions; ++i)
+        if (input[i])
+        {
+            const std::int8_t* w  = &weights[i];
+            const int          in = input[i];
+            for (IndexType j = 0; j < OutputDimensions; ++j)
+                output[j] += w[j * PaddedInputDimensions] * in;
        }
-        output[i] = sum;
-#endif
+    #endif
+}

-      }
-#if defined(USE_MMX)
-      _mm_empty();
-#endif
+#endif  // !ENABLE_SEQ_OPT

-#endif
+template<IndexType InDims, IndexType OutDims>
+class AffineTransform {
+   public:
+    // Input/output type
+    using InputType  = std::uint8_t;
+    using OutputType = std::int32_t;

-      return output;
+    // Number of input/output dimensions
+    static constexpr IndexType InputDimensions  = InDims;
+    static constexpr IndexType OutputDimensions = OutDims;
+
+    static constexpr IndexType PaddedInputDimensions =
+      ceil_to_multiple<IndexType>(InputDimensions, MaxSimdWidth);
+    static constexpr IndexType PaddedOutputDimensions =
+      ceil_to_multiple<IndexType>(OutputDimensions, MaxSimdWidth);
+
+    using OutputBuffer = OutputType[PaddedOutputDimensions];
+
+    // Hash value embedded in the evaluation file, derived from OutputDimensions and prevHash
+    static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) {
+        std::uint32_t hashValue = 0xCC03DAE4u;
+        hashValue += OutputDimensions;
+        hashValue ^= prevHash >> 1;  // with the next line: XOR in prevHash rotated right by one bit
+        hashValue ^= prevHash << 31;
+        return hashValue;
+    }
+
+    static constexpr IndexType get_weight_index_scrambled(IndexType i) {
+        return (i / 4) % (PaddedInputDimensions / 4) * OutputDimensions * 4  // start of the 4-input chunk
+             + i / PaddedInputDimensions * 4 + i % 4;  // output slot in the chunk + byte in the slot
+    }
+
+    static constexpr IndexType get_weight_index(IndexType i) {
+#ifdef ENABLE_SEQ_OPT
+        return get_weight_index_scrambled(i);  // layout read by the ENABLE_SEQ_OPT path of propagate()
+#else
+        return i;  // keep the sequential order in which weights are read from the stream
+#endif
+    }
+
+    // Read network parameters (biases, then weights); returns false if the stream has failed
+    bool read_parameters(std::istream& stream) {
+        read_little_endian<BiasType>(stream, biases, OutputDimensions);
+        for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
+            weights[get_weight_index(i)] = read_little_endian<WeightType>(stream);
+
+        return !stream.fail();
+    }
+
+    // Write network parameters (biases, then weights); returns false if the stream has failed
+    bool write_parameters(std::ostream& stream) const {
+        write_little_endian<BiasType>(stream, biases, OutputDimensions);
+
+        for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
+            write_little_endian<WeightType>(stream, weights[get_weight_index(i)]);
+
+        return !stream.fail();
+    }
+    // Forward propagation: output = biases + weights * input
+    void propagate(const InputType* input, OutputType* output) const {
+
+#ifdef ENABLE_SEQ_OPT
+
+        if constexpr (OutputDimensions > 1)
+        {
+    #if defined(USE_AVX512)
+            using vec_t = __m512i;
+        #define vec_set_32 _mm512_set1_epi32
+        #define vec_add_dpbusd_32 Simd::m512_add_dpbusd_epi32
+    #elif defined(USE_AVX2)
+            using vec_t = __m256i;
+        #define vec_set_32 _mm256_set1_epi32
+        #define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32
+    #elif defined(USE_SSSE3)
+            using vec_t = __m128i;
+        #define vec_set_32 _mm_set1_epi32
+        #define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32
+    #elif defined(USE_NEON_DOTPROD)
+            using vec_t = int32x4_t;
+        #define vec_set_32 vdupq_n_s32
+        #define vec_add_dpbusd_32(acc, a, b) \
+            Simd::dotprod_m128_add_dpbusd_epi32(acc, vreinterpretq_s8_s32(a), \
+                                                vreinterpretq_s8_s32(b))
+    #endif
+
+            static constexpr IndexType OutputSimdWidth = sizeof(vec_t) / sizeof(OutputType);
+
+            static_assert(OutputDimensions % OutputSimdWidth == 0);
+
+            constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 8) / 4;
+            constexpr IndexType NumRegs   = OutputDimensions / OutputSimdWidth;
+
+            const auto   input32 = reinterpret_cast<const std::int32_t*>(input);
+            const vec_t* biasvec = reinterpret_cast<const vec_t*>(biases);
+            vec_t        acc[NumRegs];  // accumulators, seeded with the biases below
+            for (IndexType k = 0; k < NumRegs; ++k)
+                acc[k] = biasvec[k];
+
+            for (IndexType i = 0; i < NumChunks; ++i)
+            {
+                const vec_t in0 = vec_set_32(input32[i]);  // same 4 input bytes in every 32-bit lane
+                const auto  col0 =
+                  reinterpret_cast<const vec_t*>(&weights[i * OutputDimensions * 4]);
+
+                for (IndexType k = 0; k < NumRegs; ++k)
+                    vec_add_dpbusd_32(acc[k], in0, col0[k]);
+            }
+
+            vec_t* outptr = reinterpret_cast<vec_t*>(output);
+            for (IndexType k = 0; k < NumRegs; ++k)
+                outptr[k] = acc[k];
+
+    #undef vec_set_32
+    #undef vec_add_dpbusd_32
+        }
+        else if constexpr (OutputDimensions == 1)
+        {
+    // We cannot use AVX512 for the last layer because there are only 32 inputs
+    // and the buffer is not padded to 64 elements.
+    #if defined(USE_AVX2)
+            using vec_t = __m256i;
+        #define vec_setzero() _mm256_setzero_si256()
+        #define vec_set_32 _mm256_set1_epi32
+        #define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32
+        #define vec_hadd Simd::m256_hadd
+    #elif defined(USE_SSSE3)
+            using vec_t = __m128i;
+        #define vec_setzero() _mm_setzero_si128()
+        #define vec_set_32 _mm_set1_epi32
+        #define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32
+        #define vec_hadd Simd::m128_hadd
+    #elif defined(USE_NEON_DOTPROD)
+            using vec_t = int32x4_t;
+        #define vec_setzero() vdupq_n_s32(0)
+        #define vec_set_32 vdupq_n_s32
+        #define vec_add_dpbusd_32(acc, a, b) \
+            Simd::dotprod_m128_add_dpbusd_epi32(acc, vreinterpretq_s8_s32(a), \
+                                                vreinterpretq_s8_s32(b))
+        #define vec_hadd Simd::neon_m128_hadd
+    #endif
+
+            const auto inputVector = reinterpret_cast<const vec_t*>(input);
+
+            static constexpr IndexType InputSimdWidth = sizeof(vec_t) / sizeof(InputType);
+
+            static_assert(PaddedInputDimensions % InputSimdWidth == 0);
+
+            constexpr IndexType NumChunks = PaddedInputDimensions / InputSimdWidth;
+            vec_t               sum0      = vec_setzero();
+            const auto          row0      = reinterpret_cast<const vec_t*>(&weights[0]);
+
+            for (int j = 0; j < int(NumChunks); ++j)
+            {
+                const vec_t in = inputVector[j];
+                vec_add_dpbusd_32(sum0, in, row0[j]);
+            }
+            output[0] = vec_hadd(sum0, biases[0]);  // presumably lane sum + bias (Simd::*_hadd not visible here)
+
+    #undef vec_setzero
+    #undef vec_set_32
+    #undef vec_add_dpbusd_32
+    #undef vec_hadd
+        }
+#else
+        // Use old implementation for the other architectures.
+        affine_transform_non_ssse3<InputDimensions, PaddedInputDimensions, OutputDimensions>(
+          output, weights, biases, input);
+#endif
    }

   private:
-    using BiasType = OutputType;
+    using BiasType   = OutputType;
    using WeightType = std::int8_t;

-    PreviousLayer previous_layer_;
+    alignas(CacheLineSize) BiasType biases[OutputDimensions];
+    alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions];
+};

-    alignas(kCacheLineSize) BiasType biases_[kOutputDimensions];
-    alignas(kCacheLineSize) WeightType weights_[kOutputDimensions * kPaddedInputDimensions];
-#if defined (USE_SSSE3)
-    struct CanSaturate {
-        int count;
-        struct Entry {
-            uint16_t out;
-            uint16_t in;
-            int8_t w;
-        } ids[kPaddedInputDimensions * kOutputDimensions * 3 / 4];
+}  // namespace Stockfish::Eval::NNUE::Layers

-        void add(int i, int j, int8_t w) {
-            ids[count].out = i;
-            ids[count].in = j;
-            ids[count].w = w;
-            ++count;
-        }
-    } canSaturate16;
-#endif
-  };
-
-}  // namespace Eval::NNUE::Layers
-
-#endif // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
+#endif  // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED
@@ -0,0 +1,306 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Definition of layer AffineTransformSparseInput of NNUE evaluation function
+
+#ifndef NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED
+#define NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED
+
+#include <algorithm>
+#include <array>
+#include <cstdint>
+#include <iostream>
+
+#include "../../bitboard.h"
+#include "../nnue_common.h"
+#include "affine_transform.h"
+#include "simd.h"
+
+/*
+  This file contains the definition for a fully connected layer (aka affine transform) with block sparse input.
+*/
+
+namespace Stockfish::Eval::NNUE::Layers {
+
+#if (USE_SSSE3 | (USE_NEON >= 8))
+static constexpr int lsb_index64[64] = {  // De Bruijn hash -> bit index, used by constexpr_lsb()
+  0,  47, 1,  56, 48, 27, 2,  60, 57, 49, 41, 37, 28, 16, 3,  61, 54, 58, 35, 52, 50, 42,
+  21, 44, 38, 32, 29, 23, 17, 11, 4,  62, 46, 55, 26, 59, 40, 36, 15, 53, 34, 51, 20, 43,
+  31, 22, 10, 45, 25, 39, 14, 33, 19, 30, 9,  24, 13, 18, 8,  12, 7,  6,  5,  63};
+
+constexpr int constexpr_lsb(uint64_t bb) {  // bit index of the lowest set bit of bb (bb != 0)
+    assert(bb != 0);
+    constexpr uint64_t debruijn64 = 0x03F79D71B4CB0A89ULL;  // multiplier for the hash below
+    return lsb_index64[((bb ^ (bb - 1)) * debruijn64) >> 58];  // top 6 bits of product -> table slot
+}
+
+alignas(CacheLineSize) static constexpr struct OffsetIndices {  // per byte value: positions of its set bits
+
+    #if (USE_SSE41)
+    std::uint8_t offset_indices[256][8];  // 8-bit entries; find_nnz widens them to 16 bits on load
+    #else
+    std::uint16_t offset_indices[256][8];
+    #endif
+
+    constexpr OffsetIndices() :
+        offset_indices() {
+        for (int i = 0; i < 256; ++i)
+        {
+            std::uint64_t j = i, k = 0;
+            while (j)
+            {
+                offset_indices[i][k++] = constexpr_lsb(j);  // record set-bit positions in ascending order
+                j &= j - 1;  // clear the lowest set bit
+            }
+            while (k < 8)  // zero-fill the unused tail slots
+                offset_indices[i][k++] = 0;
+        }
+    }
+
+} Lookup;
+
+// Find indices of nonzero numbers in an int32_t array (NOTE(review): x86 paths test a > 0, NEON tests a != 0 — assumes no negative inputs, confirm)
+template<const IndexType InputDimensions>
+void find_nnz(const std::int32_t* input, std::uint16_t* out, IndexType& count_out) {
+    #if defined(USE_SSSE3)
+        #if defined(USE_AVX512)
+    using vec_t = __m512i;
+            #define vec_nnz(a) _mm512_cmpgt_epi32_mask(a, _mm512_setzero_si512())
+        #elif defined(USE_AVX2)
+    using vec_t = __m256i;
+            #if defined(USE_VNNI) && !defined(USE_AVXVNNI)
+                #define vec_nnz(a) _mm256_cmpgt_epi32_mask(a, _mm256_setzero_si256())
+            #else
+                #define vec_nnz(a) \
+                    _mm256_movemask_ps( \
+                      _mm256_castsi256_ps(_mm256_cmpgt_epi32(a, _mm256_setzero_si256())))
+            #endif
+        #elif defined(USE_SSSE3)
+    using vec_t = __m128i;
+            #define vec_nnz(a) \
+                _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpgt_epi32(a, _mm_setzero_si128())))
+        #endif
+    using vec128_t = __m128i;
+        #define vec128_zero _mm_setzero_si128()
+        #define vec128_set_16(a) _mm_set1_epi16(a)
+        #if (USE_SSE41)
+            #define vec128_load(a) _mm_cvtepu8_epi16(_mm_loadl_epi64(a))
+        #else
+            #define vec128_load(a) _mm_load_si128(a)
+        #endif
+        #define vec128_storeu(a, b) _mm_storeu_si128(a, b)
+        #define vec128_add(a, b) _mm_add_epi16(a, b)
+    #elif defined(USE_NEON)
+    using vec_t                        = uint32x4_t;
+    static const std::uint32_t Mask[4] = {1, 2, 4, 8};
+        #define vec_nnz(a) vaddvq_u32(vandq_u32(vtstq_u32(a, a), vld1q_u32(Mask)))
+    using vec128_t                     = uint16x8_t;
+        #define vec128_zero vdupq_n_u16(0)
+        #define vec128_set_16(a) vdupq_n_u16(a)
+        #define vec128_load(a) vld1q_u16(reinterpret_cast<const std::uint16_t*>(a))
+        #define vec128_storeu(a, b) vst1q_u16(reinterpret_cast<std::uint16_t*>(a), b)
+        #define vec128_add(a, b) vaddq_u16(a, b)
+    #endif
+    constexpr IndexType InputSimdWidth = sizeof(vec_t) / sizeof(std::int32_t);
+    // Inputs are processed InputSimdWidth at a time and outputs are processed 8 at a time, so we process in chunks of max(InputSimdWidth, 8)
+    constexpr IndexType ChunkSize       = std::max<IndexType>(InputSimdWidth, 8);
+    constexpr IndexType NumChunks       = InputDimensions / ChunkSize;  // a remainder of InputDimensions % ChunkSize is not scanned
+    constexpr IndexType InputsPerChunk  = ChunkSize / InputSimdWidth;
+    constexpr IndexType OutputsPerChunk = ChunkSize / 8;
+
+    const auto     inputVector = reinterpret_cast<const vec_t*>(input);
+    IndexType      count       = 0;
+    vec128_t       base        = vec128_zero;  // index of the first input of the current group of 8, in every lane
+    const vec128_t increment   = vec128_set_16(8);
+    for (IndexType i = 0; i < NumChunks; ++i)
+    {
+        // Bitmask of nonzero values in this chunk (bit k set <=> k-th int32 of the chunk passed vec_nnz)
+        unsigned nnz = 0;
+        for (IndexType j = 0; j < InputsPerChunk; ++j)
+        {
+            const vec_t inputChunk = inputVector[i * InputsPerChunk + j];
+            nnz |= unsigned(vec_nnz(inputChunk)) << (j * InputSimdWidth);
+        }
+        for (IndexType j = 0; j < OutputsPerChunk; ++j)
+        {
+            const unsigned lookup = (nnz >> (j * 8)) & 0xFF;
+            const vec128_t offsets =
+              vec128_load(reinterpret_cast<const vec128_t*>(&Lookup.offset_indices[lookup]));
+            vec128_storeu(reinterpret_cast<vec128_t*>(out + count), vec128_add(base, offsets));
+            count += popcount(lookup);  // all 8 lanes were stored; only the first popcount(lookup) are kept
+            base = vec128_add(base, increment);  // advance to the next group of 8 inputs
+        }
+    }
+    count_out = count;
+}
+    #undef vec_nnz
+    #undef vec128_zero
+    #undef vec128_set_16
+    #undef vec128_load
+    #undef vec128_storeu
+    #undef vec128_add
+#endif
+
+// Sparse input implementation: on the SIMD paths only the input blocks reported by find_nnz are multiplied
+template<IndexType InDims, IndexType OutDims>
+class AffineTransformSparseInput {
+   public:
+    // Input/output type
+    using InputType  = std::uint8_t;
+    using OutputType = std::int32_t;
+
+    // Number of input/output dimensions
+    static constexpr IndexType InputDimensions  = InDims;
+    static constexpr IndexType OutputDimensions = OutDims;
+
+    static_assert(OutputDimensions % 16 == 0,
+                  "Only implemented for OutputDimensions divisible by 16.");
+
+    static constexpr IndexType PaddedInputDimensions =
+      ceil_to_multiple<IndexType>(InputDimensions, MaxSimdWidth);
+    static constexpr IndexType PaddedOutputDimensions =
+      ceil_to_multiple<IndexType>(OutputDimensions, MaxSimdWidth);
+
+#if (USE_SSSE3 | (USE_NEON >= 8))
+    static constexpr IndexType ChunkSize = 4;  // input bytes handled per 32-bit block on the SIMD paths
+#else
+    static constexpr IndexType ChunkSize = 1;
+#endif
+
+    using OutputBuffer = OutputType[PaddedOutputDimensions];
+
+    // Hash value embedded in the evaluation file, derived from OutputDimensions and prevHash
+    static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) {
+        std::uint32_t hashValue = 0xCC03DAE4u;
+        hashValue += OutputDimensions;
+        hashValue ^= prevHash >> 1;  // with the next line: XOR in prevHash rotated right by one bit
+        hashValue ^= prevHash << 31;
+        return hashValue;
+    }
+
+    static constexpr IndexType get_weight_index_scrambled(IndexType i) {
+        return (i / ChunkSize) % (PaddedInputDimensions / ChunkSize) * OutputDimensions * ChunkSize
+             + i / PaddedInputDimensions * ChunkSize + i % ChunkSize;  // output slot in the chunk + byte in the slot
+    }
+
+    static constexpr IndexType get_weight_index(IndexType i) {
+#if (USE_SSSE3 | (USE_NEON >= 8))
+        return get_weight_index_scrambled(i);  // layout read by the SIMD path of propagate()
+#else
+        return i;  // keep the sequential order in which weights are read from the stream
+#endif
+    }
+
+    // Read network parameters (biases, then weights); returns false if the stream has failed
+    bool read_parameters(std::istream& stream) {
+        read_little_endian<BiasType>(stream, biases, OutputDimensions);
+        for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
+            weights[get_weight_index(i)] = read_little_endian<WeightType>(stream);
+
+        return !stream.fail();
+    }
+
+    // Write network parameters (biases, then weights); returns false if the stream has failed
+    bool write_parameters(std::ostream& stream) const {
+        write_little_endian<BiasType>(stream, biases, OutputDimensions);
+
+        for (IndexType i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
+            write_little_endian<WeightType>(stream, weights[get_weight_index(i)]);
+
+        return !stream.fail();
+    }
+    // Forward propagation: output = biases + weights * input
+    void propagate(const InputType* input, OutputType* output) const {
+
+#if (USE_SSSE3 | (USE_NEON >= 8))
+    #if defined(USE_AVX512)
+        using invec_t  = __m512i;
+        using outvec_t = __m512i;
+        #define vec_set_32 _mm512_set1_epi32
+        #define vec_add_dpbusd_32 Simd::m512_add_dpbusd_epi32
+    #elif defined(USE_AVX2)
+        using invec_t  = __m256i;
+        using outvec_t = __m256i;
+        #define vec_set_32 _mm256_set1_epi32
+        #define vec_add_dpbusd_32 Simd::m256_add_dpbusd_epi32
+    #elif defined(USE_SSSE3)
+        using invec_t  = __m128i;
+        using outvec_t = __m128i;
+        #define vec_set_32 _mm_set1_epi32
+        #define vec_add_dpbusd_32 Simd::m128_add_dpbusd_epi32
+    #elif defined(USE_NEON_DOTPROD)
+        using invec_t  = int8x16_t;
+        using outvec_t = int32x4_t;
+        #define vec_set_32(a) vreinterpretq_s8_u32(vdupq_n_u32(a))
+        #define vec_add_dpbusd_32 Simd::dotprod_m128_add_dpbusd_epi32
+    #elif defined(USE_NEON)
+        using invec_t  = int8x16_t;
+        using outvec_t = int32x4_t;
+        #define vec_set_32(a) vreinterpretq_s8_u32(vdupq_n_u32(a))
+        #define vec_add_dpbusd_32 Simd::neon_m128_add_dpbusd_epi32
+    #endif
+        static constexpr IndexType OutputSimdWidth = sizeof(outvec_t) / sizeof(OutputType);
+
+        constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 8) / ChunkSize;
+        constexpr IndexType NumRegs   = OutputDimensions / OutputSimdWidth;
+        std::uint16_t       nnz[NumChunks];  // indices of the input blocks reported by find_nnz
+        IndexType           count;  // number of valid entries in nnz, set by find_nnz
+
+        const auto input32 = reinterpret_cast<const std::int32_t*>(input);
+
+        // Find indices of nonzero 32-bit blocks
+        find_nnz<NumChunks>(input32, nnz, count);
+
+        const outvec_t* biasvec = reinterpret_cast<const outvec_t*>(biases);
+        outvec_t        acc[NumRegs];  // accumulators, seeded with the biases below
+        for (IndexType k = 0; k < NumRegs; ++k)
+            acc[k] = biasvec[k];
+
+        for (IndexType j = 0; j < count; ++j)
+        {
+            const auto    i  = nnz[j];
+            const invec_t in = vec_set_32(input32[i]);  // same 4 input bytes in every 32-bit lane
+            const auto    col =
+              reinterpret_cast<const invec_t*>(&weights[i * OutputDimensions * ChunkSize]);
+            for (IndexType k = 0; k < NumRegs; ++k)
+                vec_add_dpbusd_32(acc[k], in, col[k]);
+        }
+
+        outvec_t* outptr = reinterpret_cast<outvec_t*>(output);
+        for (IndexType k = 0; k < NumRegs; ++k)
+            outptr[k] = acc[k];
+    #undef vec_set_32
+    #undef vec_add_dpbusd_32
+#else
+        // Use dense implementation for the other architectures.
+        affine_transform_non_ssse3<InputDimensions, PaddedInputDimensions, OutputDimensions>(
+          output, weights, biases, input);
+#endif
+    }
+
+   private:
+    using BiasType   = OutputType;
+    using WeightType = std::int8_t;
+
+    alignas(CacheLineSize) BiasType biases[OutputDimensions];
+    alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions];
+};
+
+}  // namespace Stockfish::Eval::NNUE::Layers
+
+#endif  // #ifndef NNUE_LAYERS_AFFINE_TRANSFORM_SPARSE_INPUT_H_INCLUDED
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -21,146 +21,144 @@
 #ifndef NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED
 #define NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED

+#include <algorithm>
+#include <cstdint>
+#include <iosfwd>
+
 #include "../nnue_common.h"

-namespace Eval::NNUE::Layers {
+namespace Stockfish::Eval::NNUE::Layers {

-  // Clipped ReLU
-  template <typename PreviousLayer>
-  class ClippedReLU {
+// Clipped ReLU
+template<IndexType InDims>
+class ClippedReLU {
   public:
    // Input/output type
-    using InputType = typename PreviousLayer::OutputType;
+    using InputType  = std::int32_t;
    using OutputType = std::uint8_t;
-    static_assert(std::is_same<InputType, std::int32_t>::value, "");

    // Number of input/output dimensions
-    static constexpr IndexType kInputDimensions =
-        PreviousLayer::kOutputDimensions;
-    static constexpr IndexType kOutputDimensions = kInputDimensions;
+    static constexpr IndexType InputDimensions  = InDims;
+    static constexpr IndexType OutputDimensions = InputDimensions;
+    static constexpr IndexType PaddedOutputDimensions =
+      ceil_to_multiple<IndexType>(OutputDimensions, 32);

-    // Size of forward propagation buffer used in this layer
-    static constexpr std::size_t kSelfBufferSize =
-        CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize);
-
-    // Size of the forward propagation buffer used from the input layer to this layer
-    static constexpr std::size_t kBufferSize =
-        PreviousLayer::kBufferSize + kSelfBufferSize;
+    using OutputBuffer = OutputType[PaddedOutputDimensions];

    // Hash value embedded in the evaluation file
-    static constexpr std::uint32_t GetHashValue() {
-      std::uint32_t hash_value = 0x538D24C7u;
-      hash_value += PreviousLayer::GetHashValue();
-      return hash_value;
+    static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) {
+        std::uint32_t hashValue = 0x538D24C7u;
+        hashValue += prevHash;
+        return hashValue;
    }

    // Read network parameters
-    bool ReadParameters(std::istream& stream) {
-      return previous_layer_.ReadParameters(stream);
-    }
+    bool read_parameters(std::istream&) { return true; }
+
+    // Write network parameters
+    bool write_parameters(std::ostream&) const { return true; }

    // Forward propagation
-    const OutputType* Propagate(
-        const TransformedFeatureType* transformed_features, char* buffer) const {
-      const auto input = previous_layer_.Propagate(
-          transformed_features, buffer + kSelfBufferSize);
-      const auto output = reinterpret_cast<OutputType*>(buffer);
+    void propagate(const InputType* input, OutputType* output) const {

-  #if defined(USE_AVX2)
-      constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
-      const __m256i kZero = _mm256_setzero_si256();
-      const __m256i kOffsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
-      const auto in = reinterpret_cast<const __m256i*>(input);
-      const auto out = reinterpret_cast<__m256i*>(output);
-      for (IndexType i = 0; i < kNumChunks; ++i) {
-        const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32(
-            _mm256_load_si256(&in[i * 4 + 0]),
-            _mm256_load_si256(&in[i * 4 + 1])), kWeightScaleBits);
-        const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32(
-            _mm256_load_si256(&in[i * 4 + 2]),
-            _mm256_load_si256(&in[i * 4 + 3])), kWeightScaleBits);
-        _mm256_store_si256(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8(
-            _mm256_packs_epi16(words0, words1), kZero), kOffsets));
-      }
-      constexpr IndexType kStart = kNumChunks * kSimdWidth;
+#if defined(USE_AVX2)
+        if constexpr (InputDimensions % SimdWidth == 0)
+        {
+            constexpr IndexType NumChunks = InputDimensions / SimdWidth;
+            const __m256i       Offsets   = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
+            const auto          in        = reinterpret_cast<const __m256i*>(input);
+            const auto          out       = reinterpret_cast<__m256i*>(output);
+            for (IndexType i = 0; i < NumChunks; ++i)
+            {
+                const __m256i words0 =
+                  _mm256_srli_epi16(_mm256_packus_epi32(_mm256_load_si256(&in[i * 4 + 0]),
+                                                        _mm256_load_si256(&in[i * 4 + 1])),
+                                    WeightScaleBits);
+                const __m256i words1 =
+                  _mm256_srli_epi16(_mm256_packus_epi32(_mm256_load_si256(&in[i * 4 + 2]),
+                                                        _mm256_load_si256(&in[i * 4 + 3])),
+                                    WeightScaleBits);
+                _mm256_store_si256(&out[i], _mm256_permutevar8x32_epi32(
+                                              _mm256_packs_epi16(words0, words1), Offsets));
+            }
+        }
+        else
+        {
+            constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2);
+            const auto          in        = reinterpret_cast<const __m128i*>(input);
+            const auto          out       = reinterpret_cast<__m128i*>(output);
+            for (IndexType i = 0; i < NumChunks; ++i)
+            {
+                const __m128i words0 = _mm_srli_epi16(
+                  _mm_packus_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])),
+                  WeightScaleBits);
+                const __m128i words1 = _mm_srli_epi16(
+                  _mm_packus_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])),
+                  WeightScaleBits);
+                _mm_store_si128(&out[i], _mm_packs_epi16(words0, words1));
+            }
+        }
+        constexpr IndexType Start = InputDimensions % SimdWidth == 0
+                                    ? InputDimensions / SimdWidth * SimdWidth
+                                    : InputDimensions / (SimdWidth / 2) * (SimdWidth / 2);

-  #elif defined(USE_SSE2)
-      constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
+#elif defined(USE_SSE2)
+        constexpr IndexType NumChunks = InputDimensions / SimdWidth;

-  #ifdef USE_SSE41
-      const __m128i kZero = _mm_setzero_si128();
-  #else
-      const __m128i k0x80s = _mm_set1_epi8(-128);
-  #endif
+    #ifndef USE_SSE41
+        const __m128i k0x80s = _mm_set1_epi8(-128);
+    #endif

-      const auto in = reinterpret_cast<const __m128i*>(input);
-      const auto out = reinterpret_cast<__m128i*>(output);
-      for (IndexType i = 0; i < kNumChunks; ++i) {
-        const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32(
-            _mm_load_si128(&in[i * 4 + 0]),
-            _mm_load_si128(&in[i * 4 + 1])), kWeightScaleBits);
-        const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32(
-            _mm_load_si128(&in[i * 4 + 2]),
-            _mm_load_si128(&in[i * 4 + 3])), kWeightScaleBits);
-        const __m128i packedbytes = _mm_packs_epi16(words0, words1);
-        _mm_store_si128(&out[i],
+        const auto in  = reinterpret_cast<const __m128i*>(input);
+        const auto out = reinterpret_cast<__m128i*>(output);
+        for (IndexType i = 0; i < NumChunks; ++i)
+        {
+    #if defined(USE_SSE41)
+            const __m128i words0 = _mm_srli_epi16(
+              _mm_packus_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])),
+              WeightScaleBits);
+            const __m128i words1 = _mm_srli_epi16(
+              _mm_packus_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])),
+              WeightScaleBits);
+            _mm_store_si128(&out[i], _mm_packs_epi16(words0, words1));
+    #else
+            const __m128i words0 = _mm_srai_epi16(
+              _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 0]), _mm_load_si128(&in[i * 4 + 1])),
+              WeightScaleBits);
+            const __m128i words1 = _mm_srai_epi16(
+              _mm_packs_epi32(_mm_load_si128(&in[i * 4 + 2]), _mm_load_si128(&in[i * 4 + 3])),
+              WeightScaleBits);
+            const __m128i packedbytes = _mm_packs_epi16(words0, words1);
+            _mm_store_si128(&out[i], _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s));
+    #endif
+        }
+        constexpr IndexType Start = NumChunks * SimdWidth;

-  #ifdef USE_SSE41
-          _mm_max_epi8(packedbytes, kZero)
-  #else
-          _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s)
-  #endif
+#elif defined(USE_NEON)
+        constexpr IndexType NumChunks = InputDimensions / (SimdWidth / 2);
+        const int8x8_t      Zero      = {0};
+        const auto          in        = reinterpret_cast<const int32x4_t*>(input);
+        const auto          out       = reinterpret_cast<int8x8_t*>(output);
+        for (IndexType i = 0; i < NumChunks; ++i)
+        {
+            int16x8_t  shifted;
+            const auto pack = reinterpret_cast<int16x4_t*>(&shifted);
+            pack[0]         = vqshrn_n_s32(in[i * 2 + 0], WeightScaleBits);
+            pack[1]         = vqshrn_n_s32(in[i * 2 + 1], WeightScaleBits);
+            out[i]          = vmax_s8(vqmovn_s16(shifted), Zero);
+        }
+        constexpr IndexType Start = NumChunks * (SimdWidth / 2);
+#else
+        constexpr IndexType Start = 0;
+#endif

-        );
-      }
-      constexpr IndexType kStart = kNumChunks * kSimdWidth;
-
-  #elif defined(USE_MMX)
-      constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth;
-      const __m64 k0x80s = _mm_set1_pi8(-128);
-      const auto in = reinterpret_cast<const __m64*>(input);
-      const auto out = reinterpret_cast<__m64*>(output);
-      for (IndexType i = 0; i < kNumChunks; ++i) {
-        const __m64 words0 = _mm_srai_pi16(
-            _mm_packs_pi32(in[i * 4 + 0], in[i * 4 + 1]),
-            kWeightScaleBits);
-        const __m64 words1 = _mm_srai_pi16(
-            _mm_packs_pi32(in[i * 4 + 2], in[i * 4 + 3]),
-            kWeightScaleBits);
-        const __m64 packedbytes = _mm_packs_pi16(words0, words1);
-        out[i] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s);
-      }
-      _mm_empty();
-      constexpr IndexType kStart = kNumChunks * kSimdWidth;
-
-  #elif defined(USE_NEON)
-      constexpr IndexType kNumChunks = kInputDimensions / (kSimdWidth / 2);
-      const int8x8_t kZero = {0};
-      const auto in = reinterpret_cast<const int32x4_t*>(input);
-      const auto out = reinterpret_cast<int8x8_t*>(output);
-      for (IndexType i = 0; i < kNumChunks; ++i) {
-        int16x8_t shifted;
-        const auto pack = reinterpret_cast<int16x4_t*>(&shifted);
-        pack[0] = vqshrn_n_s32(in[i * 2 + 0], kWeightScaleBits);
-        pack[1] = vqshrn_n_s32(in[i * 2 + 1], kWeightScaleBits);
-        out[i] = vmax_s8(vqmovn_s16(shifted), kZero);
-      }
-      constexpr IndexType kStart = kNumChunks * (kSimdWidth / 2);
-  #else
-      constexpr IndexType kStart = 0;
-  #endif
-
-      for (IndexType i = kStart; i < kInputDimensions; ++i) {
-        output[i] = static_cast<OutputType>(
-            std::max(0, std::min(127, input[i] >> kWeightScaleBits)));
-      }
-      return output;
+        for (IndexType i = Start; i < InputDimensions; ++i)
+        {
+            output[i] = static_cast<OutputType>(std::clamp(input[i] >> WeightScaleBits, 0, 127));
+        }
    }
+};

-   private:
-    PreviousLayer previous_layer_;
-  };
+}  // namespace Stockfish::Eval::NNUE::Layers

-}  // namespace Eval::NNUE::Layers
-
-#endif // NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED
+#endif  // NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED
@@ -1,68 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-// NNUE evaluation function layer InputSlice definition
-
-#ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
-#define NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
-
-#include "../nnue_common.h"
-
-namespace Eval::NNUE::Layers {
-
-// Input layer
-template <IndexType OutputDimensions, IndexType Offset = 0>
-class InputSlice {
- public:
-  // Need to maintain alignment
-  static_assert(Offset % kMaxSimdWidth == 0, "");
-
-  // Output type
-  using OutputType = TransformedFeatureType;
-
-  // Output dimensionality
-  static constexpr IndexType kOutputDimensions = OutputDimensions;
-
-  // Size of forward propagation buffer used from the input layer to this layer
-  static constexpr std::size_t kBufferSize = 0;
-
-  // Hash value embedded in the evaluation file
-  static constexpr std::uint32_t GetHashValue() {
-    std::uint32_t hash_value = 0xEC42E90Du;
-    hash_value ^= kOutputDimensions ^ (Offset << 10);
-    return hash_value;
-  }
-
-  // Read network parameters
-  bool ReadParameters(std::istream& /*stream*/) {
-    return true;
-  }
-
-  // Forward propagation
-  const OutputType* Propagate(
-      const TransformedFeatureType* transformed_features,
-      char* /*buffer*/) const {
-    return transformed_features + Offset;
-  }
-
- private:
-};
-
-}  // namespace Layers
-
-#endif // #ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED
@@ -0,0 +1,134 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef STOCKFISH_SIMD_H_INCLUDED
+#define STOCKFISH_SIMD_H_INCLUDED
+
+#if defined(USE_AVX2)
+    #include <immintrin.h>
+
+#elif defined(USE_SSE41)
+    #include <smmintrin.h>
+
+#elif defined(USE_SSSE3)
+    #include <tmmintrin.h>
+
+#elif defined(USE_SSE2)
+    #include <emmintrin.h>
+
+#elif defined(USE_NEON)
+    #include <arm_neon.h>
+#endif
+
+namespace Stockfish::Simd {
+
+#if defined(USE_AVX512)
+
+// Adds up all 32-bit lanes of `sum` and returns that total plus `bias`.
+[[maybe_unused]] static int m512_hadd(__m512i sum, int bias) {
+    const int lanesTotal = _mm512_reduce_add_epi32(sum);
+    return lanesTotal + bias;
+}
+
+// Adds to each 32-bit lane of `acc` the sum of the four byte products of `a`
+// and `b` that fall into that lane.
+[[maybe_unused]] static void m512_add_dpbusd_epi32(__m512i& acc, __m512i a, __m512i b) {
+
+    #if defined(USE_VNNI)
+    // One fused instruction when VNNI is enabled for the build.
+    acc = _mm512_dpbusd_epi32(acc, a, b);
+    #else
+    // Two-step emulation: byte products are first summed pairwise into 16-bit
+    // lanes, then the multiply-add against 1 sums those pairwise into 32 bits.
+    const __m512i ones    = _mm512_set1_epi16(1);
+    const __m512i pairs16 = _mm512_maddubs_epi16(a, b);
+    const __m512i quads32 = _mm512_madd_epi16(pairs16, ones);
+    acc                   = _mm512_add_epi32(acc, quads32);
+    #endif
+}
+
+#endif
+
+#if defined(USE_AVX2)
+
+// Adds up all eight 32-bit lanes of `sum` and returns that total plus `bias`.
+[[maybe_unused]] static int m256_hadd(__m256i sum, int bias) {
+    // Fold the upper 128 bits onto the lower 128 bits.
+    const __m128i lowHalf  = _mm256_castsi256_si128(sum);
+    const __m128i highHalf = _mm256_extracti128_si256(sum, 1);
+    const __m128i fold4    = _mm_add_epi32(lowHalf, highHalf);
+
+    // Two shuffle-and-add steps leave the full total in the lowest lane.
+    const __m128i fold2 = _mm_add_epi32(fold4, _mm_shuffle_epi32(fold4, _MM_PERM_BADC));
+    const __m128i fold1 = _mm_add_epi32(fold2, _mm_shuffle_epi32(fold2, _MM_PERM_CDAB));
+    return _mm_cvtsi128_si32(fold1) + bias;
+}
+
+// Adds to each 32-bit lane of `acc` the sum of the four byte products of `a`
+// and `b` that fall into that lane.
+[[maybe_unused]] static void m256_add_dpbusd_epi32(__m256i& acc, __m256i a, __m256i b) {
+
+    #if defined(USE_VNNI)
+    // One fused instruction when VNNI is enabled for the build.
+    acc = _mm256_dpbusd_epi32(acc, a, b);
+    #else
+    // Two-step emulation: byte products are first summed pairwise into 16-bit
+    // lanes, then the multiply-add against 1 sums those pairwise into 32 bits.
+    const __m256i ones    = _mm256_set1_epi16(1);
+    const __m256i pairs16 = _mm256_maddubs_epi16(a, b);
+    const __m256i quads32 = _mm256_madd_epi16(pairs16, ones);
+    acc                   = _mm256_add_epi32(acc, quads32);
+    #endif
+}
+
+#endif
+
+#if defined(USE_SSSE3)
+
+// Adds up all four 32-bit lanes of `sum` and returns that total plus `bias`.
+[[maybe_unused]] static int m128_hadd(__m128i sum, int bias) {
+    // Two shuffle-and-add steps leave the full total in the lowest lane.
+    const __m128i fold2 = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E));  //_MM_PERM_BADC
+    const __m128i fold1 = _mm_add_epi32(fold2, _mm_shuffle_epi32(fold2, 0xB1));  //_MM_PERM_CDAB
+    return _mm_cvtsi128_si32(fold1) + bias;
+}
+
+// Adds to each 32-bit lane of `acc` the sum of the four byte products of `a`
+// and `b` that fall into that lane.
+[[maybe_unused]] static void m128_add_dpbusd_epi32(__m128i& acc, __m128i a, __m128i b) {
+
+    // Byte products are first summed pairwise into 16-bit lanes, then the
+    // multiply-add against 1 sums those pairwise into 32-bit lanes.
+    const __m128i ones    = _mm_set1_epi16(1);
+    const __m128i pairs16 = _mm_maddubs_epi16(a, b);
+    const __m128i quads32 = _mm_madd_epi16(pairs16, ones);
+    acc                   = _mm_add_epi32(acc, quads32);
+}
+
+#endif
+
+#if defined(USE_NEON_DOTPROD)
+
+// Accumulates the dot product of `a` and `b` into `acc` via vdotq_s32.
+[[maybe_unused]] static void
+dotprod_m128_add_dpbusd_epi32(int32x4_t& acc, int8x16_t a, int8x16_t b) {
+
+    const int32x4_t updated = vdotq_s32(acc, a, b);
+    acc                     = updated;
+}
+#endif
+
+#if defined(USE_NEON)
+
+// Returns the sum of the four 32-bit lanes of `s`.
+[[maybe_unused]] static int neon_m128_reduce_add_epi32(int32x4_t s) {
+    #if USE_NEON >= 8
+    // Across-vector add when USE_NEON is 8 or higher.
+    const int total = vaddvq_s32(s);
+    #else
+    // Otherwise add the lanes one by one.
+    const int total = s[0] + s[1] + s[2] + s[3];
+    #endif
+    return total;
+}
+
+// Adds up all 32-bit lanes of `sum` and returns that total plus `bias`.
+[[maybe_unused]] static int neon_m128_hadd(int32x4_t sum, int bias) {
+    const int lanesTotal = neon_m128_reduce_add_epi32(sum);
+    return lanesTotal + bias;
+}
+
+#endif
+
+#if USE_NEON >= 8
+// Adds to each 32-bit lane of `acc` the pairwise-summed byte products of `a`
+// and `b`.
+[[maybe_unused]] static void neon_m128_add_dpbusd_epi32(int32x4_t& acc, int8x16_t a, int8x16_t b) {
+
+    // Widening multiplies of the low and high eight bytes.
+    const int16x8_t lowProducts  = vmull_s8(vget_low_s8(a), vget_low_s8(b));
+    const int16x8_t highProducts = vmull_high_s8(a, b);
+
+    // Pairwise add the 16-bit products, then pairwise add-accumulate into 32 bits.
+    const int16x8_t pairSums = vpaddq_s16(lowProducts, highProducts);
+    acc                      = vpadalq_s16(acc, pairSums);
+}
+#endif
+}
+
+#endif  // STOCKFISH_SIMD_H_INCLUDED
@@ -0,0 +1,103 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Definition of layer SqrClippedReLU of NNUE evaluation function
+
+#ifndef NNUE_LAYERS_SQR_CLIPPED_RELU_H_INCLUDED
+#define NNUE_LAYERS_SQR_CLIPPED_RELU_H_INCLUDED
+
+#include <algorithm>
+#include <cstdint>
+#include <iosfwd>
+
+#include "../nnue_common.h"
+
+namespace Stockfish::Eval::NNUE::Layers {
+
+// Squared clipped ReLU: squares every input, rescales it with a right shift
+// and caps the result at 127.
+template<IndexType InDims>
+class SqrClippedReLU {
+   public:
+    // Element types consumed and produced by this layer
+    using InputType  = std::int32_t;
+    using OutputType = std::uint8_t;
+
+    // Layer dimensions; there is one output element per input element
+    static constexpr IndexType InputDimensions  = InDims;
+    static constexpr IndexType OutputDimensions = InputDimensions;
+    static constexpr IndexType PaddedOutputDimensions =
+      ceil_to_multiple<IndexType>(OutputDimensions, 32);
+
+    using OutputBuffer = OutputType[PaddedOutputDimensions];
+
+    // Hash value embedded in the evaluation file: a fixed layer constant
+    // added to the hash of the preceding layer.
+    static constexpr std::uint32_t get_hash_value(std::uint32_t prevHash) {
+        return 0x538D24C7u + prevHash;
+    }
+
+    // Read network parameters (this layer stores none, so nothing is read)
+    bool read_parameters(std::istream&) { return true; }
+
+    // Write network parameters (this layer stores none, so nothing is written)
+    bool write_parameters(std::ostream&) const { return true; }
+
+    // Forward propagation: fills output[0 .. InputDimensions) from input
+    void propagate(const InputType* input, OutputType* output) const {
+
+#if defined(USE_SSE2)
+        constexpr IndexType LanesPerChunk = 16;
+        constexpr IndexType NumChunks     = InputDimensions / LanesPerChunk;
+
+        static_assert(WeightScaleBits == 6);
+        const auto src = reinterpret_cast<const __m128i*>(input);
+        const auto dst = reinterpret_cast<__m128i*>(output);
+        for (IndexType chunk = 0; chunk < NumChunks; ++chunk)
+        {
+            const __m128i* quad = &src[chunk * 4];
+
+            // Narrow sixteen 32-bit inputs to 16-bit lanes
+            const __m128i narrow0 =
+              _mm_packs_epi32(_mm_load_si128(quad + 0), _mm_load_si128(quad + 1));
+            const __m128i narrow1 =
+              _mm_packs_epi32(_mm_load_si128(quad + 2), _mm_load_si128(quad + 3));
+
+            // The total shift is WeightScaleBits * 2 = 12 plus 7 for the
+            // division by 128, i.e. 19 bits. MulHi already strips the lower
+            // 16 bits, so only 3 more bits have to be shifted out.
+            const __m128i squared0 = _mm_srli_epi16(_mm_mulhi_epi16(narrow0, narrow0), 3);
+            const __m128i squared1 = _mm_srli_epi16(_mm_mulhi_epi16(narrow1, narrow1), 3);
+
+            _mm_store_si128(&dst[chunk], _mm_packs_epi16(squared0, squared1));
+        }
+        constexpr IndexType Start = NumChunks * LanesPerChunk;
+
+#else
+        constexpr IndexType Start = 0;
+#endif
+
+        // Scalar path for the elements the vectorized loop did not cover
+        for (IndexType i = Start; i < InputDimensions; ++i)
+        {
+            // Really should be /127 but we need to make it fast so we right-shift
+            // by an extra 7 bits instead. Needs to be accounted for in the trainer.
+            const long long squared = static_cast<long long>(input[i]) * input[i];
+            const long long scaled  = squared >> (2 * WeightScaleBits + 7);
+            output[i]               = static_cast<OutputType>(std::min(127ll, scaled));
+        }
+    }
+};
+
+}  // namespace Stockfish::Eval::NNUE::Layers
+
+#endif  // NNUE_LAYERS_SQR_CLIPPED_RELU_H_INCLUDED
@@ -0,0 +1,467 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "network.h"
+
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <optional>
+#include <type_traits>
+#include <vector>
+
+#define INCBIN_SILENCE_BITCODE_WARNING
+#include "../incbin/incbin.h"
+
+#include "../evaluate.h"
+#include "../memory.h"
+#include "../misc.h"
+#include "../position.h"
+#include "../types.h"
+#include "nnue_architecture.h"
+#include "nnue_common.h"
+#include "nnue_misc.h"
+
+// Macro to embed the default efficiently updatable neural network (NNUE) files
+// in the engine binary (using incbin.h, by Dale Weiler).
+// Each macro invocation declares three variables; for EmbeddedNNUEBig these are
+//     const unsigned char        gEmbeddedNNUEBigData[];  // the embedded data
+//     const unsigned char *const gEmbeddedNNUEBigEnd;     // a marker to the end
+//     const unsigned int         gEmbeddedNNUEBigSize;    // the size of the embedded file
+// and likewise gEmbeddedNNUESmallData/End/Size for EmbeddedNNUESmall.
+// Note that this does not work in Microsoft Visual Studio.
+#if !defined(_MSC_VER) && !defined(NNUE_EMBEDDING_OFF)
+INCBIN(EmbeddedNNUEBig, EvalFileDefaultNameBig);
+INCBIN(EmbeddedNNUESmall, EvalFileDefaultNameSmall);
+#else
+const unsigned char        gEmbeddedNNUEBigData[1]   = {0x0};
+const unsigned char* const gEmbeddedNNUEBigEnd       = &gEmbeddedNNUEBigData[1];
+const unsigned int         gEmbeddedNNUEBigSize      = 1;
+const unsigned char        gEmbeddedNNUESmallData[1] = {0x0};
+const unsigned char* const gEmbeddedNNUESmallEnd     = &gEmbeddedNNUESmallData[1];
+const unsigned int         gEmbeddedNNUESmallSize    = 1;
+#endif
+
+namespace {
+
+// Bundles the three values describing one embedded network image:
+// start pointer, end marker and size.
+struct EmbeddedNNUE {
+    const unsigned char* data;  // start of the embedded data
+    const unsigned char* end;   // end marker of the embedded data
+    const unsigned int   size;  // size of the embedded data
+
+    EmbeddedNNUE(const unsigned char* embeddedData,
+                 const unsigned char* embeddedEnd,
+                 const unsigned int   embeddedSize) :
+        data{embeddedData},
+        end{embeddedEnd},
+        size{embeddedSize} {}
+};
+
+using namespace Stockfish::Eval::NNUE;
+
+// Returns the descriptor of the embedded big net for EmbeddedNNUEType::BIG
+// and the descriptor of the embedded small net for any other value.
+EmbeddedNNUE get_embedded(EmbeddedNNUEType type) {
+    if (type != EmbeddedNNUEType::BIG)
+        return EmbeddedNNUE(gEmbeddedNNUESmallData, gEmbeddedNNUESmallEnd, gEmbeddedNNUESmallSize);
+
+    return EmbeddedNNUE(gEmbeddedNNUEBigData, gEmbeddedNNUEBigEnd, gEmbeddedNNUEBigSize);
+}
+
+}
+
+
+namespace Stockfish::Eval::NNUE {
+
+
+namespace Detail {
+
+// Read evaluation function parameters.
+// Reads a 32-bit little-endian header and only lets `reference` read its own
+// parameters when the stream is still good and the header equals
+// T::get_hash_value(). Returns false otherwise.
+template<typename T>
+bool read_parameters(std::istream& stream, T& reference) {
+
+    const std::uint32_t header   = read_little_endian<std::uint32_t>(stream);
+    const bool          headerOk = stream && header == T::get_hash_value();
+
+    return headerOk && reference.read_parameters(stream);
+}
+
+// Write evaluation function parameters.
+// Emits T::get_hash_value() as a 32-bit little-endian header, then lets
+// `reference` write its own parameters and returns that call's result.
+template<typename T>
+bool write_parameters(std::ostream& stream, T& reference) {
+
+    const std::uint32_t header = T::get_hash_value();
+    write_little_endian<std::uint32_t>(stream, header);
+
+    return reference.write_parameters(stream);
+}
+
+}  // namespace Detail
+
+// Copy constructor: copies the eval file info and embedded type, clones the
+// feature transformer when `other` has one, and always allocates a fresh
+// array of LayerStacks layer stacks, filled from `other` when it has them.
+template<typename Arch, typename Transformer>
+Network<Arch, Transformer>::Network(const Network<Arch, Transformer>& other) :
+    evalFile(other.evalFile),
+    embeddedType(other.embeddedType) {
+
+    if (other.featureTransformer)
+        featureTransformer = make_unique_large_page<Transformer>(*other.featureTransformer);
+
+    network = make_unique_aligned<Arch[]>(LayerStacks);
+
+    if (other.network)
+    {
+        for (std::size_t stack = 0; stack < LayerStacks; ++stack)
+            network[stack] = other.network[stack];
+    }
+}
+
+// Copy assignment: makes this network an independent copy of `other`.
+// Copies the eval file info and embedded type, clones the feature transformer
+// and allocates a fresh array of LayerStacks layer stacks that is filled from
+// `other` when it has them. Returns *this.
+template<typename Arch, typename Transformer>
+Network<Arch, Transformer>&
+Network<Arch, Transformer>::operator=(const Network<Arch, Transformer>& other) {
+    // Self-assignment must be a no-op: the fresh allocation below would
+    // replace `network` before its contents are copied from `other.network`
+    // (the very same member), discarding the layers that were loaded.
+    if (this == &other)
+        return *this;
+
+    evalFile     = other.evalFile;
+    embeddedType = other.embeddedType;
+
+    // Clone the transformer, or drop ours when `other` has none, so that no
+    // stale transformer survives next to the copied eval file info.
+    if (other.featureTransformer)
+        featureTransformer = make_unique_large_page<Transformer>(*other.featureTransformer);
+    else
+        featureTransformer.reset();
+
+    network = make_unique_aligned<Arch[]>(LayerStacks);
+
+    if (!other.network)
+        return *this;
+
+    for (std::size_t i = 0; i < LayerStacks; ++i)
+        network[i] = other.network[i];
+
+    return *this;
+}
+
+// Tries to load the net named `evalfilePath` (the default net when empty).
+// Candidate locations are visited in order: the embedded net ("<internal>"),
+// the path as given, `rootDirectory`, and DEFAULT_NNUE_DIRECTORY when defined.
+// A location is only attempted while evalFile.current differs from the
+// requested name (presumably the loaders update it on success; not shown here).
+template<typename Arch, typename Transformer>
+void Network<Arch, Transformer>::load(const std::string& rootDirectory, std::string evalfilePath) {
+    std::vector<std::string> dirs = {"<internal>", "", rootDirectory};
+#if defined(DEFAULT_NNUE_DIRECTORY)
+    dirs.emplace_back(stringify(DEFAULT_NNUE_DIRECTORY));
+#endif
+
+    if (evalfilePath.empty())
+        evalfilePath = evalFile.defaultName;
+
+    for (const auto& directory : dirs)
+    {
+        // Nothing left to do for this location once the requested net is current
+        if (evalFile.current == evalfilePath)
+            continue;
+
+        const bool isInternal = directory == "<internal>";
+
+        if (!isInternal)
+            load_user_net(directory, evalfilePath);
+        else if (evalfilePath == evalFile.defaultName)
+            load_internal();  // the embedded net is only used for the default name
+    }
+}
+
+
+// Exports the current net to `filename`, or to the default file name when no
+// filename is given. Prints the outcome through sync_cout and returns whether
+// the net was saved.
+template<typename Arch, typename Transformer>
+bool Network<Arch, Transformer>::save(const std::optional<std::string>& filename) const {
+    // Without an explicit filename, only a net whose current name equals the
+    // default name may be exported.
+    if (!filename.has_value() && evalFile.current != evalFile.defaultName)
+    {
+        const std::string failure =
+          "Failed to export a net. "
+          "A non-embedded net can only be saved if the filename is specified";
+
+        sync_cout << failure << sync_endl;
+        return false;
+    }
+
+    std::string actualFilename;
+    if (filename.has_value())
+        actualFilename = filename.value();
+    else
+        actualFilename = evalFile.defaultName;
+
+    std::ofstream stream(actualFilename, std::ios_base::binary);
+    const bool    saved = save(stream, evalFile.current, evalFile.netDescription);
+
+    std::string msg;
+    if (saved)
+        msg = "Network saved successfully to " + actualFilename;
+    else
+        msg = "Failed to export a net";
+
+    sync_cout << msg << sync_endl;
+    return saved;
+}
+
+
+// Evaluates `pos` with this network.
+// Runs the feature transformer into a cache-line aligned stack buffer, feeds
+// that buffer to the layer stack selected by the piece-count bucket, and
+// returns the psqt and positional results, each divided by OutputScale.
+template<typename Arch, typename Transformer>
+NetworkOutput
+Network<Arch, Transformer>::evaluate(const Position&                         pos,
+                                     AccumulatorCaches::Cache<FTDimensions>* cache) const {
+    // We manually align the arrays on the stack because with gcc < 9.3
+    // overaligning stack variables with alignas() doesn't work correctly.
+
+    constexpr uint64_t alignment = CacheLineSize;
+
+#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
+    // Over-allocate by one alignment's worth of elements, then round the
+    // start pointer up to the next aligned address.
+    TransformedFeatureType
+      transformedFeaturesUnaligned[FeatureTransformer<FTDimensions, nullptr>::BufferSize
+                                   + alignment / sizeof(TransformedFeatureType)];
+
+    auto* transformedFeatures = align_ptr_up<alignment>(&transformedFeaturesUnaligned[0]);
+#else
+    alignas(alignment) TransformedFeatureType
+      transformedFeatures[FeatureTransformer<FTDimensions, nullptr>::BufferSize];
+#endif
+
+    ASSERT_ALIGNED(transformedFeatures, alignment);
+
+    // The layer stack is chosen from the number of pieces on the board:
+    // every four pieces map to one bucket.
+    const int  bucket     = (pos.count<ALL_PIECES>() - 1) / 4;
+    const auto psqt       = featureTransformer->transform(pos, cache, transformedFeatures, bucket);
+    const auto positional = network[bucket].propagate(transformedFeatures);
+    return {static_cast<Value>(psqt / OutputScale), static_cast<Value>(positional / OutputScale)};
+}
+
+
+// Checks that the net named `evalfilePath` (the default net when empty) is
+// the one currently loaded. If it is not, reports an error through `f` (when
+// set) and terminates the process with EXIT_FAILURE. Otherwise reports the
+// net's name, size and dimensions through `f` (when set).
+template<typename Arch, typename Transformer>
+void Network<Arch, Transformer>::verify(std::string                                  evalfilePath,
+                                        const std::function<void(std::string_view)>& f) const {
+    if (evalfilePath.empty())
+        evalfilePath = evalFile.defaultName;
+
+    const bool loaded = evalFile.current == evalfilePath;
+
+    if (!loaded)
+    {
+        if (f)
+        {
+            const std::string lines[] = {
+              "Network evaluation parameters compatible with the engine must be available.",
+              "The network file " + evalfilePath + " was not loaded successfully.",
+              "The UCI option EvalFile might need to specify the full path, "
+              "including the directory name, to the network file.",
+              "The default net can be downloaded from: "
+              "https://tests.stockfishchess.org/api/nn/"
+                + evalFile.defaultName,
+              "The engine will be terminated now."};
+
+            // Every line is prefixed with "ERROR: " and ends with a newline
+            std::string report;
+            for (const std::string& line : lines)
+            {
+                report += "ERROR: ";
+                report += line;
+                report += '\n';
+            }
+
+            f(report);
+        }
+
+        exit(EXIT_FAILURE);
+    }
+
+    if (!f)
+        return;
+
+    // Size of the feature transformer plus all layer stacks, in bytes
+    const size_t size = sizeof(*featureTransformer) + sizeof(Arch) * LayerStacks;
+
+    std::string info = "info string NNUE evaluation using " + evalfilePath + " (";
+    info += std::to_string(size / (1024 * 1024)) + "MiB, (";
+    info += std::to_string(featureTransformer->InputDimensions) + ", ";
+    info += std::to_string(network[0].TransformedFeatureDimensions) + ", ";
+    info += std::to_string(network[0].FC_0_OUTPUTS) + ", ";
+    info += std::to_string(network[0].FC_1_OUTPUTS) + ", 1))";
+
+    f(info);
+}
+
+
+// Forwards the hint for `pos` and `cache` to the feature transformer's
+// hint_common_access; this function does nothing else.
+template<typename Arch, typename Transformer>
+void Network<Arch, Transformer>::hint_common_access(
+  const Position& pos, AccumulatorCaches::Cache<FTDimensions>* cache) const {
+    featureTransformer->hint_common_access(pos, cache);
+}
+
+template<typename Arch, typename Transformer>  // Evaluates pos once for every layer stack
+NnueEvalTrace
+Network<Arch, Transformer>::trace_evaluate(const Position&                         pos,
+                                           AccumulatorCaches::Cache<FTDimensions>* cache) const {
+    // We manually align the arrays on the stack because with gcc < 9.3
+    // overaligning stack variables with alignas() doesn't work correctly.
+    constexpr uint64_t alignment = CacheLineSize;
+
+#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
+    TransformedFeatureType
+      transformedFeaturesUnaligned[FeatureTransformer<FTDimensions, nullptr>::BufferSize
+                                   + alignment / sizeof(TransformedFeatureType)];
+
+    auto* transformedFeatures = align_ptr_up<alignment>(&transformedFeaturesUnaligned[0]);
+#else
+    alignas(alignment) TransformedFeatureType
+      transformedFeatures[FeatureTransformer<FTDimensions, nullptr>::BufferSize];
+#endif
+
+    ASSERT_ALIGNED(transformedFeatures, alignment);
+
+    NnueEvalTrace t{};
+    t.correctBucket = (pos.count<ALL_PIECES>() - 1) / 4;  // bucket derived from the piece count
+    for (IndexType bucket = 0; bucket < LayerStacks; ++bucket)  // every bucket, not only correctBucket
+    {
+        const auto materialist =
+          featureTransformer->transform(pos, cache, transformedFeatures, bucket);
+        const auto positional = network[bucket].propagate(transformedFeatures);
+
+        t.psqt[bucket]       = static_cast<Value>(materialist / OutputScale);  // transformer output
+        t.positional[bucket] = static_cast<Value>(positional / OutputScale);  // layer stack output
+    }
+
+    return t;
+}
+
+
+template<typename Arch, typename Transformer>  // Loads a net from the file at dir + evalfilePath
+void Network<Arch, Transformer>::load_user_net(const std::string& dir,
+                                               const std::string& evalfilePath) {
+    std::ifstream stream(dir + evalfilePath, std::ios::binary);  // plain concatenation, no separator added
+    auto          description = load(stream);
+
+    if (description.has_value())  // on failure evalFile is left untouched
+    {
+        evalFile.current        = evalfilePath;
+        evalFile.netDescription = description.value();
+    }
+}
+
+
+template<typename Arch, typename Transformer>  // Loads the embedded net through an in-memory stream
+void Network<Arch, Transformer>::load_internal() {
+    // C++ way to prepare a buffer for a memory stream
+    class MemoryBuffer: public std::basic_streambuf<char> {
+       public:
+        MemoryBuffer(char* p, size_t n) {
+            setg(p, p, p + n);  // get area covers [p, p + n)
+            setp(p, p + n);  // put area covers the same range
+        }
+    };
+
+    const auto embedded = get_embedded(embeddedType);
+
+    MemoryBuffer buffer(const_cast<char*>(reinterpret_cast<const char*>(embedded.data)),  // streambuf API takes char*
+                        size_t(embedded.size));
+
+    std::istream stream(&buffer);  // the buffer is only wrapped in an input stream here
+    auto         description = load(stream);
+
+    if (description.has_value())  // on failure evalFile is left untouched
+    {
+        evalFile.current        = evalFile.defaultName;
+        evalFile.netDescription = description.value();
+    }
+}
+
+
+template<typename Arch, typename Transformer>  // Allocates fresh storage for the whole net
+void Network<Arch, Transformer>::initialize() {
+    featureTransformer = make_unique_large_page<Transformer>();  // replaces any previous transformer
+    network            = make_unique_aligned<Arch[]>(LayerStacks);  // one Arch per layer stack
+}
+
+
+template<typename Arch, typename Transformer>  // Writes the net to stream; returns false on refusal or failure
+bool Network<Arch, Transformer>::save(std::ostream&      stream,
+                                      const std::string& name,
+                                      const std::string& netDescription) const {
+    if (name.empty() || name == "None")  // nothing is written for an empty or "None" name
+        return false;
+
+    return write_parameters(stream, netDescription);
+}
+
+
+template<typename Arch, typename Transformer>  // Reads a net from stream; returns its description or nullopt
+std::optional<std::string> Network<Arch, Transformer>::load(std::istream& stream) {
+    initialize();  // storage is reallocated before reading, so a failed load still replaces it
+    std::string description;
+
+    return read_parameters(stream, description) ? std::make_optional(description) : std::nullopt;
+}
+
+
+// Read network header: version, hash value, description size, then the description bytes
+template<typename Arch, typename Transformer>
+bool Network<Arch, Transformer>::read_header(std::istream&  stream,
+                                             std::uint32_t* hashValue,
+                                             std::string*   desc) const {
+    std::uint32_t version, size;
+
+    version    = read_little_endian<std::uint32_t>(stream);
+    *hashValue = read_little_endian<std::uint32_t>(stream);  // written even if the header is later rejected
+    size       = read_little_endian<std::uint32_t>(stream);
+    if (!stream || version != Version)  // reject on read failure or version mismatch
+        return false;
+    desc->resize(size);  // size is taken from the stream as-is
+    stream.read(&(*desc)[0], size);
+    return !stream.fail();
+}
+
+
+// Write network header: version, hash value, description size, then the description bytes
+template<typename Arch, typename Transformer>
+bool Network<Arch, Transformer>::write_header(std::ostream&      stream,
+                                              std::uint32_t      hashValue,
+                                              const std::string& desc) const {
+    write_little_endian<std::uint32_t>(stream, Version);
+    write_little_endian<std::uint32_t>(stream, hashValue);
+    write_little_endian<std::uint32_t>(stream, std::uint32_t(desc.size()));  // size is stored as 32 bits
+    stream.write(&desc[0], desc.size());
+    return !stream.fail();  // false if any of the writes above failed
+}
+
+
+template<typename Arch, typename Transformer>  // Reads header, transformer and all layer stacks
+bool Network<Arch, Transformer>::read_parameters(std::istream& stream,
+                                                 std::string&  netDescription) const {
+    std::uint32_t hashValue;
+    if (!read_header(stream, &hashValue, &netDescription))
+        return false;
+    if (hashValue != Network::hash)  // the file must match this Transformer/Arch combination
+        return false;
+    if (!Detail::read_parameters(stream, *featureTransformer))
+        return false;
+    for (std::size_t i = 0; i < LayerStacks; ++i)
+    {
+        if (!Detail::read_parameters(stream, network[i]))
+            return false;
+    }
+    return stream && stream.peek() == std::ios::traits_type::eof();  // trailing bytes make the load fail
+}
+
+
+template<typename Arch, typename Transformer>  // Writes header, transformer and all layer stacks
+bool Network<Arch, Transformer>::write_parameters(std::ostream&      stream,
+                                                  const std::string& netDescription) const {
+    if (!write_header(stream, Network::hash, netDescription))
+        return false;
+    if (!Detail::write_parameters(stream, *featureTransformer))
+        return false;
+    for (std::size_t i = 0; i < LayerStacks; ++i)  // same order as read_parameters
+    {
+        if (!Detail::write_parameters(stream, network[i]))
+            return false;
+    }
+    return bool(stream);  // final stream state decides success
+}
+
+// Explicit template instantiation
+
+template class Network<  // big net: Big dimensions, StateInfo::accumulatorBig
+  NetworkArchitecture<TransformedFeatureDimensionsBig, L2Big, L3Big>,
+  FeatureTransformer<TransformedFeatureDimensionsBig, &StateInfo::accumulatorBig>>;
+
+template class Network<  // small net: Small dimensions, StateInfo::accumulatorSmall
+  NetworkArchitecture<TransformedFeatureDimensionsSmall, L2Small, L3Small>,
+  FeatureTransformer<TransformedFeatureDimensionsSmall, &StateInfo::accumulatorSmall>>;
+
+}  // namespace Stockfish::Eval::NNUE
@@ -0,0 +1,134 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef NETWORK_H_INCLUDED
+#define NETWORK_H_INCLUDED
+
+#include <cstdint>
+#include <functional>
+#include <iostream>
+#include <optional>
+#include <string>
+#include <string_view>
+#include <tuple>
+#include <utility>
+
+#include "../memory.h"
+#include "../position.h"
+#include "../types.h"
+#include "nnue_accumulator.h"
+#include "nnue_architecture.h"
+#include "nnue_feature_transformer.h"
+#include "nnue_misc.h"
+
+namespace Stockfish::Eval::NNUE {
+
+enum class EmbeddedNNUEType {  // stored per Network as embeddedType and passed to get_embedded()
+    BIG,
+    SMALL,
+};
+
+using NetworkOutput = std::tuple<Value, Value>;
+
+template<typename Arch, typename Transformer>  // A feature transformer plus LayerStacks Arch objects
+class Network {
+    static constexpr IndexType FTDimensions = Arch::TransformedFeatureDimensions;
+
+   public:
+    Network(EvalFile file, EmbeddedNNUEType type) :  // only stores its arguments
+        evalFile(file),
+        embeddedType(type) {}
+
+    Network(const Network& other);  // user-provided copy operations, defined out of line
+    Network(Network&& other) = default;
+
+    Network& operator=(const Network& other);
+    Network& operator=(Network&& other) = default;
+
+    void load(const std::string& rootDirectory, std::string evalfilePath);
+    bool save(const std::optional<std::string>& filename) const;
+
+    NetworkOutput evaluate(const Position&                         pos,
+                           AccumulatorCaches::Cache<FTDimensions>* cache) const;
+
+
+    void hint_common_access(const Position&                         pos,
+                            AccumulatorCaches::Cache<FTDimensions>* cache) const;
+
+    void verify(std::string evalfilePath, const std::function<void(std::string_view)>&) const;  // may call exit()
+    NnueEvalTrace trace_evaluate(const Position&                         pos,
+                                 AccumulatorCaches::Cache<FTDimensions>* cache) const;
+
+   private:
+    void load_user_net(const std::string&, const std::string&);  // (dir, evalfilePath)
+    void load_internal();  // uses the net selected by embeddedType
+
+    void initialize();  // (re)allocates featureTransformer and network
+
+    bool                       save(std::ostream&, const std::string&, const std::string&) const;
+    std::optional<std::string> load(std::istream&);  // returns the net description on success
+
+    bool read_header(std::istream&, std::uint32_t*, std::string*) const;
+    bool write_header(std::ostream&, std::uint32_t, const std::string&) const;
+
+    bool read_parameters(std::istream&, std::string&) const;
+    bool write_parameters(std::ostream&, const std::string&) const;
+
+    // Input feature converter
+    LargePagePtr<Transformer> featureTransformer;
+
+    // Evaluation function
+    AlignedPtr<Arch[]> network;  // LayerStacks elements, see initialize()
+
+    EvalFile         evalFile;
+    EmbeddedNNUEType embeddedType;
+
+    // Hash value of evaluation function structure
+    static constexpr std::uint32_t hash = Transformer::get_hash_value() ^ Arch::get_hash_value();
+
+    template<IndexType Size>
+    friend struct AccumulatorCaches::Cache;  // Cache::clear reads featureTransformer->biases
+};
+
+// Definitions of the network types
+using SmallFeatureTransformer =  // small net: transformer tied to StateInfo::accumulatorSmall
+  FeatureTransformer<TransformedFeatureDimensionsSmall, &StateInfo::accumulatorSmall>;
+using SmallNetworkArchitecture =
+  NetworkArchitecture<TransformedFeatureDimensionsSmall, L2Small, L3Small>;
+
+using BigFeatureTransformer =  // big net: transformer tied to StateInfo::accumulatorBig
+  FeatureTransformer<TransformedFeatureDimensionsBig, &StateInfo::accumulatorBig>;
+using BigNetworkArchitecture = NetworkArchitecture<TransformedFeatureDimensionsBig, L2Big, L3Big>;
+
+using NetworkBig   = Network<BigNetworkArchitecture, BigFeatureTransformer>;  // same arguments as the explicit instantiations
+using NetworkSmall = Network<SmallNetworkArchitecture, SmallFeatureTransformer>;
+
+
+struct Networks {  // Bundles one big and one small net
+    Networks(NetworkBig&& nB, NetworkSmall&& nS) :  // both nets are moved into the members
+        big(std::move(nB)),
+        small(std::move(nS)) {}
+
+    NetworkBig   big;
+    NetworkSmall small;
+};
+
+
+}  // namespace Stockfish::Eval::NNUE
+
+#endif
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -21,20 +21,80 @@
 #ifndef NNUE_ACCUMULATOR_H_INCLUDED
 #define NNUE_ACCUMULATOR_H_INCLUDED

+#include <cstdint>
+
 #include "nnue_architecture.h"
+#include "nnue_common.h"

-namespace Eval::NNUE {
+namespace Stockfish::Eval::NNUE {

-  // The accumulator of a StateInfo without parent is set to the INIT state
-  enum AccumulatorState { EMPTY, COMPUTED, INIT };
+using BiasType       = std::int16_t;  // element type of Cache::Entry::accumulation
+using PSQTWeightType = std::int32_t;  // element type of Cache::Entry::psqtAccumulation
+using IndexType      = std::uint32_t;

-  // Class that holds the result of affine transformation of input features
-  struct alignas(kCacheLineSize) Accumulator {
-    std::int16_t
-        accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions];
-    AccumulatorState state[2];
-  };
+// Class that holds the result of affine transformation of input features
+template<IndexType Size>  // Size is the number of accumulation entries per color
+struct alignas(CacheLineSize) Accumulator {
+    std::int16_t accumulation[COLOR_NB][Size];  // one row per color
+    std::int32_t psqtAccumulation[COLOR_NB][PSQTBuckets];  // one value per color and PSQT bucket
+    bool         computed[COLOR_NB];  // one flag per color
+};

-}  // namespace Eval::NNUE

-#endif // NNUE_ACCUMULATOR_H_INCLUDED
+// AccumulatorCaches struct provides per-thread accumulator caches, where each
+// cache contains multiple entries for each of the possible king squares.
+// When the accumulator needs to be refreshed, the cached entry is used to more
+// efficiently update the accumulator, instead of rebuilding it from scratch.
+// This idea was first described by Luecx (author of Koivisto) and
+// is commonly referred to as "Finny Tables".
+struct AccumulatorCaches {
+
+    template<typename Networks>
+    AccumulatorCaches(const Networks& networks) {  // both caches start out cleared from the given nets
+        clear(networks);
+    }
+
+    template<IndexType Size>
+    struct alignas(CacheLineSize) Cache {
+
+        struct alignas(CacheLineSize) Entry {
+            BiasType       accumulation[Size];
+            PSQTWeightType psqtAccumulation[PSQTBuckets];
+            Bitboard       byColorBB[COLOR_NB];
+            Bitboard       byTypeBB[PIECE_TYPE_NB];
+
+            // To initialize a refresh entry, we set all its bitboards empty,
+            // so we put the biases in the accumulation, without any weights on top
+            void clear(const BiasType* biases) {
+
+                std::memcpy(accumulation, biases, sizeof(accumulation));  // biases must hold at least Size values
+                std::memset((uint8_t*) this + offsetof(Entry, psqtAccumulation), 0,  // zero everything after accumulation
+                            sizeof(Entry) - offsetof(Entry, psqtAccumulation));
+            }
+        };
+
+        template<typename Network>
+        void clear(const Network& network) {  // resets every entry to the net's transformer biases
+            for (auto& entries1D : entries)
+                for (auto& entry : entries1D)
+                    entry.clear(network.featureTransformer->biases);
+        }
+
+        std::array<Entry, COLOR_NB>& operator[](Square sq) { return entries[sq]; }  // the COLOR_NB entries of one square
+
+        std::array<std::array<Entry, COLOR_NB>, SQUARE_NB> entries;  // indexed [square][color]
+    };
+
+    template<typename Networks>
+    void clear(const Networks& networks) {  // expects members named big and small
+        big.clear(networks.big);
+        small.clear(networks.small);
+    }
+
+    Cache<TransformedFeatureDimensionsBig>   big;
+    Cache<TransformedFeatureDimensionsSmall> small;
+};
+
+}  // namespace Stockfish::Eval::NNUE
+
+#endif  // NNUE_ACCUMULATOR_H_INCLUDED
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -21,18 +21,117 @@
 #ifndef NNUE_ARCHITECTURE_H_INCLUDED
 #define NNUE_ARCHITECTURE_H_INCLUDED

-// Defines the network structure
-#include "architectures/halfkp_256x2-32-32.h"
+#include <cstdint>
+#include <cstring>
+#include <iosfwd>

-namespace Eval::NNUE {
+#include "features/half_ka_v2_hm.h"
+#include "layers/affine_transform.h"
+#include "layers/affine_transform_sparse_input.h"
+#include "layers/clipped_relu.h"
+#include "layers/sqr_clipped_relu.h"
+#include "nnue_common.h"

-  static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0, "");
-  static_assert(Network::kOutputDimensions == 1, "");
-  static_assert(std::is_same<Network::OutputType, std::int32_t>::value, "");
+namespace Stockfish::Eval::NNUE {

-  // Trigger for full calculation instead of difference calculation
-  constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers;
+// Input features used in evaluation function
+using FeatureSet = Features::HalfKAv2_hm;

-}  // namespace Eval::NNUE
+// Number of input feature dimensions after conversion
+constexpr IndexType TransformedFeatureDimensionsBig = 3072;
+constexpr int       L2Big                           = 15;  // passed as L2 of NetworkArchitecture
+constexpr int       L3Big                           = 32;  // passed as L3 of NetworkArchitecture

-#endif // #ifndef NNUE_ARCHITECTURE_H_INCLUDED
+constexpr IndexType TransformedFeatureDimensionsSmall = 128;
+constexpr int       L2Small                           = 15;  // passed as L2 of NetworkArchitecture
+constexpr int       L3Small                           = 32;  // passed as L3 of NetworkArchitecture
+
+constexpr IndexType PSQTBuckets = 8;  // second dimension of the psqtAccumulation arrays
+constexpr IndexType LayerStacks = 8;  // number of Arch objects held by each Network
+
+template<IndexType L1, int L2, int L3>  // L1: transformer width, L2/L3: outputs of the first two affine layers
+struct NetworkArchitecture {
+    static constexpr IndexType TransformedFeatureDimensions = L1;
+    static constexpr int       FC_0_OUTPUTS                 = L2;
+    static constexpr int       FC_1_OUTPUTS                 = L3;
+
+    Layers::AffineTransformSparseInput<TransformedFeatureDimensions, FC_0_OUTPUTS + 1> fc_0;
+    Layers::SqrClippedReLU<FC_0_OUTPUTS + 1>                                           ac_sqr_0;
+    Layers::ClippedReLU<FC_0_OUTPUTS + 1>                                              ac_0;
+    Layers::AffineTransform<FC_0_OUTPUTS * 2, FC_1_OUTPUTS>                            fc_1;
+    Layers::ClippedReLU<FC_1_OUTPUTS>                                                  ac_1;
+    Layers::AffineTransform<FC_1_OUTPUTS, 1>                                           fc_2;
+
+    // Hash value embedded in the evaluation file
+    static constexpr std::uint32_t get_hash_value() {
+        // input slice hash
+        std::uint32_t hashValue = 0xEC42E90Du;
+        hashValue ^= TransformedFeatureDimensions * 2;
+
+        hashValue = decltype(fc_0)::get_hash_value(hashValue);  // each layer folds in the previous hash
+        hashValue = decltype(ac_0)::get_hash_value(hashValue);  // ac_sqr_0 is not part of the chain
+        hashValue = decltype(fc_1)::get_hash_value(hashValue);
+        hashValue = decltype(ac_1)::get_hash_value(hashValue);
+        hashValue = decltype(fc_2)::get_hash_value(hashValue);
+
+        return hashValue;
+    }
+
+    // Read network parameters
+    bool read_parameters(std::istream& stream) {
+        return fc_0.read_parameters(stream) && ac_0.read_parameters(stream)  // stops at the first failure
+            && fc_1.read_parameters(stream) && ac_1.read_parameters(stream)
+            && fc_2.read_parameters(stream);
+    }
+
+    // Write network parameters
+    bool write_parameters(std::ostream& stream) const {
+        return fc_0.write_parameters(stream) && ac_0.write_parameters(stream)  // same layer order as read_parameters
+            && fc_1.write_parameters(stream) && ac_1.write_parameters(stream)
+            && fc_2.write_parameters(stream);
+    }
+
+    std::int32_t propagate(const TransformedFeatureType* transformedFeatures) {  // runs the layer chain on one input
+        struct alignas(CacheLineSize) Buffer {
+            alignas(CacheLineSize) typename decltype(fc_0)::OutputBuffer fc_0_out;
+            alignas(CacheLineSize) typename decltype(ac_sqr_0)::OutputType
+              ac_sqr_0_out[ceil_to_multiple<IndexType>(FC_0_OUTPUTS * 2, 32)];  // also receives ac_0's output below
+            alignas(CacheLineSize) typename decltype(ac_0)::OutputBuffer ac_0_out;
+            alignas(CacheLineSize) typename decltype(fc_1)::OutputBuffer fc_1_out;
+            alignas(CacheLineSize) typename decltype(ac_1)::OutputBuffer ac_1_out;
+            alignas(CacheLineSize) typename decltype(fc_2)::OutputBuffer fc_2_out;
+
+            Buffer() { std::memset(this, 0, sizeof(*this)); }  // all scratch space starts zeroed
+        };
+
+#if defined(__clang__) && (__APPLE__)
+        // workaround for a bug reported with xcode 12
+        static thread_local auto tlsBuffer = std::make_unique<Buffer>();
+        // Access TLS only once, cache result.
+        Buffer& buffer = *tlsBuffer;
+#else
+        alignas(CacheLineSize) static thread_local Buffer buffer;  // one scratch buffer per thread
+#endif
+
+        fc_0.propagate(transformedFeatures, buffer.fc_0_out);
+        ac_sqr_0.propagate(buffer.fc_0_out, buffer.ac_sqr_0_out);
+        ac_0.propagate(buffer.fc_0_out, buffer.ac_0_out);
+        std::memcpy(buffer.ac_sqr_0_out + FC_0_OUTPUTS, buffer.ac_0_out,  // place ac_0's first FC_0_OUTPUTS values after the squared ones
+                    FC_0_OUTPUTS * sizeof(typename decltype(ac_0)::OutputType));
+        fc_1.propagate(buffer.ac_sqr_0_out, buffer.fc_1_out);  // fc_1 takes FC_0_OUTPUTS * 2 inputs
+        ac_1.propagate(buffer.fc_1_out, buffer.ac_1_out);
+        fc_2.propagate(buffer.ac_1_out, buffer.fc_2_out);
+
+        // buffer.fc_0_out[FC_0_OUTPUTS] is such that 1.0 is equal to 127*(1<<WeightScaleBits) in
+        // quantized form, but we want 1.0 to be equal to 600*OutputScale
+        std::int32_t fwdOut =
+          (buffer.fc_0_out[FC_0_OUTPUTS]) * (600 * OutputScale) / (127 * (1 << WeightScaleBits));
+        std::int32_t outputValue = buffer.fc_2_out[0] + fwdOut;  // the extra fc_0 output is added to fc_2's result
+
+        return outputValue;
+    }
+};
+
+}  // namespace Stockfish::Eval::NNUE
+
+#endif  // #ifndef NNUE_ARCHITECTURE_H_INCLUDED
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -21,112 +21,264 @@
 #ifndef NNUE_COMMON_H_INCLUDED
 #define NNUE_COMMON_H_INCLUDED

+#include <algorithm>
+#include <cassert>
+#include <cstdint>
 #include <cstring>
 #include <iostream>
+#include <type_traits>
+
+#include "../misc.h"

 #if defined(USE_AVX2)
-#include <immintrin.h>
+    #include <immintrin.h>

 #elif defined(USE_SSE41)
-#include <smmintrin.h>
+    #include <smmintrin.h>

 #elif defined(USE_SSSE3)
-#include <tmmintrin.h>
+    #include <tmmintrin.h>

 #elif defined(USE_SSE2)
-#include <emmintrin.h>
-
-#elif defined(USE_MMX)
-#include <mmintrin.h>
+    #include <emmintrin.h>

 #elif defined(USE_NEON)
-#include <arm_neon.h>
+    #include <arm_neon.h>
 #endif

-namespace Eval::NNUE {
+namespace Stockfish::Eval::NNUE {

-  // Version of the evaluation file
-  constexpr std::uint32_t kVersion = 0x7AF32F16u;
+// Version of the evaluation file
+constexpr std::uint32_t Version = 0x7AF32F20u;

-  // Constant used in evaluation value calculation
-  constexpr int FV_SCALE = 16;
-  constexpr int kWeightScaleBits = 6;
+// Constant used in evaluation value calculation
+constexpr int OutputScale     = 16;
+constexpr int WeightScaleBits = 6;

-  // Size of cache line (in bytes)
-  constexpr std::size_t kCacheLineSize = 64;
+// Size of cache line (in bytes)
+constexpr std::size_t CacheLineSize = 64;

-  // SIMD width (in bytes)
-  #if defined(USE_AVX2)
-  constexpr std::size_t kSimdWidth = 32;
+constexpr const char        Leb128MagicString[]   = "COMPRESSED_LEB128";
+constexpr const std::size_t Leb128MagicStringSize = sizeof(Leb128MagicString) - 1;

-  #elif defined(USE_SSE2)
-  constexpr std::size_t kSimdWidth = 16;
+// SIMD width (in bytes)
+#if defined(USE_AVX2)
+constexpr std::size_t SimdWidth = 32;

-  #elif defined(USE_MMX)
-  constexpr std::size_t kSimdWidth = 8;
+#elif defined(USE_SSE2)
+constexpr std::size_t SimdWidth = 16;

-  #elif defined(USE_NEON)
-  constexpr std::size_t kSimdWidth = 16;
-  #endif
+#elif defined(USE_NEON)
+constexpr std::size_t SimdWidth = 16;
+#endif

-  constexpr std::size_t kMaxSimdWidth = 32;
+constexpr std::size_t MaxSimdWidth = 32;

-  // unique number for each piece type on each square
-  enum {
-    PS_NONE     =  0,
-    PS_W_PAWN   =  1,
-    PS_B_PAWN   =  1 * SQUARE_NB + 1,
-    PS_W_KNIGHT =  2 * SQUARE_NB + 1,
-    PS_B_KNIGHT =  3 * SQUARE_NB + 1,
-    PS_W_BISHOP =  4 * SQUARE_NB + 1,
-    PS_B_BISHOP =  5 * SQUARE_NB + 1,
-    PS_W_ROOK   =  6 * SQUARE_NB + 1,
-    PS_B_ROOK   =  7 * SQUARE_NB + 1,
-    PS_W_QUEEN  =  8 * SQUARE_NB + 1,
-    PS_B_QUEEN  =  9 * SQUARE_NB + 1,
-    PS_W_KING   = 10 * SQUARE_NB + 1,
-    PS_END      = PS_W_KING, // pieces without kings (pawns included)
-    PS_B_KING   = 11 * SQUARE_NB + 1,
-    PS_END2     = 12 * SQUARE_NB + 1
-  };
+// Type of input feature after conversion
+using TransformedFeatureType = std::uint8_t;
+using IndexType              = std::uint32_t;

-  constexpr uint32_t kpp_board_index[COLOR_NB][PIECE_NB] = {
-    // convention: W - us, B - them
-    // viewed from other side, W and B are reversed
-    { PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_W_KING, PS_NONE,
-      PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_B_KING, PS_NONE },
-    { PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_B_KING, PS_NONE,
-      PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_W_KING, PS_NONE }
-  };
+// Round n up to be a multiple of base
+template<typename IntType>
+constexpr IntType ceil_to_multiple(IntType n, IntType base) {
+    return (n + base - 1) / base * base;  // relies on integer division; presumably n >= 0 and base > 0
+}

-  // Type of input feature after conversion
-  using TransformedFeatureType = std::uint8_t;
-  using IndexType = std::uint32_t;

-  // Round n up to be a multiple of base
-  template <typename IntType>
-  constexpr IntType CeilToMultiple(IntType n, IntType base) {
-      return (n + base - 1) / base * base;
-  }
+// Utility to read an integer (signed or unsigned, any size)
+// from a stream in little-endian order. We swap the byte order after the read if
+// necessary to return a result with the byte ordering of the compiling machine.
+template<typename IntType>
+inline IntType read_little_endian(std::istream& stream) {
+    IntType result;  // NOTE(review): not initialized here; read_header checks the stream state after reading
 
-  // read_little_endian() is our utility to read an integer (signed or unsigned, any size)
-  // from a stream in little-endian order. We swap the byte order after the read if
-  // necessary to return a result with the byte ordering of the compiling machine.
-  template <typename IntType>
-  inline IntType read_little_endian(std::istream& stream) {
+    if (IsLittleEndian)  // bytes in the stream already have the host order
+        stream.read(reinterpret_cast<char*>(&result), sizeof(IntType));
+    else
+    {
+        std::uint8_t                  u[sizeof(IntType)];
+        std::make_unsigned_t<IntType> v = 0;  // unsigned so the shifts below are well defined
 
-      IntType result;
-      std::uint8_t u[sizeof(IntType)];
-      typename std::make_unsigned<IntType>::type v = 0;
+        stream.read(reinterpret_cast<char*>(u), sizeof(IntType));
+        for (std::size_t i = 0; i < sizeof(IntType); ++i)  // start from the last byte read, the most significant one
+            v = (v << 8) | u[sizeof(IntType) - i - 1];
 
-      stream.read(reinterpret_cast<char*>(u), sizeof(IntType));
-      for (std::size_t i = 0; i < sizeof(IntType); ++i)
-          v = (v << 8) | u[sizeof(IntType) - i - 1];
+        std::memcpy(&result, &v, sizeof(IntType));  // copy the bit pattern into the possibly signed result
+    }
 
-      std::memcpy(&result, &v, sizeof(IntType));
-      return result;
-  }
+    return result;
+}

-}  // namespace Eval::NNUE

-#endif // #ifndef NNUE_COMMON_H_INCLUDED
+// Utility to write an integer (signed or unsigned, any size)
+// to a stream in little-endian order. We swap the byte order before the write if
+// necessary to always write in little-endian order, independently of the byte
+// ordering of the compiling machine.
+template<typename IntType>
+inline void write_little_endian(std::ostream& stream, IntType value) {
+
+    if (IsLittleEndian)  // host order already matches the file order
+        stream.write(reinterpret_cast<const char*>(&value), sizeof(IntType));
+    else
+    {
+        std::uint8_t                  u[sizeof(IntType)];
+        std::make_unsigned_t<IntType> v = value;
+
+        std::size_t i = 0;
+        // if constexpr to silence the warning about shift by 8
+        if constexpr (sizeof(IntType) > 1)
+        {
+            for (; i + 1 < sizeof(IntType); ++i)  // all bytes except the last, least significant first
+            {
+                u[i] = std::uint8_t(v);
+                v >>= 8;
+            }
+        }
+        u[i] = std::uint8_t(v);  // last byte, stored without a further shift
+
+        stream.write(reinterpret_cast<char*>(u), sizeof(IntType));
+    }
+}
+
+
+// Read integers in bulk from a little-endian stream.
+// This reads count integers from stream and puts them in the array out.
+template<typename IntType>
+inline void read_little_endian(std::istream& stream, IntType* out, std::size_t count) {
+    if (IsLittleEndian)  // one bulk read when no byte swapping is needed
+        stream.read(reinterpret_cast<char*>(out), sizeof(IntType) * count);
+    else
+        for (std::size_t i = 0; i < count; ++i)  // otherwise convert one value at a time
+            out[i] = read_little_endian<IntType>(stream);
+}
+
+
+// Write integers in bulk to a little-endian stream.
+// This takes count integers from the array values and writes them to stream.
+template<typename IntType>
+inline void write_little_endian(std::ostream& stream, const IntType* values, std::size_t count) {
+    if (IsLittleEndian)  // one bulk write when no byte swapping is needed
+        stream.write(reinterpret_cast<const char*>(values), sizeof(IntType) * count);
+    else
+        for (std::size_t i = 0; i < count; ++i)  // otherwise convert one value at a time
+            write_little_endian<IntType>(stream, values[i]);
+}
+
+
+// Read count signed integers from stream, putting them in the array out.
+// The stream is assumed to be compressed using the signed LEB128 format.
+// See https://en.wikipedia.org/wiki/LEB128 for a description of the compression scheme.
+template<typename IntType>
+inline void read_leb_128(std::istream& stream, IntType* out, std::size_t count) {
+
+    // Check the presence of our LEB128 magic string
+    char leb128MagicString[Leb128MagicStringSize];
+    stream.read(leb128MagicString, Leb128MagicStringSize);
+    assert(strncmp(Leb128MagicString, leb128MagicString, Leb128MagicStringSize) == 0);  // checked only when asserts are enabled
+
+    static_assert(std::is_signed_v<IntType>, "Not implemented for unsigned types");
+
+    const std::uint32_t BUF_SIZE = 4096;
+    std::uint8_t        buf[BUF_SIZE];
+
+    auto bytes_left = read_little_endian<std::uint32_t>(stream);  // byte count stored ahead of the payload (see write_leb_128)
+
+    std::uint32_t buf_pos = BUF_SIZE;  // forces a buffer fill on the first byte
+    for (std::size_t i = 0; i < count; ++i)
+    {
+        IntType result = 0;
+        size_t  shift  = 0;
+        do
+        {
+            if (buf_pos == BUF_SIZE)  // buffer exhausted: fetch the next chunk
+            {
+                stream.read(reinterpret_cast<char*>(buf), std::min(bytes_left, BUF_SIZE));
+                buf_pos = 0;
+            }
+
+            std::uint8_t byte = buf[buf_pos++];
+            --bytes_left;
+            result |= (byte & 0x7f) << shift;  // low 7 bits carry data
+            shift += 7;
+
+            if ((byte & 0x80) == 0)  // a clear high bit marks the last byte of this value
+            {
+                out[i] = (sizeof(IntType) * 8 <= shift || (byte & 0x40) == 0)  // bit 0x40 of the last byte is the sign
+                         ? result
+                         : result | ~((1 << shift) - 1);  // sign-extend by setting all bits from shift upward
+                break;
+            }
+        } while (shift < sizeof(IntType) * 8);  // NOTE(review): if this ends the loop, out[i] is not assigned
+    }
+
+    assert(bytes_left == 0);  // the payload should be consumed exactly
+}
+
+
+// Write signed integers to a stream with LEB128 compression.
+// This takes count integers from the array values, compresses them with
+// the LEB128 algorithm and writes the result to stream.
+// See https://en.wikipedia.org/wiki/LEB128 for a description of the compression scheme.
+template<typename IntType>
+inline void write_leb_128(std::ostream& stream, const IntType* values, std::size_t count) {
+
+    // Write our LEB128 magic string
+    stream.write(Leb128MagicString, Leb128MagicStringSize);
+
+    static_assert(std::is_signed_v<IntType>, "Not implemented for unsigned types");
+
+    std::uint32_t byte_count = 0;
+    for (std::size_t i = 0; i < count; ++i)  // first pass: only count the encoded bytes
+    {
+        IntType      value = values[i];
+        std::uint8_t byte;
+        do
+        {
+            byte = value & 0x7f;
+            value >>= 7;
+            ++byte_count;
+        } while ((byte & 0x40) == 0 ? value != 0 : value != -1);  // same stop rule as the emitting loop below
+    }
+
+    write_little_endian(stream, byte_count);  // payload size precedes the payload
+
+    const std::uint32_t BUF_SIZE = 4096;
+    std::uint8_t        buf[BUF_SIZE];
+    std::uint32_t       buf_pos = 0;
+
+    auto flush = [&]() {  // writes out whatever is buffered
+        if (buf_pos > 0)
+        {
+            stream.write(reinterpret_cast<char*>(buf), buf_pos);
+            buf_pos = 0;
+        }
+    };
+
+    auto write = [&](std::uint8_t byte) {  // buffers one byte, flushing when the buffer is full
+        buf[buf_pos++] = byte;
+        if (buf_pos == BUF_SIZE)
+            flush();
+    };
+
+    for (std::size_t i = 0; i < count; ++i)  // second pass: emit the bytes
+    {
+        IntType value = values[i];
+        while (true)
+        {
+            std::uint8_t byte = value & 0x7f;
+            value >>= 7;
+            if ((byte & 0x40) == 0 ? value == 0 : value == -1)  // the rest is pure sign extension of bit 0x40
+            {
+                write(byte);  // last byte: high bit clear
+                break;
+            }
+            write(byte | 0x80);  // more bytes follow: high bit set
+        }
+    }
+
+    flush();  // write the final partial buffer
+}
+
+}  // namespace Stockfish::Eval::NNUE
+
+#endif  // #ifndef NNUE_COMMON_H_INCLUDED
@@ -0,0 +1,203 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// Code for calculating NNUE evaluation function
+
+#include "nnue_misc.h"
+
+#include <cmath>
+#include <cstdlib>
+#include <cstring>
+#include <iomanip>
+#include <iosfwd>
+#include <iostream>
+#include <sstream>
+#include <string_view>
+#include <tuple>
+
+#include "../evaluate.h"
+#include "../position.h"
+#include "../types.h"
+#include "../uci.h"
+#include "network.h"
+#include "nnue_accumulator.h"
+
+namespace Stockfish::Eval::NNUE {
+
+
+constexpr std::string_view PieceToChar(" PNBRQK  pnbrqk");
+
+
+void hint_common_parent_position(const Position&    pos,
+                                 const Networks&    networks,
+                                 AccumulatorCaches& caches) {
+    if (!Eval::use_smallnet(pos))  // only the network selected for pos gets the hint
+        networks.big.hint_common_access(pos, &caches.big);
+    else
+        networks.small.hint_common_access(pos, &caches.small);
+}
+
+namespace {
+// Renders v as five characters in buffer[0..4] (no '\0' is added): a sign column, then
+// the magnitude of UCIEngine::to_cp(v, pos) with as many decimals as still fit.
+void format_cp_compact(Value v, char* buffer, const Position& pos) {
+
+    if (v < 0)
+        buffer[0] = '-';
+    else
+        buffer[0] = v > 0 ? '+' : ' ';
+
+    const int cp = std::abs(UCIEngine::to_cp(v, pos));
+    // Character for the decimal digit of cp at the given power of ten
+    const auto digit = [cp](int unit) { return char('0' + cp / unit % 10); };
+
+    if (cp < 1000)  // below 10 pawns: "d.dd"
+    {
+        buffer[1] = char('0' + cp / 100);
+        buffer[2] = '.';
+        buffer[3] = digit(10);
+        buffer[4] = digit(1);
+    }
+    else if (cp < 10000)  // below 100 pawns: "dd.d"
+    {
+        buffer[1] = char('0' + cp / 1000);
+        buffer[2] = digit(100);
+        buffer[3] = '.';
+        buffer[4] = digit(10);
+    }
+    else  // 100 pawns or more: "ddd" plus a blank; the first character is not reduced mod 10
+    {
+        buffer[1] = char('0' + cp / 10000);
+        buffer[2] = digit(1000);
+        buffer[3] = digit(100);
+        buffer[4] = ' ';
+    }
+}
+
+
+// Appends v to stream as a sign column ('-', '+' or ' ' for zero) followed by
+// its magnitude in pawns: fixed notation, two decimals, field width 6.
+void format_cp_aligned_dot(Value v, std::stringstream& stream, const Position& pos) {
+
+    const double absPawns = std::abs(0.01 * UCIEngine::to_cp(v, pos));
+    const char   sign     = v < 0 ? '-' : v > 0 ? '+' : ' ';
+
+    stream << sign;
+    stream << std::setiosflags(std::ios::fixed) << std::setw(6) << std::setprecision(2) << absPawns;
+}
+}
+
+
+// Returns a string with the value of each piece on a board, and a table for (PSQT, Layers)
+// values bucket by bucket. pos is edited in place meanwhile: pieces are removed and put back.
+std::string
+trace(Position& pos, const Eval::NNUE::Networks& networks, Eval::NNUE::AccumulatorCaches& caches) {
+
+    std::stringstream ss;
+
+    char board[3 * 8 + 1][8 * 8 + 2];
+    std::memset(board, ' ', sizeof(board));
+    for (int row = 0; row < 3 * 8 + 1; ++row)
+        board[row][8 * 8 + 1] = '\0';
+
+    // A lambda to output one box of the board
+    auto writeSquare = [&board, &pos](File file, Rank rank, Piece pc, Value value) {
+        const int x = int(file) * 8;
+        const int y = (7 - int(rank)) * 3;
+        for (int i = 1; i < 8; ++i)
+            board[y][x + i] = board[y + 3][x + i] = '-';
+        for (int i = 1; i < 3; ++i)
+            board[y + i][x] = board[y + i][x + 8] = '|';
+        board[y][x] = board[y][x + 8] = board[y + 3][x + 8] = board[y + 3][x] = '+';
+        if (pc != NO_PIECE)
+            board[y + 1][x + 4] = PieceToChar[pc];
+        if (is_valid(value))
+            format_cp_compact(value, &board[y + 2][x + 2], pos);
+    };
+
+    // We estimate the value of each piece as the base eval minus the eval with the piece
+    // removed, both taken from White's point of view (big network, PSQT + positional).
+    auto whiteEval = [&]() -> Value {
+        auto [psqt, positional] = networks.big.evaluate(pos, &caches.big);
+        const Value eval        = psqt + positional;
+        return pos.side_to_move() == WHITE ? eval : -eval;
+    };
+    const Value base = whiteEval();
+
+    for (File f = FILE_A; f <= FILE_H; ++f)
+        for (Rank r = RANK_1; r <= RANK_8; ++r)
+        {
+            Square sq = make_square(f, r);
+            Piece  pc = pos.piece_on(sq);
+            Value  v  = VALUE_NONE;
+
+            if (pc != NO_PIECE && type_of(pc) != KING)
+            {
+                auto st = pos.state();
+
+                pos.remove_piece(sq);
+                st->accumulatorBig.computed[WHITE] = st->accumulatorBig.computed[BLACK] = false;
+
+                v = base - whiteEval();
+
+                pos.put_piece(pc, sq);
+                st->accumulatorBig.computed[WHITE] = st->accumulatorBig.computed[BLACK] = false;
+            }
+
+            writeSquare(f, r, pc, v);
+        }
+
+    ss << " NNUE derived piece values:\n";
+    for (int row = 0; row < 3 * 8 + 1; ++row)
+        ss << board[row] << '\n';
+    ss << '\n';
+
+    auto t = networks.big.trace_evaluate(pos, &caches.big);
+
+    ss << " NNUE network contributions "
+       << (pos.side_to_move() == WHITE ? "(White to move)" : "(Black to move)") << '\n'
+       << "+------------+------------+------------+------------+\n"
+       << "|   Bucket   |  Material  | Positional |   Total    |\n"
+       << "|            |   (PSQT)   |  (Layers)  |            |\n"
+       << "+------------+------------+------------+------------+\n";
+
+    for (std::size_t bucket = 0; bucket < LayerStacks; ++bucket)
+    {
+        ss << "|  " << bucket << "        "  //
+           << " |  ";
+        format_cp_aligned_dot(t.psqt[bucket], ss, pos);
+        ss << "  "  //
+           << " |  ";
+        format_cp_aligned_dot(t.positional[bucket], ss, pos);
+        ss << "  "  //
+           << " |  ";
+        format_cp_aligned_dot(t.psqt[bucket] + t.positional[bucket], ss, pos);
+        ss << "  "  //
+           << " |";
+        if (bucket == t.correctBucket)
+            ss << " <-- this bucket is used";
+        ss << '\n';
+    }
+
+    ss << "+------------+------------+------------+------------+\n";
+
+    return ss.str();
+}
+
+
+}  // namespace Stockfish::Eval::NNUE
@@ -0,0 +1,64 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef NNUE_MISC_H_INCLUDED
+#define NNUE_MISC_H_INCLUDED
+
+#include <cstddef>
+#include <string>
+
+#include "../types.h"
+#include "nnue_architecture.h"
+
+namespace Stockfish {
+
+class Position;
+
+namespace Eval::NNUE {
+
+// Names and description kept for one network file.
+struct EvalFile {
+    // Default net name, will use one of the EvalFileDefaultName* macros defined in evaluate.h
+    std::string defaultName;
+    // Selected net name, either via uci option or default
+    std::string current;
+    // Net description extracted from the net file
+    std::string netDescription;
+};
+
+
+struct NnueEvalTrace {  // per-bucket split of an evaluation into its PSQT and positional parts
+    static_assert(LayerStacks == PSQTBuckets);  // psqt[] below is sized with LayerStacks too
+
+    Value       psqt[LayerStacks];        // PSQT part, indexed by bucket
+    Value       positional[LayerStacks];  // positional part, indexed by bucket
+    std::size_t correctBucket;            // presumably the bucket that applies to the position
+};
+
+struct Networks;
+struct AccumulatorCaches;
+
+std::string trace(Position& pos, const Networks& networks, AccumulatorCaches& caches);
+void        hint_common_parent_position(const Position&    pos,
+                                        const Networks&    networks,
+                                        AccumulatorCaches& caches);
+
+}  // namespace Stockfish::Eval::NNUE
+}  // namespace Stockfish
+
+#endif  // #ifndef NNUE_MISC_H_INCLUDED
@@ -1,300 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <algorithm>
-#include <cassert>
-
-#include "bitboard.h"
-#include "pawns.h"
-#include "position.h"
-#include "thread.h"
-
-namespace {
-
-  #define V Value
-  #define S(mg, eg) make_score(mg, eg)
-
-  // Pawn penalties
-  constexpr Score Backward      = S( 9, 22);
-  constexpr Score Doubled       = S(13, 51);
-  constexpr Score DoubledEarly  = S(20,  7);
-  constexpr Score Isolated      = S( 3, 15);
-  constexpr Score WeakLever     = S( 4, 58);
-  constexpr Score WeakUnopposed = S(13, 24);
-
-  // Bonus for blocked pawns at 5th or 6th rank
-  constexpr Score BlockedPawn[2] = { S(-17, -6), S(-9, 2) };
-
-  constexpr Score BlockedStorm[RANK_NB] = {
-    S(0, 0), S(0, 0), S(75, 78), S(-8, 16), S(-6, 10), S(-6, 6), S(0, 2)
-  };
-
-  // Connected pawn bonus
-  constexpr int Connected[RANK_NB] = { 0, 5, 7, 11, 23, 48, 87 };
-
-  // Strength of pawn shelter for our king by [distance from edge][rank].
-  // RANK_1 = 0 is used for files where we have no pawn, or pawn is behind our king.
-  constexpr Value ShelterStrength[int(FILE_NB) / 2][RANK_NB] = {
-    { V( -5), V( 82), V( 92), V( 54), V( 36), V( 22), V(  28) },
-    { V(-44), V( 63), V( 33), V(-50), V(-30), V(-12), V( -62) },
-    { V(-11), V( 77), V( 22), V( -6), V( 31), V(  8), V( -45) },
-    { V(-39), V(-12), V(-29), V(-50), V(-43), V(-68), V(-164) }
-  };
-
-  // Danger of enemy pawns moving toward our king by [distance from edge][rank].
-  // RANK_1 = 0 is used for files where the enemy has no pawn, or their pawn
-  // is behind our king. Note that UnblockedStorm[0][1-2] accommodate opponent pawn
-  // on edge, likely blocked by our king.
-  constexpr Value UnblockedStorm[int(FILE_NB) / 2][RANK_NB] = {
-    { V( 87), V(-288), V(-168), V( 96), V( 47), V( 44), V( 46) },
-    { V( 42), V( -25), V( 120), V( 45), V( 34), V( -9), V( 24) },
-    { V( -8), V(  51), V( 167), V( 35), V( -4), V(-16), V(-12) },
-    { V(-17), V( -13), V( 100), V(  4), V(  9), V(-16), V(-31) }
-  };
-
-
-  // KingOnFile[semi-open Us][semi-open Them] contains bonuses/penalties
-  // for king when the king is on a semi-open or open file.
-  constexpr Score KingOnFile[2][2] = {{ S(-21,10), S(-7, 1)  },
-                                     {  S(  0,-3), S( 9,-4) }};
-
-  #undef S
-  #undef V
-
-
-  /// evaluate() calculates a score for the static pawn structure of the given position.
-  /// We cannot use the location of pieces or king in this function, as the evaluation
-  /// of the pawn structure will be stored in a small cache for speed reasons, and will
-  /// be re-used even when the pieces have moved.
-
-  template<Color Us>
-  Score evaluate(const Position& pos, Pawns::Entry* e) {
-
-    constexpr Color     Them = ~Us;
-    constexpr Direction Up   = pawn_push(Us);
-    constexpr Direction Down = -Up;
-
-    Bitboard neighbours, stoppers, support, phalanx, opposed;
-    Bitboard lever, leverPush, blocked;
-    Square s;
-    bool backward, passed, doubled;
-    Score score = SCORE_ZERO;
-    Bitboard b = pos.pieces(Us, PAWN);
-
-    Bitboard ourPawns   = pos.pieces(  Us, PAWN);
-    Bitboard theirPawns = pos.pieces(Them, PAWN);
-
-    Bitboard doubleAttackThem = pawn_double_attacks_bb<Them>(theirPawns);
-
-    e->passedPawns[Us] = 0;
-    e->kingSquares[Us] = SQ_NONE;
-    e->pawnAttacks[Us] = e->pawnAttacksSpan[Us] = pawn_attacks_bb<Us>(ourPawns);
-    e->blockedCount += popcount(shift<Up>(ourPawns) & (theirPawns | doubleAttackThem));
-
-    // Loop through all pawns of the current color and score each pawn
-    while (b) {
-        s = pop_lsb(&b);
-
-        assert(pos.piece_on(s) == make_piece(Us, PAWN));
-
-        Rank r = relative_rank(Us, s);
-
-        // Flag the pawn
-        opposed    = theirPawns & forward_file_bb(Us, s);
-        blocked    = theirPawns & (s + Up);
-        stoppers   = theirPawns & passed_pawn_span(Us, s);
-        lever      = theirPawns & pawn_attacks_bb(Us, s);
-        leverPush  = theirPawns & pawn_attacks_bb(Us, s + Up);
-        doubled    = ourPawns   & (s - Up);
-        neighbours = ourPawns   & adjacent_files_bb(s);
-        phalanx    = neighbours & rank_bb(s);
-        support    = neighbours & rank_bb(s - Up);
-
-        if (doubled)
-        {
-            // Additional doubled penalty if none of their pawns is fixed
-            if (!(ourPawns & shift<Down>(theirPawns | pawn_attacks_bb<Them>(theirPawns))))
-                score -= DoubledEarly;
-        }
-
-        // A pawn is backward when it is behind all pawns of the same color on
-        // the adjacent files and cannot safely advance.
-        backward =  !(neighbours & forward_ranks_bb(Them, s + Up))
-                  && (leverPush | blocked);
-
-        // Compute additional span if pawn is not backward nor blocked
-        if (!backward && !blocked)
-            e->pawnAttacksSpan[Us] |= pawn_attack_span(Us, s);
-
-        // A pawn is passed if one of the three following conditions is true:
-        // (a) there is no stoppers except some levers
-        // (b) the only stoppers are the leverPush, but we outnumber them
-        // (c) there is only one front stopper which can be levered.
-        //     (Refined in Evaluation::passed)
-        passed =   !(stoppers ^ lever)
-                || (   !(stoppers ^ leverPush)
-                    && popcount(phalanx) >= popcount(leverPush))
-                || (   stoppers == blocked && r >= RANK_5
-                    && (shift<Up>(support) & ~(theirPawns | doubleAttackThem)));
-
-        passed &= !(forward_file_bb(Us, s) & ourPawns);
-
-        // Passed pawns will be properly scored later in evaluation when we have
-        // full attack info.
-        if (passed)
-            e->passedPawns[Us] |= s;
-
-        // Score this pawn
-        if (support | phalanx)
-        {
-            int v =  Connected[r] * (2 + bool(phalanx) - bool(opposed))
-                   + 22 * popcount(support);
-
-            score += make_score(v, v * (r - 2) / 4);
-        }
-
-        else if (!neighbours)
-        {
-            if (     opposed
-                &&  (ourPawns & forward_file_bb(Them, s))
-                && !(theirPawns & adjacent_files_bb(s)))
-                score -= Doubled;
-            else
-                score -=  Isolated
-                        + WeakUnopposed * !opposed;
-        }
-
-        else if (backward)
-            score -=  Backward
-                    + WeakUnopposed * !opposed * bool(~(FileABB | FileHBB) & s);
-
-        if (!support)
-            score -=  Doubled * doubled
-                    + WeakLever * more_than_one(lever);
-
-        if (blocked && r >= RANK_5)
-            score += BlockedPawn[r - RANK_5];
-    }
-
-    return score;
-  }
-
-} // namespace
-
-namespace Pawns {
-
-
-/// Pawns::probe() looks up the current position's pawns configuration in
-/// the pawns hash table. It returns a pointer to the Entry if the position
-/// is found. Otherwise a new Entry is computed and stored there, so we don't
-/// have to recompute all when the same pawns configuration occurs again.
-
-Entry* probe(const Position& pos) {
-
-  Key key = pos.pawn_key();
-  Entry* e = pos.this_thread()->pawnsTable[key];
-
-  if (e->key == key)
-      return e;
-
-  e->key = key;
-  e->blockedCount = 0;
-  e->scores[WHITE] = evaluate<WHITE>(pos, e);
-  e->scores[BLACK] = evaluate<BLACK>(pos, e);
-
-  return e;
-}
-
-
-/// Entry::evaluate_shelter() calculates the shelter bonus and the storm
-/// penalty for a king, looking at the king file and the two closest files.
-
-template<Color Us>
-Score Entry::evaluate_shelter(const Position& pos, Square ksq) const {
-
-  constexpr Color Them = ~Us;
-
-  Bitboard b = pos.pieces(PAWN) & ~forward_ranks_bb(Them, ksq);
-  Bitboard ourPawns = b & pos.pieces(Us) & ~pawnAttacks[Them];
-  Bitboard theirPawns = b & pos.pieces(Them);
-
-  Score bonus = make_score(5, 5);
-
-  File center = std::clamp(file_of(ksq), FILE_B, FILE_G);
-  for (File f = File(center - 1); f <= File(center + 1); ++f)
-  {
-      b = ourPawns & file_bb(f);
-      int ourRank = b ? relative_rank(Us, frontmost_sq(Them, b)) : 0;
-
-      b = theirPawns & file_bb(f);
-      int theirRank = b ? relative_rank(Us, frontmost_sq(Them, b)) : 0;
-
-      int d = edge_distance(f);
-      bonus += make_score(ShelterStrength[d][ourRank], 0);
-
-      if (ourRank && (ourRank == theirRank - 1))
-          bonus -= BlockedStorm[theirRank];
-      else
-          bonus -= make_score(UnblockedStorm[d][theirRank], 0);
-  }
-
-  // King On File
-  bonus -= KingOnFile[pos.is_on_semiopen_file(Us, ksq)][pos.is_on_semiopen_file(Them, ksq)];
-
-  return bonus;
-}
-
-
-/// Entry::do_king_safety() calculates a bonus for king safety. It is called only
-/// when king square changes, which is about 20% of total king_safety() calls.
-
-template<Color Us>
-Score Entry::do_king_safety(const Position& pos) {
-
-  Square ksq = pos.square<KING>(Us);
-  kingSquares[Us] = ksq;
-  castlingRights[Us] = pos.castling_rights(Us);
-  auto compare = [](Score a, Score b) { return mg_value(a) < mg_value(b); };
-
-  Score shelter = evaluate_shelter<Us>(pos, ksq);
-
-  // If we can castle use the bonus after castling if it is bigger
-
-  if (pos.can_castle(Us & KING_SIDE))
-      shelter = std::max(shelter, evaluate_shelter<Us>(pos, relative_square(Us, SQ_G1)), compare);
-
-  if (pos.can_castle(Us & QUEEN_SIDE))
-      shelter = std::max(shelter, evaluate_shelter<Us>(pos, relative_square(Us, SQ_C1)), compare);
-
-  // In endgame we like to bring our king near our closest pawn
-  Bitboard pawns = pos.pieces(Us, PAWN);
-  int minPawnDist = 6;
-
-  if (pawns & attacks_bb<KING>(ksq))
-      minPawnDist = 1;
-  else while (pawns)
-      minPawnDist = std::min(minPawnDist, distance(ksq, pop_lsb(&pawns)));
-
-  return shelter - make_score(0, 16 * minPawnDist);
-}
-
-// Explicit template instantiation
-template Score Entry::do_king_safety<WHITE>(const Position& pos);
-template Score Entry::do_king_safety<BLACK>(const Position& pos);
-
-} // namespace Pawns
@@ -1,70 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef PAWNS_H_INCLUDED
-#define PAWNS_H_INCLUDED
-
-#include "misc.h"
-#include "position.h"
-#include "types.h"
-
-namespace Pawns {
-
-/// Pawns::Entry contains various information about a pawn structure. A lookup
-/// to the pawn hash table (performed by calling the probe function) returns a
-/// pointer to an Entry object.
-
-struct Entry {
-
-  Score pawn_score(Color c) const { return scores[c]; }
-  Bitboard pawn_attacks(Color c) const { return pawnAttacks[c]; }
-  Bitboard passed_pawns(Color c) const { return passedPawns[c]; }
-  Bitboard pawn_attacks_span(Color c) const { return pawnAttacksSpan[c]; }
-  int passed_count() const { return popcount(passedPawns[WHITE] | passedPawns[BLACK]); }
-  int blocked_count() const { return blockedCount; }
-
-  template<Color Us>
-  Score king_safety(const Position& pos) {
-    return  kingSquares[Us] == pos.square<KING>(Us) && castlingRights[Us] == pos.castling_rights(Us)
-          ? kingSafety[Us] : (kingSafety[Us] = do_king_safety<Us>(pos));
-  }
-
-  template<Color Us>
-  Score do_king_safety(const Position& pos);
-
-  template<Color Us>
-  Score evaluate_shelter(const Position& pos, Square ksq) const;
-
-  Key key;
-  Score scores[COLOR_NB];
-  Bitboard passedPawns[COLOR_NB];
-  Bitboard pawnAttacks[COLOR_NB];
-  Bitboard pawnAttacksSpan[COLOR_NB];
-  Square kingSquares[COLOR_NB];
-  Score kingSafety[COLOR_NB];
-  int castlingRights[COLOR_NB];
-  int blockedCount;
-};
-
-typedef HashTable<Entry, 131072> Table;
-
-Entry* probe(const Position& pos);
-
-} // namespace Pawns
-
-#endif // #ifndef PAWNS_H_INCLUDED
@@ -0,0 +1,68 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef PERFT_H_INCLUDED
+#define PERFT_H_INCLUDED
+
+#include <cstdint>
+
+#include "movegen.h"
+#include "position.h"
+#include "types.h"
+#include "uci.h"
+
+namespace Stockfish::Benchmark {
+
+// Utility to verify move generation. All the leaf nodes up to the given depth are
+// counted and the sum is returned; with Root set, each root move's count is printed.
+template<bool Root>
+uint64_t perft(Position& pos, Depth depth) {
+
+    StateInfo st;
+    ASSERT_ALIGNED(&st, Eval::NNUE::CacheLineSize);
+
+    uint64_t total = 0;
+    for (const auto& m : MoveList<LEGAL>(pos))
+    {
+        uint64_t subtree = 1;  // a root move at depth <= 1 counts as one leaf
+        if (!Root || depth > 1)
+        {
+            pos.do_move(m, st);
+            if (depth == 2)  // the legal replies are the leaves: count, don't recurse
+                subtree = MoveList<LEGAL>(pos).size();
+            else
+                subtree = perft<false>(pos, depth - 1);
+            pos.undo_move(m);
+        }
+        total += subtree;
+        if constexpr (Root)
+            sync_cout << UCIEngine::move(m, pos.is_chess960()) << ": " << subtree << sync_endl;
+    }
+    return total;
+}
+
+// Builds a Position from fen (as Chess960 if isChess960) and returns the root perft for it.
+inline uint64_t perft(const std::string& fen, Depth depth, bool isChess960) {
+    StateListPtr states = std::make_unique<std::deque<StateInfo>>(1);  // holds the root StateInfo
+    Position     root;
+    root.set(fen, isChess960, &states->back());
+    return perft<true>(root, depth);
+}
+}
+
+#endif  // PERFT_H_INCLUDED
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -21,406 +21,356 @@

 #include <cassert>
 #include <deque>
-#include <memory> // For std::unique_ptr
+#include <iosfwd>
+#include <memory>
 #include <string>

 #include "bitboard.h"
-#include "evaluate.h"
-#include "psqt.h"
+#include "nnue/nnue_accumulator.h"
+#include "nnue/nnue_architecture.h"
 #include "types.h"

-#include "nnue/nnue_accumulator.h"
+namespace Stockfish {

+class TranspositionTable;

-/// StateInfo struct stores information needed to restore a Position object to
-/// its previous state when we retract a move. Whenever a move is made on the
-/// board (by calling Position::do_move), a StateInfo object must be passed.
+// StateInfo struct stores information needed to restore a Position object to
+// its previous state when we retract a move. Whenever a move is made on the
+// board (by calling Position::do_move), a StateInfo object must be passed.

 struct StateInfo {

-  // Copied when making a move
-  Key    pawnKey;
-  Key    materialKey;
-  Value  nonPawnMaterial[COLOR_NB];
-  int    castlingRights;
-  int    rule50;
-  int    pliesFromNull;
-  Square epSquare;
+    // Copied when making a move
+    Key    materialKey;
+    Key    pawnKey;
+    Key    minorPieceKey;
+    Key    nonPawnKey[COLOR_NB];
+    Value  nonPawnMaterial[COLOR_NB];
+    int    castlingRights;
+    int    rule50;
+    int    pliesFromNull;
+    Square epSquare;

-  // Not copied when making a move (will be recomputed anyhow)
-  Key        key;
-  Bitboard   checkersBB;
-  Piece      capturedPiece;
-  StateInfo* previous;
-  Bitboard   blockersForKing[COLOR_NB];
-  Bitboard   pinners[COLOR_NB];
-  Bitboard   checkSquares[PIECE_TYPE_NB];
-  int        repetition;
+    // Not copied when making a move (will be recomputed anyhow)
+    Key        key;
+    Bitboard   checkersBB;
+    StateInfo* previous;
+    StateInfo* next;
+    Bitboard   blockersForKing[COLOR_NB];
+    Bitboard   pinners[COLOR_NB];
+    Bitboard   checkSquares[PIECE_TYPE_NB];
+    Piece      capturedPiece;
+    int        repetition;

-  // Used by NNUE
-  Eval::NNUE::Accumulator accumulator;
-  DirtyPiece dirtyPiece;
+    // Used by NNUE: one accumulator per feature-transformer size (big and small)
+    Eval::NNUE::Accumulator<Eval::NNUE::TransformedFeatureDimensionsBig>   accumulatorBig;
+    Eval::NNUE::Accumulator<Eval::NNUE::TransformedFeatureDimensionsSmall> accumulatorSmall;
+    DirtyPiece                                                             dirtyPiece;
 };


-/// A list to keep track of the position states along the setup moves (from the
-/// start position to the position just before the search starts). Needed by
-/// 'draw by repetition' detection. Use a std::deque because pointers to
-/// elements are not invalidated upon list resizing.
-typedef std::unique_ptr<std::deque<StateInfo>> StateListPtr;
+// A list to keep track of the position states along the setup moves (from the
+// start position to the position just before the search starts). Needed by
+// 'draw by repetition' detection. Use a std::deque because pointers to
+// elements are not invalidated upon list resizing.
+using StateListPtr = std::unique_ptr<std::deque<StateInfo>>;


-/// Position class stores information regarding the board representation as
-/// pieces, side to move, hash keys, castling info, etc. Important methods are
-/// do_move() and undo_move(), used by the search to update node info when
-/// traversing the search tree.
-class Thread;
-
+// Position class stores information regarding the board representation as
+// pieces, side to move, hash keys, castling info, etc. Important methods are
+// do_move() and undo_move(), used by the search to update node info when
+// traversing the search tree.
 class Position {
-public:
-  static void init();
+   public:
+    static void init();

-  Position() = default;
-  Position(const Position&) = delete;
-  Position& operator=(const Position&) = delete;
+    Position()                           = default;
+    Position(const Position&)            = delete;
+    Position& operator=(const Position&) = delete;

-  // FEN string input/output
-  Position& set(const std::string& fenStr, bool isChess960, StateInfo* si, Thread* th);
-  Position& set(const std::string& code, Color c, StateInfo* si);
-  const std::string fen() const;
+    // FEN string input/output
+    Position&   set(const std::string& fenStr, bool isChess960, StateInfo* si);
+    Position&   set(const std::string& code, Color c, StateInfo* si);
+    std::string fen() const;

-  // Position representation
-  Bitboard pieces(PieceType pt) const;
-  Bitboard pieces(PieceType pt1, PieceType pt2) const;
-  Bitboard pieces(Color c) const;
-  Bitboard pieces(Color c, PieceType pt) const;
-  Bitboard pieces(Color c, PieceType pt1, PieceType pt2) const;
-  Piece piece_on(Square s) const;
-  Square ep_square() const;
-  bool empty(Square s) const;
-  template<PieceType Pt> int count(Color c) const;
-  template<PieceType Pt> int count() const;
-  template<PieceType Pt> Square square(Color c) const;
-  bool is_on_semiopen_file(Color c, Square s) const;
+    // Position representation
+    Bitboard pieces(PieceType pt = ALL_PIECES) const;
+    template<typename... PieceTypes>
+    Bitboard pieces(PieceType pt, PieceTypes... pts) const;
+    Bitboard pieces(Color c) const;
+    template<typename... PieceTypes>
+    Bitboard pieces(Color c, PieceTypes... pts) const;
+    Piece    piece_on(Square s) const;
+    Square   ep_square() const;
+    bool     empty(Square s) const;
+    template<PieceType Pt>
+    int count(Color c) const;
+    template<PieceType Pt>
+    int count() const;
+    template<PieceType Pt>
+    Square square(Color c) const;

-  // Castling
-  CastlingRights castling_rights(Color c) const;
-  bool can_castle(CastlingRights cr) const;
-  bool castling_impeded(CastlingRights cr) const;
-  Square castling_rook_square(CastlingRights cr) const;
+    // Castling
+    CastlingRights castling_rights(Color c) const;
+    bool           can_castle(CastlingRights cr) const;
+    bool           castling_impeded(CastlingRights cr) const;
+    Square         castling_rook_square(CastlingRights cr) const;

-  // Checking
-  Bitboard checkers() const;
-  Bitboard blockers_for_king(Color c) const;
-  Bitboard check_squares(PieceType pt) const;
-  Bitboard pinners(Color c) const;
-  bool is_discovered_check_on_king(Color c, Move m) const;
+    // Checking
+    Bitboard checkers() const;
+    Bitboard blockers_for_king(Color c) const;
+    Bitboard check_squares(PieceType pt) const;
+    Bitboard pinners(Color c) const;

-  // Attacks to/from a given square
-  Bitboard attackers_to(Square s) const;
-  Bitboard attackers_to(Square s, Bitboard occupied) const;
-  Bitboard slider_blockers(Bitboard sliders, Square s, Bitboard& pinners) const;
+    // Attacks to/from a given square
+    Bitboard attackers_to(Square s) const;
+    Bitboard attackers_to(Square s, Bitboard occupied) const;
+    bool     attackers_to_exist(Square s, Bitboard occupied, Color c) const;
+    void     update_slider_blockers(Color c) const;
+    template<PieceType Pt>
+    Bitboard attacks_by(Color c) const;

-  // Properties of moves
-  bool legal(Move m) const;
-  bool pseudo_legal(const Move m) const;
-  bool capture(Move m) const;
-  bool capture_or_promotion(Move m) const;
-  bool gives_check(Move m) const;
-  bool advanced_pawn_push(Move m) const;
-  Piece moved_piece(Move m) const;
-  Piece captured_piece() const;
+    // Properties of moves
+    bool  legal(Move m) const;
+    bool  pseudo_legal(const Move m) const;
+    bool  capture(Move m) const;
+    bool  capture_stage(Move m) const;
+    bool  gives_check(Move m) const;
+    Piece moved_piece(Move m) const;
+    Piece captured_piece() const;

-  // Piece specific
-  bool pawn_passed(Color c, Square s) const;
-  bool opposite_bishops() const;
-  int  pawns_on_same_color_squares(Color c, Square s) const;
+    // Doing and undoing moves
+    void do_move(Move m, StateInfo& newSt, const TranspositionTable* tt);
+    void do_move(Move m, StateInfo& newSt, bool givesCheck, const TranspositionTable* tt);
+    void undo_move(Move m);
+    void do_null_move(StateInfo& newSt, const TranspositionTable& tt);
+    void undo_null_move();

-  // Doing and undoing moves
-  void do_move(Move m, StateInfo& newSt);
-  void do_move(Move m, StateInfo& newSt, bool givesCheck);
-  void undo_move(Move m);
-  void do_null_move(StateInfo& newSt);
-  void undo_null_move();
+    // Static Exchange Evaluation
+    bool see_ge(Move m, int threshold = 0) const;

-  // Static Exchange Evaluation
-  bool see_ge(Move m, Value threshold = VALUE_ZERO) const;
+    // Accessing hash keys
+    Key key() const;
+    Key material_key() const;
+    Key pawn_key() const;
+    Key minor_piece_key() const;
+    Key non_pawn_key(Color c) const;

-  // Accessing hash keys
-  Key key() const;
-  Key key_after(Move m) const;
-  Key material_key() const;
-  Key pawn_key() const;
+    // Other properties of the position
+    Color side_to_move() const;
+    int   game_ply() const;
+    bool  is_chess960() const;
+    bool  is_draw(int ply) const;
+    bool  is_repetition(int ply) const;
+    bool  upcoming_repetition(int ply) const;
+    bool  has_repeated() const;
+    int   rule50_count() const;
+    Value non_pawn_material(Color c) const;
+    Value non_pawn_material() const;

-  // Other properties of the position
-  Color side_to_move() const;
-  int game_ply() const;
-  bool is_chess960() const;
-  Thread* this_thread() const;
-  bool is_draw(int ply) const;
-  bool has_game_cycle(int ply) const;
-  bool has_repeated() const;
-  int rule50_count() const;
-  Score psq_score() const;
-  Value non_pawn_material(Color c) const;
-  Value non_pawn_material() const;
+    // Position consistency check, for debugging
+    bool pos_is_ok() const;
+    void flip();

-  // Position consistency check, for debugging
-  bool pos_is_ok() const;
-  void flip();
+    // Used by NNUE
+    StateInfo* state() const;

-  // Used by NNUE
-  StateInfo* state() const;
+    void put_piece(Piece pc, Square s);
+    void remove_piece(Square s);

-private:
-  // Initialization helpers (used while setting up a position)
-  void set_castling_right(Color c, Square rfrom);
-  void set_state(StateInfo* si) const;
-  void set_check_info(StateInfo* si) const;
+   private:
+    // Initialization helpers (used while setting up a position)
+    void set_castling_right(Color c, Square rfrom);
+    void set_state() const;
+    void set_check_info() const;

-  // Other helpers
-  void put_piece(Piece pc, Square s);
-  void remove_piece(Square s);
-  void move_piece(Square from, Square to);
-  template<bool Do>
-  void do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto);
+    // Other helpers
+    void move_piece(Square from, Square to);
+    template<bool Do>
+    void do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto);
+    template<bool AfterMove>
+    Key adjust_key50(Key k) const;

-  // Data members
-  Piece board[SQUARE_NB];
-  Bitboard byTypeBB[PIECE_TYPE_NB];
-  Bitboard byColorBB[COLOR_NB];
-  int pieceCount[PIECE_NB];
-  int castlingRightsMask[SQUARE_NB];
-  Square castlingRookSquare[CASTLING_RIGHT_NB];
-  Bitboard castlingPath[CASTLING_RIGHT_NB];
-  int gamePly;
-  Color sideToMove;
-  Score psq;
-  Thread* thisThread;
-  StateInfo* st;
-  bool chess960;
+    // Data members
+    Piece      board[SQUARE_NB];
+    Bitboard   byTypeBB[PIECE_TYPE_NB];
+    Bitboard   byColorBB[COLOR_NB];
+    int        pieceCount[PIECE_NB];
+    int        castlingRightsMask[SQUARE_NB];
+    Square     castlingRookSquare[CASTLING_RIGHT_NB];
+    Bitboard   castlingPath[CASTLING_RIGHT_NB];
+    StateInfo* st;
+    int        gamePly;
+    Color      sideToMove;
+    bool       chess960;
 };

-extern std::ostream& operator<<(std::ostream& os, const Position& pos);
+std::ostream& operator<<(std::ostream& os, const Position& pos);

-inline Color Position::side_to_move() const {
-  return sideToMove;
-}
+inline Color Position::side_to_move() const { return sideToMove; }

 inline Piece Position::piece_on(Square s) const {
-  assert(is_ok(s));
-  return board[s];
+    assert(is_ok(s));
+    return board[s];
 }

-inline bool Position::empty(Square s) const {
-  return piece_on(s) == NO_PIECE;
+inline bool Position::empty(Square s) const { return piece_on(s) == NO_PIECE; }
+
+inline Piece Position::moved_piece(Move m) const { return piece_on(m.from_sq()); }
+
+inline Bitboard Position::pieces(PieceType pt) const { return byTypeBB[pt]; }
+
+template<typename... PieceTypes>
+inline Bitboard Position::pieces(PieceType pt, PieceTypes... pts) const {
+    return pieces(pt) | pieces(pts...);
 }

-inline Piece Position::moved_piece(Move m) const {
-  return piece_on(from_sq(m));
+inline Bitboard Position::pieces(Color c) const { return byColorBB[c]; }
+
+template<typename... PieceTypes>
+inline Bitboard Position::pieces(Color c, PieceTypes... pts) const {
+    return pieces(c) & pieces(pts...);
 }

-inline Bitboard Position::pieces(PieceType pt = ALL_PIECES) const {
-  return byTypeBB[pt];
+template<PieceType Pt>
+inline int Position::count(Color c) const {
+    return pieceCount[make_piece(c, Pt)];
 }

-inline Bitboard Position::pieces(PieceType pt1, PieceType pt2) const {
-  return pieces(pt1) | pieces(pt2);
+template<PieceType Pt>
+inline int Position::count() const {
+    return count<Pt>(WHITE) + count<Pt>(BLACK);
 }

-inline Bitboard Position::pieces(Color c) const {
-  return byColorBB[c];
+template<PieceType Pt>
+inline Square Position::square(Color c) const {
+    assert(count<Pt>(c) == 1);
+    return lsb(pieces(c, Pt));
 }

-inline Bitboard Position::pieces(Color c, PieceType pt) const {
-  return pieces(c) & pieces(pt);
-}
+inline Square Position::ep_square() const { return st->epSquare; }

-inline Bitboard Position::pieces(Color c, PieceType pt1, PieceType pt2) const {
-  return pieces(c) & (pieces(pt1) | pieces(pt2));
-}
-
-template<PieceType Pt> inline int Position::count(Color c) const {
-  return pieceCount[make_piece(c, Pt)];
-}
-
-template<PieceType Pt> inline int Position::count() const {
-  return count<Pt>(WHITE) + count<Pt>(BLACK);
-}
-
-template<PieceType Pt> inline Square Position::square(Color c) const {
-  assert(count<Pt>(c) == 1);
-  return lsb(pieces(c, Pt));
-}
-
-inline Square Position::ep_square() const {
-  return st->epSquare;
-}
-
-inline bool Position::is_on_semiopen_file(Color c, Square s) const {
-  return !(pieces(c, PAWN) & file_bb(s));
-}
-
-inline bool Position::can_castle(CastlingRights cr) const {
-  return st->castlingRights & cr;
-}
+inline bool Position::can_castle(CastlingRights cr) const { return st->castlingRights & cr; }

 inline CastlingRights Position::castling_rights(Color c) const {
-  return c & CastlingRights(st->castlingRights);
+    return c & CastlingRights(st->castlingRights);
 }

 inline bool Position::castling_impeded(CastlingRights cr) const {
-  assert(cr == WHITE_OO || cr == WHITE_OOO || cr == BLACK_OO || cr == BLACK_OOO);
-
-  return pieces() & castlingPath[cr];
+    assert(cr == WHITE_OO || cr == WHITE_OOO || cr == BLACK_OO || cr == BLACK_OOO);
+    return pieces() & castlingPath[cr];
 }

 inline Square Position::castling_rook_square(CastlingRights cr) const {
-  assert(cr == WHITE_OO || cr == WHITE_OOO || cr == BLACK_OO || cr == BLACK_OOO);
-
-  return castlingRookSquare[cr];
+    assert(cr == WHITE_OO || cr == WHITE_OOO || cr == BLACK_OO || cr == BLACK_OOO);
+    return castlingRookSquare[cr];
 }

-inline Bitboard Position::attackers_to(Square s) const {
-  return attackers_to(s, pieces());
+inline Bitboard Position::attackers_to(Square s) const { return attackers_to(s, pieces()); }
+
+template<PieceType Pt>
+inline Bitboard Position::attacks_by(Color c) const {  // squares attacked by c's pieces of type Pt
+
+    if constexpr (Pt == PAWN)  // pawns: a single pawn_attacks_bb call on the whole pawn set
+        return c == WHITE ? pawn_attacks_bb<WHITE>(pieces(WHITE, PAWN))
+                          : pawn_attacks_bb<BLACK>(pieces(BLACK, PAWN));
+    else
+    {
+        Bitboard threats   = 0;              // union of the attacks collected so far
+        Bitboard attackers = pieces(c, Pt);  // pieces of type Pt still to be processed
+        while (attackers)  // presumably pop_lsb clears the bit it returns -- TODO confirm
+            threats |= attacks_bb<Pt>(pop_lsb(attackers), pieces());  // pieces() is the occupancy
+        return threats;
+    }
 }

-inline Bitboard Position::checkers() const {
-  return st->checkersBB;
+inline Bitboard Position::checkers() const { return st->checkersBB; }
+
+inline Bitboard Position::blockers_for_king(Color c) const { return st->blockersForKing[c]; }
+
+inline Bitboard Position::pinners(Color c) const { return st->pinners[c]; }
+
+inline Bitboard Position::check_squares(PieceType pt) const { return st->checkSquares[pt]; }
+
+inline Key Position::key() const { return adjust_key50<false>(st->key); }
+
+template<bool AfterMove>  // AfterMove == true lowers the rule50 threshold from 14 to 13
+inline Key Position::adjust_key50(Key k) const {  // below threshold: k; else k ^ make_key(excess / 8)
+    return st->rule50 < 14 - AfterMove ? k : k ^ make_key((st->rule50 - (14 - AfterMove)) / 8);
 }

-inline Bitboard Position::blockers_for_king(Color c) const {
-  return st->blockersForKing[c];
-}
+inline Key Position::pawn_key() const { return st->pawnKey; }

-inline Bitboard Position::pinners(Color c) const {
-  return st->pinners[c];
-}
+inline Key Position::material_key() const { return st->materialKey; }

-inline Bitboard Position::check_squares(PieceType pt) const {
-  return st->checkSquares[pt];
-}
+inline Key Position::minor_piece_key() const { return st->minorPieceKey; }

-inline bool Position::is_discovered_check_on_king(Color c, Move m) const {
-  return st->blockersForKing[c] & from_sq(m);
-}
+inline Key Position::non_pawn_key(Color c) const { return st->nonPawnKey[c]; }

-inline bool Position::pawn_passed(Color c, Square s) const {
-  return !(pieces(~c, PAWN) & passed_pawn_span(c, s));
-}
-
-inline bool Position::advanced_pawn_push(Move m) const {
-  return   type_of(moved_piece(m)) == PAWN
-        && relative_rank(sideToMove, to_sq(m)) > RANK_5;
-}
-
-inline int Position::pawns_on_same_color_squares(Color c, Square s) const {
-  return popcount(pieces(c, PAWN) & ((DarkSquares & s) ? DarkSquares : ~DarkSquares));
-}
-
-inline Key Position::key() const {
-  return st->rule50 < 14 ? st->key
-                         : st->key ^ make_key((st->rule50 - 14) / 8);
-}
-
-inline Key Position::pawn_key() const {
-  return st->pawnKey;
-}
-
-inline Key Position::material_key() const {
-  return st->materialKey;
-}
-
-inline Score Position::psq_score() const {
-  return psq;
-}
-
-inline Value Position::non_pawn_material(Color c) const {
-  return st->nonPawnMaterial[c];
-}
+inline Value Position::non_pawn_material(Color c) const { return st->nonPawnMaterial[c]; }

 inline Value Position::non_pawn_material() const {
-  return non_pawn_material(WHITE) + non_pawn_material(BLACK);
+    return non_pawn_material(WHITE) + non_pawn_material(BLACK);
 }

-inline int Position::game_ply() const {
-  return gamePly;
-}
+inline int Position::game_ply() const { return gamePly; }

-inline int Position::rule50_count() const {
-  return st->rule50;
-}
+inline int Position::rule50_count() const { return st->rule50; }

-inline bool Position::opposite_bishops() const {
-  return   count<BISHOP>(WHITE) == 1
-        && count<BISHOP>(BLACK) == 1
-        && opposite_colors(square<BISHOP>(WHITE), square<BISHOP>(BLACK));
-}
-
-inline bool Position::is_chess960() const {
-  return chess960;
-}
-
-inline bool Position::capture_or_promotion(Move m) const {
-  assert(is_ok(m));
-  return type_of(m) != NORMAL ? type_of(m) != CASTLING : !empty(to_sq(m));
-}
+inline bool Position::is_chess960() const { return chess960; }

 inline bool Position::capture(Move m) const {
-  assert(is_ok(m));
-  // Castling is encoded as "king captures rook"
-  return (!empty(to_sq(m)) && type_of(m) != CASTLING) || type_of(m) == EN_PASSANT;
+    assert(m.is_ok());
+    return (!empty(m.to_sq()) && m.type_of() != CASTLING) || m.type_of() == EN_PASSANT;
 }

-inline Piece Position::captured_piece() const {
-  return st->capturedPiece;
+// Returns true if the move belongs to the capture stage of move generation:
+// any capture, or a move whose promotion_type() is QUEEN. This must stay
+// consistent with capture-stage generation so that no move is generated twice.
+inline bool Position::capture_stage(Move m) const {
+    assert(m.is_ok());
+    return capture(m) || m.promotion_type() == QUEEN;
 }

-inline Thread* Position::this_thread() const {
-  return thisThread;
-}
+inline Piece Position::captured_piece() const { return st->capturedPiece; }

 inline void Position::put_piece(Piece pc, Square s) {

-  board[s] = pc;
-  byTypeBB[ALL_PIECES] |= byTypeBB[type_of(pc)] |= s;
-  byColorBB[color_of(pc)] |= s;
-  pieceCount[pc]++;
-  pieceCount[make_piece(color_of(pc), ALL_PIECES)]++;
-  psq += PSQT::psq[pc][s];
+    board[s] = pc;
+    byTypeBB[ALL_PIECES] |= byTypeBB[type_of(pc)] |= s;
+    byColorBB[color_of(pc)] |= s;
+    pieceCount[pc]++;
+    pieceCount[make_piece(color_of(pc), ALL_PIECES)]++;
 }

 inline void Position::remove_piece(Square s) {

-  Piece pc = board[s];
-  byTypeBB[ALL_PIECES] ^= s;
-  byTypeBB[type_of(pc)] ^= s;
-  byColorBB[color_of(pc)] ^= s;
-  /* board[s] = NO_PIECE;  Not needed, overwritten by the capturing one */
-  pieceCount[pc]--;
-  pieceCount[make_piece(color_of(pc), ALL_PIECES)]--;
-  psq -= PSQT::psq[pc][s];
+    Piece pc = board[s];  // read the piece before the square is cleared below
+    byTypeBB[ALL_PIECES] ^= s;
+    byTypeBB[type_of(pc)] ^= s;
+    byColorBB[color_of(pc)] ^= s;
+    board[s] = NO_PIECE;  // now cleared explicitly; the removed version left this commented out
+    pieceCount[pc]--;
+    pieceCount[make_piece(color_of(pc), ALL_PIECES)]--;  // per-colour total over all piece types
 }

 inline void Position::move_piece(Square from, Square to) {

-  Piece pc = board[from];
-  Bitboard fromTo = from | to;
-  byTypeBB[ALL_PIECES] ^= fromTo;
-  byTypeBB[type_of(pc)] ^= fromTo;
-  byColorBB[color_of(pc)] ^= fromTo;
-  board[from] = NO_PIECE;
-  board[to] = pc;
-  psq += PSQT::psq[pc][to] - PSQT::psq[pc][from];
+    Piece    pc     = board[from];
+    Bitboard fromTo = from | to;
+    byTypeBB[ALL_PIECES] ^= fromTo;
+    byTypeBB[type_of(pc)] ^= fromTo;
+    byColorBB[color_of(pc)] ^= fromTo;
+    board[from] = NO_PIECE;
+    board[to]   = pc;
 }

-inline void Position::do_move(Move m, StateInfo& newSt) {
-  do_move(m, newSt, gives_check(m));
+inline void Position::do_move(Move m, StateInfo& newSt, const TranspositionTable* tt = nullptr) {
+    do_move(m, newSt, gives_check(m), tt);
 }

-inline StateInfo* Position::state() const {
+inline StateInfo* Position::state() const { return st; }

-  return st;
-}
+}  // namespace Stockfish

-#endif // #ifndef POSITION_H_INCLUDED
+#endif  // #ifndef POSITION_H_INCLUDED
@@ -1,128 +0,0 @@
-/*
-  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
-
-  Stockfish is free software: you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation, either version 3 of the License, or
-  (at your option) any later version.
-
-  Stockfish is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program.  If not, see <http://www.gnu.org/licenses/>.
-*/
-
-
-#include "psqt.h"
-
-#include <algorithm>
-
-#include "bitboard.h"
-#include "types.h"
-
-
-namespace
-{
-
-auto constexpr S = make_score;
-
-// 'Bonus' contains Piece-Square parameters.
-// Scores are explicit for files A to D, implicitly mirrored for E to H.
-constexpr Score Bonus[][RANK_NB][int(FILE_NB) / 2] = {
-  { },
-  { },
-  { // Knight
-   { S(-175, -96), S(-92,-65), S(-74,-49), S(-73,-21) },
-   { S( -77, -67), S(-41,-54), S(-27,-18), S(-15,  8) },
-   { S( -61, -40), S(-17,-27), S(  6, -8), S( 12, 29) },
-   { S( -35, -35), S(  8, -2), S( 40, 13), S( 49, 28) },
-   { S( -34, -45), S( 13,-16), S( 44,  9), S( 51, 39) },
-   { S(  -9, -51), S( 22,-44), S( 58,-16), S( 53, 17) },
-   { S( -67, -69), S(-27,-50), S(  4,-51), S( 37, 12) },
-   { S(-201,-100), S(-83,-88), S(-56,-56), S(-26,-17) }
-  },
-  { // Bishop
-   { S(-37,-40), S(-4 ,-21), S( -6,-26), S(-16, -8) },
-   { S(-11,-26), S(  6, -9), S( 13,-12), S(  3,  1) },
-   { S(-5 ,-11), S( 15, -1), S( -4, -1), S( 12,  7) },
-   { S(-4 ,-14), S(  8, -4), S( 18,  0), S( 27, 12) },
-   { S(-8 ,-12), S( 20, -1), S( 15,-10), S( 22, 11) },
-   { S(-11,-21), S(  4,  4), S(  1,  3), S(  8,  4) },
-   { S(-12,-22), S(-10,-14), S(  4, -1), S(  0,  1) },
-   { S(-34,-32), S(  1,-29), S(-10,-26), S(-16,-17) }
-  },
-  { // Rook
-   { S(-31, -9), S(-20,-13), S(-14,-10), S(-5, -9) },
-   { S(-21,-12), S(-13, -9), S( -8, -1), S( 6, -2) },
-   { S(-25,  6), S(-11, -8), S( -1, -2), S( 3, -6) },
-   { S(-13, -6), S( -5,  1), S( -4, -9), S(-6,  7) },
-   { S(-27, -5), S(-15,  8), S( -4,  7), S( 3, -6) },
-   { S(-22,  6), S( -2,  1), S(  6, -7), S(12, 10) },
-   { S( -2,  4), S( 12,  5), S( 16, 20), S(18, -5) },
-   { S(-17, 18), S(-19,  0), S( -1, 19), S( 9, 13) }
-  },
-  { // Queen
-   { S( 3,-69), S(-5,-57), S(-5,-47), S( 4,-26) },
-   { S(-3,-54), S( 5,-31), S( 8,-22), S(12, -4) },
-   { S(-3,-39), S( 6,-18), S(13, -9), S( 7,  3) },
-   { S( 4,-23), S( 5, -3), S( 9, 13), S( 8, 24) },
-   { S( 0,-29), S(14, -6), S(12,  9), S( 5, 21) },
-   { S(-4,-38), S(10,-18), S( 6,-11), S( 8,  1) },
-   { S(-5,-50), S( 6,-27), S(10,-24), S( 8, -8) },
-   { S(-2,-74), S(-2,-52), S( 1,-43), S(-2,-34) }
-  },
-  { // King
-   { S(271,  1), S(327, 45), S(271, 85), S(198, 76) },
-   { S(278, 53), S(303,100), S(234,133), S(179,135) },
-   { S(195, 88), S(258,130), S(169,169), S(120,175) },
-   { S(164,103), S(190,156), S(138,172), S( 98,172) },
-   { S(154, 96), S(179,166), S(105,199), S( 70,199) },
-   { S(123, 92), S(145,172), S( 81,184), S( 31,191) },
-   { S( 88, 47), S(120,121), S( 65,116), S( 33,131) },
-   { S( 59, 11), S( 89, 59), S( 45, 73), S( -1, 78) }
-  }
-};
-
-constexpr Score PBonus[RANK_NB][FILE_NB] =
-  { // Pawn (asymmetric distribution)
-   { },
-   { S(  2, -8), S(  4, -6), S( 11,  9), S( 18,  5), S( 16, 16), S( 21,  6), S(  9, -6), S( -3,-18) },
-   { S( -9, -9), S(-15, -7), S( 11,-10), S( 15,  5), S( 31,  2), S( 23,  3), S(  6, -8), S(-20, -5) },
-   { S( -3,  7), S(-20,  1), S(  8, -8), S( 19, -2), S( 39,-14), S( 17,-13), S(  2,-11), S( -5, -6) },
-   { S( 11, 12), S( -4,  6), S(-11,  2), S(  2, -6), S( 11, -5), S(  0, -4), S(-12, 14), S(  5,  9) },
-   { S(  3, 27), S(-11, 18), S( -6, 19), S( 22, 29), S( -8, 30), S( -5,  9), S(-14,  8), S(-11, 14) },
-   { S( -7, -1), S(  6,-14), S( -2, 13), S(-11, 22), S(  4, 24), S(-14, 17), S( 10,  7), S( -9,  7) }
-  };
-
-} // namespace
-
-
-namespace PSQT
-{
-
-Score psq[PIECE_NB][SQUARE_NB];
-
-// PSQT::init() initializes piece-square tables: the white halves of the tables are
-// copied from Bonus[] and PBonus[], adding the piece value, then the black halves of
-// the tables are initialized by flipping and changing the sign of the white scores.
-void init() {
-
-  for (Piece pc : {W_PAWN, W_KNIGHT, W_BISHOP, W_ROOK, W_QUEEN, W_KING})
-  {
-    Score score = make_score(PieceValue[MG][pc], PieceValue[EG][pc]);
-
-    for (Square s = SQ_A1; s <= SQ_H8; ++s)
-    {
-      File f = File(edge_distance(file_of(s)));
-      psq[ pc][s] = score + (type_of(pc) == PAWN ? PBonus[rank_of(s)][file_of(s)]
-                                                 : Bonus[pc][rank_of(s)][f]);
-      psq[~pc][flip_rank(s)] = -psq[pc][s];
-    }
-  }
-}
-
-} // namespace PSQT
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -16,30 +16,33 @@
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

-//Common header of input features of NNUE evaluation function
+#include "score.h"

-#ifndef NNUE_FEATURES_COMMON_H_INCLUDED
-#define NNUE_FEATURES_COMMON_H_INCLUDED
+#include <cassert>
+#include <cmath>
+#include <cstdlib>

-#include "../../evaluate.h"
-#include "../nnue_common.h"
+#include "uci.h"

-namespace Eval::NNUE::Features {
+namespace Stockfish {

-  class IndexList;
+Score::Score(Value v, const Position& pos) {  // stores v as InternalUnits, Tablebase or Mate
+    assert(-VALUE_INFINITE < v && v < VALUE_INFINITE);  // v must be strictly inside both bounds

-  template <typename... FeatureTypes>
-  class FeatureSet;
+    if (!is_decisive(v))  // non-decisive value: keep the result of UCIEngine::to_cp(v, pos)
+    {
+        score = InternalUnits{UCIEngine::to_cp(v, pos)};
+    }
+    else if (std::abs(v) <= VALUE_TB)  // decisive with |v| <= VALUE_TB: tablebase score
+    {
+        auto distance = VALUE_TB - std::abs(v);  // measured from VALUE_TB
+        score         = (v > 0) ? Tablebase{distance, true} : Tablebase{-distance, false};
+    }
+    else  // remaining decisive values: mate score
+    {
+        auto distance = VALUE_MATE - std::abs(v);  // measured from VALUE_MATE
+        score         = (v > 0) ? Mate{distance} : Mate{-distance};  // negated unless v > 0
+    }
+}

-  // Trigger to perform full calculations instead of difference only
-  enum class TriggerEvent {
-    kFriendKingMoved // calculate full evaluation when own king moves
-  };
-
-  enum class Side {
-    kFriend // side to move
-  };
-
-}  // namespace Eval::NNUE::Features
-
-#endif // #ifndef NNUE_FEATURES_COMMON_H_INCLUDED
+}
@@ -0,0 +1,70 @@
+/*
+  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)
+
+  Stockfish is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  Stockfish is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef SCORE_H_INCLUDED
+#define SCORE_H_INCLUDED
+
+#include <variant>
+#include <utility>
+
+#include "types.h"
+
+namespace Stockfish {
+
+class Position;
+
+class Score {  // holds exactly one of Mate, Tablebase or InternalUnits in a std::variant
+   public:
+    struct Mate {
+        int plies;  // negated by Score(Value, const Position&) unless v > 0
+    };
+
+    struct Tablebase {
+        int  plies;  // negated by Score(Value, const Position&) unless v > 0
+        bool win;    // set to (v > 0) by that constructor
+    };
+
+    struct InternalUnits {
+        int value;  // filled from UCIEngine::to_cp(v, pos) by that constructor
+    };
+
+    Score() = default;  // a default-constructed std::variant holds its first alternative (Mate)
+    Score(Value v, const Position& pos);
+
+    template<typename T>
+    bool is() const {  // true if the stored alternative is T
+        return std::holds_alternative<T>(score);
+    }
+
+    template<typename T>
+    T get() const {  // copy of the stored T; std::get throws if another alternative is held
+        return std::get<T>(score);
+    }
+
+    template<typename F>
+    decltype(auto) visit(F&& f) const {  // forwards f to std::visit on the stored alternative
+        return std::visit(std::forward<F>(f), score);
+    }
+
+   private:
+    std::variant<Mate, Tablebase, InternalUnits> score;
+};
+
+}
+
+#endif  // #ifndef SCORE_H_INCLUDED
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -19,91 +19,348 @@
 #ifndef SEARCH_H_INCLUDED
 #define SEARCH_H_INCLUDED

+#include <algorithm>
+#include <array>
+#include <atomic>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <memory>
+#include <string>
+#include <string_view>
 #include <vector>

+#include "history.h"
 #include "misc.h"
-#include "movepick.h"
+#include "nnue/network.h"
+#include "nnue/nnue_accumulator.h"
+#include "numa.h"
+#include "position.h"
+#include "score.h"
+#include "syzygy/tbprobe.h"
+#include "timeman.h"
 #include "types.h"

-class Position;
+namespace Stockfish {
+
+// Different node types, used as a template parameter
+enum NodeType {
+    NonPV,
+    PV,
+    Root
+};
+
+class TranspositionTable;
+class ThreadPool;
+class OptionsMap;

 namespace Search {

-/// Threshold used for countermoves based pruning
-constexpr int CounterMovePruneThreshold = 0;
-
-
-/// Stack struct keeps track of the information we need to remember from nodes
-/// shallower and deeper in the tree during the search. Each search thread has
-/// its own array of Stack objects, indexed by the current ply.
-
+// Stack struct keeps track of the information we need to remember from nodes
+// shallower and deeper in the tree during the search. Each search thread has
+// its own array of Stack objects, indexed by the current ply.
 struct Stack {
-  Move* pv;
-  PieceToHistory* continuationHistory;
-  int ply;
-  Move currentMove;
-  Move excludedMove;
-  Move killers[2];
-  Value staticEval;
-  int statScore;
-  int moveCount;
-  bool inCheck;
-  bool ttPv;
-  bool ttHit;
+    Move*                       pv;
+    PieceToHistory*             continuationHistory;
+    CorrectionHistory<PieceTo>* continuationCorrectionHistory;
+    int                         ply;
+    Move                        currentMove;
+    Move                        excludedMove;
+    Value                       staticEval;
+    int                         statScore;
+    int                         moveCount;
+    bool                        inCheck;
+    bool                        ttPv;
+    bool                        ttHit;
+    int                         cutoffCnt;
+    int                         reduction;
+    bool                        isTTMove;
 };


-/// RootMove struct is used for moves at the root of the tree. For each root move
-/// we store a score and a PV (really a refutation in the case of moves which
-/// fail low). Score is normally set at -VALUE_INFINITE for all non-pv moves.
-
+// RootMove struct is used for moves at the root of the tree. For each root move
+// we store a score and a PV (really a refutation in the case of moves which
+// fail low). Score is normally set at -VALUE_INFINITE for all non-pv moves.
 struct RootMove {

-  explicit RootMove(Move m) : pv(1, m) {}
-  bool extract_ponder_from_tt(Position& pos);
-  bool operator==(const Move& m) const { return pv[0] == m; }
-  bool operator<(const RootMove& m) const { // Sort in descending order
-    return m.score != score ? m.score < score
-                            : m.previousScore < previousScore;
-  }
+    explicit RootMove(Move m) :
+        pv(1, m) {}  // pv starts out holding only m
+    bool extract_ponder_from_tt(const TranspositionTable& tt, Position& pos);
+    bool operator==(const Move& m) const { return pv[0] == m; }  // compares the first PV move
+    // Sort in descending order of score; equal scores fall back to previousScore
+    bool operator<(const RootMove& m) const {
+        return m.score != score ? m.score < score : m.previousScore < previousScore;
+    }

-  Value score = -VALUE_INFINITE;
-  Value previousScore = -VALUE_INFINITE;
-  int selDepth = 0;
-  int tbRank = 0;
-  Value tbScore;
-  std::vector<Move> pv;
+    uint64_t          effort           = 0;
+    Value             score            = -VALUE_INFINITE;
+    Value             previousScore    = -VALUE_INFINITE;
+    Value             averageScore     = -VALUE_INFINITE;
+    Value             meanSquaredScore = -VALUE_INFINITE * VALUE_INFINITE;
+    Value             uciScore         = -VALUE_INFINITE;
+    bool              scoreLowerbound  = false;
+    bool              scoreUpperbound  = false;
+    int               selDepth         = 0;
+    int               tbRank           = 0;
+    Value             tbScore;  // no default initializer, unlike the members above
+    std::vector<Move> pv;
 };

-typedef std::vector<RootMove> RootMoves;
+using RootMoves = std::vector<RootMove>;


-/// LimitsType struct stores information sent by GUI about available time to
-/// search the current move, maximum depth/time, or if we are in analysis mode.
-
+// LimitsType struct stores information sent by the caller about the analysis required.
 struct LimitsType {

-  LimitsType() { // Init explicitly due to broken value-initialization of non POD in MSVC
-    time[WHITE] = time[BLACK] = inc[WHITE] = inc[BLACK] = npmsec = movetime = TimePoint(0);
-    movestogo = depth = mate = perft = infinite = 0;
-    nodes = 0;
-  }
+    // Init explicitly due to broken value-initialization of non POD in MSVC
+    LimitsType() {  // NOTE: startTime is the only scalar member not assigned here
+        time[WHITE] = time[BLACK] = inc[WHITE] = inc[BLACK] = npmsec = movetime = TimePoint(0);
+        movestogo = depth = mate = perft = infinite = 0;
+        nodes                                       = 0;
+        ponderMode                                  = false;
+    }

-  bool use_time_management() const {
-    return time[WHITE] || time[BLACK];
-  }
+    bool use_time_management() const { return time[WHITE] || time[BLACK]; }  // either clock set

-  std::vector<Move> searchmoves;
-  TimePoint time[COLOR_NB], inc[COLOR_NB], npmsec, movetime, startTime;
-  int movestogo, depth, mate, perft, infinite;
-  int64_t nodes;
+    std::vector<std::string> searchmoves;  // held as strings here (the removed version held Move)
+    TimePoint                time[COLOR_NB], inc[COLOR_NB], npmsec, movetime, startTime;
+    int                      movestogo, depth, mate, perft, infinite;
+    uint64_t                 nodes;
+    bool                     ponderMode;
 };

-extern LimitsType Limits;

-void init();
-void clear();
+// The UCI stores the uci options, thread pool, and transposition table.
+// This struct is used to easily forward data to the Search::Worker class.
+struct SharedState {  // bundles references only; nothing is copied or owned here
+    SharedState(const OptionsMap&                               optionsMap,
+                ThreadPool&                                     threadPool,
+                TranspositionTable&                             transpositionTable,
+                const LazyNumaReplicated<Eval::NNUE::Networks>& nets) :
+        options(optionsMap),
+        threads(threadPool),
+        tt(transpositionTable),
+        networks(nets) {}  // each member is bound to the corresponding argument

-} // namespace Search
+    const OptionsMap&                               options;   // read-only access
+    ThreadPool&                                     threads;   // mutable access
+    TranspositionTable&                             tt;        // mutable access
+    const LazyNumaReplicated<Eval::NNUE::Networks>& networks;  // read-only access
+};

-#endif // #ifndef SEARCH_H_INCLUDED
+class Worker;
+
+// Null Object Pattern, implement a common interface for the SearchManagers.
+// A Null Object will be given to non-mainthread workers.
+class ISearchManager {
+   public:
+    virtual ~ISearchManager() {}  // virtual, so deleting through this interface is well-defined
+    virtual void check_time(Search::Worker&) = 0;  // pure virtual: implementers must define it
+};
+
+struct InfoShort {
+    int   depth;
+    Score score;
+};
+
+struct InfoFull: InfoShort {
+    int              selDepth;
+    size_t           multiPV;
+    std::string_view wdl;
+    std::string_view bound;
+    size_t           timeMs;
+    size_t           nodes;
+    size_t           nps;
+    size_t           tbHits;
+    std::string_view pv;
+    int              hashfull;
+};
+
+struct InfoIteration {
+    int              depth;
+    std::string_view currmove;
+    size_t           currmovenumber;
+};
+
+// Skill structure is used to implement strength limit. If we have a UCI_Elo,
+// we convert it to an appropriate skill level, anchored to the Stash engine.
+// This method is based on a fit of the Elo results for games played between
+// Stockfish at various skill levels and various versions of the Stash engine.
+// Skill 0 .. 19 now covers CCRL Blitz Elo from 1320 to 3190, approximately
+// Reference: https://github.com/vondele/Stockfish/commit/a08b8d4e9711c2
+struct Skill {
+    // Lowest and highest Elo ratings used in the skill level calculation
+    constexpr static int LowestElo  = 1320;
+    constexpr static int HighestElo = 3190;
+
+    Skill(int skill_level, int uci_elo) {
+        if (uci_elo)
+        {
+            double e = double(uci_elo - LowestElo) / (HighestElo - LowestElo);
+            level = std::clamp((((37.2473 * e - 40.8525) * e + 22.2943) * e - 0.311438), 0.0, 19.0);
+        }
+        else
+            level = double(skill_level);
+    }
+    bool enabled() const { return level < 20.0; }
+    bool time_to_pick(Depth depth) const { return depth == 1 + int(level); }
+    Move pick_best(const RootMoves&, size_t multiPV);
+
+    double level;
+    Move   best = Move::none();
+};
+
+// SearchManager manages the search from the main thread. It is responsible for
+// keeping track of the time, and storing data strictly related to the main thread.
+class SearchManager: public ISearchManager {
+   public:
+    using UpdateShort    = std::function<void(const InfoShort&)>;
+    using UpdateFull     = std::function<void(const InfoFull&)>;
+    using UpdateIter     = std::function<void(const InfoIteration&)>;
+    using UpdateBestmove = std::function<void(std::string_view, std::string_view)>;
+
+    struct UpdateContext {
+        UpdateShort    onUpdateNoMoves;
+        UpdateFull     onUpdateFull;
+        UpdateIter     onIter;
+        UpdateBestmove onBestmove;
+    };
+
+
+    SearchManager(const UpdateContext& updateContext) :
+        updates(updateContext) {}
+
+    void check_time(Search::Worker& worker) override;
+
+    void pv(Search::Worker&           worker,
+            const ThreadPool&         threads,
+            const TranspositionTable& tt,
+            Depth                     depth);
+
+    Stockfish::TimeManagement tm;
+    double                    originalTimeAdjust;
+    int                       callsCnt;
+    std::atomic_bool          ponder;
+
+    std::array<Value, 4> iterValue;
+    double               previousTimeReduction;
+    Value                bestPreviousScore;
+    Value                bestPreviousAverageScore;
+    bool                 stopOnPonderhit;
+
+    size_t id;
+
+    const UpdateContext& updates;
+};
+
+class NullSearchManager: public ISearchManager {
+   public:
+    void check_time(Search::Worker&) override {}
+};
+
+
+// Search::Worker is the class that does the actual search.
+// It is instantiated once per thread, and it is responsible for keeping track
+// of the search history, and storing data required for the search.
+class Worker {
+   public:
+    Worker(SharedState&, std::unique_ptr<ISearchManager>, size_t, NumaReplicatedAccessToken);
+
+    // Called at instantiation to initialize the reductions table.
+    // Resets histories, usually before a new game.
+    void clear();
+
+    // Called when the program receives the UCI 'go' command.
+    // It searches from the root position and outputs the "bestmove".
+    void start_searching();
+
+    bool is_mainthread() const { return threadIdx == 0; }
+
+    void ensure_network_replicated();
+
+    // Public because they need to be updatable by the stats
+    ButterflyHistory mainHistory;
+    LowPlyHistory    lowPlyHistory;
+
+    CapturePieceToHistory captureHistory;
+    ContinuationHistory   continuationHistory[2][2];
+    PawnHistory           pawnHistory;
+
+    CorrectionHistory<Pawn>         pawnCorrectionHistory;
+    CorrectionHistory<Minor>        minorPieceCorrectionHistory;
+    CorrectionHistory<NonPawn>      nonPawnCorrectionHistory[COLOR_NB];
+    CorrectionHistory<Continuation> continuationCorrectionHistory;
+
+   private:
+    void iterative_deepening();
+
+    // This is the main search function, for both PV and non-PV nodes
+    template<NodeType nodeType>
+    Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode);
+
+    // Quiescence search function, which is called by the main search
+    template<NodeType nodeType>
+    Value qsearch(Position& pos, Stack* ss, Value alpha, Value beta);
+
+    Depth reduction(bool i, Depth d, int mn, int delta) const;
+
+    // Pointer to the search manager, only allowed to be called by the main thread
+    SearchManager* main_manager() const {
+        assert(threadIdx == 0);
+        return static_cast<SearchManager*>(manager.get());
+    }
+
+    TimePoint elapsed() const;
+    TimePoint elapsed_time() const;
+
+    Value evaluate(const Position&);
+
+    LimitsType limits;
+
+    size_t                pvIdx, pvLast;
+    std::atomic<uint64_t> nodes, tbHits, bestMoveChanges;
+    int                   selDepth, nmpMinPly;
+
+    Value optimism[COLOR_NB];
+
+    Position  rootPos;
+    StateInfo rootState;
+    RootMoves rootMoves;
+    Depth     rootDepth, completedDepth;
+    Value     rootDelta;
+
+    size_t                    threadIdx;
+    NumaReplicatedAccessToken numaAccessToken;
+
+    // Reductions lookup table initialized at startup
+    std::array<int, MAX_MOVES> reductions;  // [depth or moveNumber]
+
+    // The main thread has a SearchManager, the others have a NullSearchManager
+    std::unique_ptr<ISearchManager> manager;
+
+    Tablebases::Config tbConfig;
+
+    const OptionsMap&                               options;
+    ThreadPool&                                     threads;
+    TranspositionTable&                             tt;
+    const LazyNumaReplicated<Eval::NNUE::Networks>& networks;
+
+    // Used by NNUE
+    Eval::NNUE::AccumulatorCaches refreshTable;
+
+    friend class Stockfish::ThreadPool;
+    friend class SearchManager;
+};
+
+struct ConthistBonus {
+    int index;
+    int weight;
+};
+
+
+}  // namespace Search
+
+}  // namespace Stockfish
+
+#endif  // #ifndef SEARCH_H_INCLUDED
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -19,60 +19,60 @@
 #ifndef TBPROBE_H
 #define TBPROBE_H

-#include <ostream>
+#include <string>
+#include <vector>

-#include "../search.h"

-namespace Tablebases {
+namespace Stockfish {
+class Position;
+class OptionsMap;
+
+using Depth = int;
+
+namespace Search {
+struct RootMove;
+using RootMoves = std::vector<RootMove>;
+}
+}
+
+namespace Stockfish::Tablebases {
+
+struct Config {
+    int   cardinality = 0;
+    bool  rootInTB    = false;
+    bool  useRule50   = false;
+    Depth probeDepth  = 0;
+};

 enum WDLScore {
-    WDLLoss        = -2, // Loss
-    WDLBlessedLoss = -1, // Loss, but draw under 50-move rule
-    WDLDraw        =  0, // Draw
-    WDLCursedWin   =  1, // Win, but draw under 50-move rule
-    WDLWin         =  2, // Win
-
-    WDLScoreNone  = -1000
+    WDLLoss        = -2,  // Loss
+    WDLBlessedLoss = -1,  // Loss, but draw under 50-move rule
+    WDLDraw        = 0,   // Draw
+    WDLCursedWin   = 1,   // Win, but draw under 50-move rule
+    WDLWin         = 2,   // Win
 };

 // Possible states after a probing operation
 enum ProbeState {
-    FAIL              =  0, // Probe failed (missing file table)
-    OK                =  1, // Probe succesful
-    CHANGE_STM        = -1, // DTZ should check the other side
-    ZEROING_BEST_MOVE =  2  // Best move zeroes DTZ (capture or pawn move)
+    FAIL              = 0,   // Probe failed (missing file table)
+    OK                = 1,   // Probe successful
+    CHANGE_STM        = -1,  // DTZ should check the other side
+    ZEROING_BEST_MOVE = 2    // Best move zeroes DTZ (capture or pawn move)
 };

 extern int MaxCardinality;

-void init(const std::string& paths);
+
+void     init(const std::string& paths);
 WDLScore probe_wdl(Position& pos, ProbeState* result);
-int probe_dtz(Position& pos, ProbeState* result);
-bool root_probe(Position& pos, Search::RootMoves& rootMoves);
-bool root_probe_wdl(Position& pos, Search::RootMoves& rootMoves);
-void rank_root_moves(Position& pos, Search::RootMoves& rootMoves);
+int      probe_dtz(Position& pos, ProbeState* result);
+bool     root_probe(Position& pos, Search::RootMoves& rootMoves, bool rule50, bool rankDTZ);
+bool     root_probe_wdl(Position& pos, Search::RootMoves& rootMoves, bool rule50);
+Config   rank_root_moves(const OptionsMap&  options,
+                         Position&          pos,
+                         Search::RootMoves& rootMoves,
+                         bool               rankDTZ = false);

-inline std::ostream& operator<<(std::ostream& os, const WDLScore v) {
-
-    os << (v == WDLLoss        ? "Loss" :
-           v == WDLBlessedLoss ? "Blessed loss" :
-           v == WDLDraw        ? "Draw" :
-           v == WDLCursedWin   ? "Cursed win" :
-           v == WDLWin         ? "Win" : "None");
-
-    return os;
-}
-
-inline std::ostream& operator<<(std::ostream& os, const ProbeState v) {
-
-    os << (v == FAIL              ? "Failed" :
-           v == OK                ? "Success" :
-           v == CHANGE_STM        ? "Probed opponent side" :
-           v == ZEROING_BEST_MOVE ? "Best move zeroes DTZ" : "None");
-
-    return os;
-}
-
-}
+}  // namespace Stockfish::Tablebases

 #endif
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -16,245 +16,395 @@
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

-#include <cassert>
+#include "thread.h"
+
+#include <algorithm>
+#include <cassert>
+#include <deque>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>

-#include <algorithm> // For std::count
 #include "movegen.h"
 #include "search.h"
-#include "thread.h"
-#include "uci.h"
 #include "syzygy/tbprobe.h"
-#include "tt.h"
+#include "timeman.h"
+#include "types.h"
+#include "uci.h"
+#include "ucioption.h"

-ThreadPool Threads; // Global object
+namespace Stockfish {

+// Constructor launches the thread and waits until it goes to sleep
+// in idle_loop(). Note that 'searching' and 'exit' should be already set.
+Thread::Thread(Search::SharedState&                    sharedState,
+               std::unique_ptr<Search::ISearchManager> sm,
+               size_t                                  n,
+               OptionalThreadToNumaNodeBinder          binder) :
+    idx(n),
+    nthreads(sharedState.options["Threads"]),
+    stdThread(&Thread::idle_loop, this) {

-/// Thread constructor launches the thread and waits until it goes to sleep
-/// in idle_loop(). Note that 'searching' and 'exit' should be already set.
+    wait_for_search_finished();

-Thread::Thread(size_t n) : idx(n), stdThread(&Thread::idle_loop, this) {
+    run_custom_job([this, &binder, &sharedState, &sm, n]() {
+        // Use the binder to optionally bind this thread to a NUMA node before doing
+        // the Worker allocation. Ideally we would also allocate the SearchManager
+        // here, but that's minor.
+        this->numaAccessToken = binder();
+        this->worker =
+          std::make_unique<Search::Worker>(sharedState, std::move(sm), n, this->numaAccessToken);
+    });

-  wait_for_search_finished();
+    wait_for_search_finished();
 }


-/// Thread destructor wakes up the thread in idle_loop() and waits
-/// for its termination. Thread should be already waiting.
-
+// Destructor wakes up the thread in idle_loop() and waits
+// for its termination. Thread should be already waiting.
 Thread::~Thread() {

-  assert(!searching);
+    assert(!searching);

-  exit = true;
-  start_searching();
-  stdThread.join();
+    exit = true;
+    start_searching();
+    stdThread.join();
 }

-
-/// Thread::clear() reset histories, usually before a new game
-
-void Thread::clear() {
-
-  counterMoves.fill(MOVE_NONE);
-  mainHistory.fill(0);
-  lowPlyHistory.fill(0);
-  captureHistory.fill(0);
-
-  for (bool inCheck : { false, true })
-      for (StatsType c : { NoCaptures, Captures })
-      {
-          for (auto& to : continuationHistory[inCheck][c])
-                for (auto& h : to)
-                      h->fill(0);
-          continuationHistory[inCheck][c][NO_PIECE][0]->fill(Search::CounterMovePruneThreshold - 1);
-      }
-}
-
-
-/// Thread::start_searching() wakes up the thread that will start the search
-
+// Wakes up the thread that will start the search
 void Thread::start_searching() {
-
-  std::lock_guard<std::mutex> lk(mutex);
-  searching = true;
-  cv.notify_one(); // Wake up the thread in idle_loop()
+    assert(worker != nullptr);
+    run_custom_job([this]() { worker->start_searching(); });
 }

+// Clears the histories for the thread worker (usually before a new game)
+void Thread::clear_worker() {
+    assert(worker != nullptr);
+    run_custom_job([this]() { worker->clear(); });
+}

-/// Thread::wait_for_search_finished() blocks on the condition variable
-/// until the thread has finished searching.
-
+// Blocks on the condition variable until the thread has finished searching
 void Thread::wait_for_search_finished() {

-  std::unique_lock<std::mutex> lk(mutex);
-  cv.wait(lk, [&]{ return !searching; });
+    std::unique_lock<std::mutex> lk(mutex);
+    cv.wait(lk, [&] { return !searching; });
 }

+// Hands a function to the thread to run, waiting first until the thread is idle
+void Thread::run_custom_job(std::function<void()> f) {
+    {
+        std::unique_lock<std::mutex> lk(mutex);
+        cv.wait(lk, [&] { return !searching; });
+        jobFunc   = std::move(f);
+        searching = true;
+    }
+    cv.notify_one();
+}

-/// Thread::idle_loop() is where the thread is parked, blocked on the
-/// condition variable, when it has no work to do.
+void Thread::ensure_network_replicated() { worker->ensure_network_replicated(); }
+
+// Thread gets parked here, blocked on the condition variable
+// when the thread has no work to do.

 void Thread::idle_loop() {
+    while (true)
+    {
+        std::unique_lock<std::mutex> lk(mutex);
+        searching = false;
+        cv.notify_one();  // Wake up anyone waiting for search finished
+        cv.wait(lk, [&] { return searching; });

-  // If OS already scheduled us on a different group than 0 then don't overwrite
-  // the choice, eventually we are one of many one-threaded processes running on
-  // some Windows NUMA hardware, for instance in fishtest. To make it simple,
-  // just check if running threads are below a threshold, in this case all this
-  // NUMA machinery is not needed.
-  if (Options["Threads"] > 8)
-      WinProcGroup::bindThisThread(idx);
+        if (exit)
+            return;

-  while (true)
-  {
-      std::unique_lock<std::mutex> lk(mutex);
-      searching = false;
-      cv.notify_one(); // Wake up anyone waiting for search finished
-      cv.wait(lk, [&]{ return searching; });
+        std::function<void()> job = std::move(jobFunc);
+        jobFunc                   = nullptr;

-      if (exit)
-          return;
+        lk.unlock();

-      lk.unlock();
-
-      search();
-  }
+        if (job)
+            job();
+    }
 }

-/// ThreadPool::set() creates/destroys threads to match the requested number.
-/// Created and launched threads will immediately go to sleep in idle_loop.
-/// Upon resizing, threads are recreated to allow for binding if necessary.
+Search::SearchManager* ThreadPool::main_manager() { return main_thread()->worker->main_manager(); }

-void ThreadPool::set(size_t requested) {
+uint64_t ThreadPool::nodes_searched() const { return accumulate(&Search::Worker::nodes); }
+uint64_t ThreadPool::tb_hits() const { return accumulate(&Search::Worker::tbHits); }

-  if (size() > 0) { // destroy any existing thread(s)
-      main()->wait_for_search_finished();
+// Creates/destroys threads to match the requested number.
+// Created and launched threads will immediately go to sleep in idle_loop.
+// Upon resizing, threads are recreated to allow for binding if necessary.
+void ThreadPool::set(const NumaConfig&                           numaConfig,
+                     Search::SharedState                         sharedState,
+                     const Search::SearchManager::UpdateContext& updateContext) {

-      while (size() > 0)
-          delete back(), pop_back();
-  }
+    if (threads.size() > 0)  // destroy any existing thread(s)
+    {
+        main_thread()->wait_for_search_finished();

-  if (requested > 0) { // create new thread(s)
-      push_back(new MainThread(0));
+        threads.clear();

-      while (size() < requested)
-          push_back(new Thread(size()));
-      clear();
+        boundThreadToNumaNode.clear();
+    }

-      // Reallocate the hash with the new threadpool size
-      TT.resize(size_t(Options["Hash"]));
+    const size_t requested = sharedState.options["Threads"];

-      // Init thread number dependent search params.
-      Search::init();
-  }
+    if (requested > 0)  // create new thread(s)
+    {
+        // Binding threads may be problematic when there are multiple NUMA nodes and
+        // multiple Stockfish instances running. In particular, if each instance
+        // runs a single thread then they would all be mapped to the first NUMA node.
+        // This is undesirable, and so the default behaviour (i.e. when the user does not
+        // change the NumaPolicy UCI setting) is to not bind the threads to processors
+        // unless we know for sure that we span NUMA nodes and replication is required.
+        const std::string numaPolicy(sharedState.options["NumaPolicy"]);
+        const bool        doBindThreads = [&]() {
+            if (numaPolicy == "none")
+                return false;
+
+            if (numaPolicy == "auto")
+                return numaConfig.suggests_binding_threads(requested);
+
+            // numaPolicy == "system", or explicitly set by the user
+            return true;
+        }();
+
+        boundThreadToNumaNode = doBindThreads
+                                ? numaConfig.distribute_threads_among_numa_nodes(requested)
+                                : std::vector<NumaIndex>{};
+
+        while (threads.size() < requested)
+        {
+            const size_t    threadId = threads.size();
+            const NumaIndex numaId   = doBindThreads ? boundThreadToNumaNode[threadId] : 0;
+            auto            manager  = threadId == 0 ? std::unique_ptr<Search::ISearchManager>(
+                                             std::make_unique<Search::SearchManager>(updateContext))
+                                                     : std::make_unique<Search::NullSearchManager>();
+
+            // When not binding threads we want to force all access to happen
+            // from the same NUMA node, because in case of NUMA replicated memory
+            // accesses we don't want to thrash the cache in case the threads get scheduled
+            // on the same NUMA node.
+            auto binder = doBindThreads ? OptionalThreadToNumaNodeBinder(numaConfig, numaId)
+                                        : OptionalThreadToNumaNodeBinder(numaId);
+
+            threads.emplace_back(
+              std::make_unique<Thread>(sharedState, std::move(manager), threadId, binder));
+        }
+
+        clear();
+
+        main_thread()->wait_for_search_finished();
+    }
 }


-/// ThreadPool::clear() sets threadPool data to initial values
-
+// Sets threadPool data to initial values
 void ThreadPool::clear() {
+    if (threads.size() == 0)
+        return;

-  for (Thread* th : *this)
-      th->clear();
+    for (auto&& th : threads)
+        th->clear_worker();

-  main()->callsCnt = 0;
-  main()->bestPreviousScore = VALUE_INFINITE;
-  main()->previousTimeReduction = 1.0;
+    for (auto&& th : threads)
+        th->wait_for_search_finished();
+
+    // These two affect the time taken on the first move of a game:
+    main_manager()->bestPreviousAverageScore = VALUE_INFINITE;
+    main_manager()->previousTimeReduction    = 0.85;
+
+    main_manager()->callsCnt           = 0;
+    main_manager()->bestPreviousScore  = VALUE_INFINITE;
+    main_manager()->originalTimeAdjust = -1;
+    main_manager()->tm.clear();
 }

+void ThreadPool::run_on_thread(size_t threadId, std::function<void()> f) {
+    assert(threads.size() > threadId);
+    threads[threadId]->run_custom_job(std::move(f));
+}

-/// ThreadPool::start_thinking() wakes up main thread waiting in idle_loop() and
-/// returns immediately. Main thread will wake up other threads and start the search.
+void ThreadPool::wait_on_thread(size_t threadId) {
+    assert(threads.size() > threadId);
+    threads[threadId]->wait_for_search_finished();
+}

-void ThreadPool::start_thinking(Position& pos, StateListPtr& states,
-                                const Search::LimitsType& limits, bool ponderMode) {
+size_t ThreadPool::num_threads() const { return threads.size(); }

-  main()->wait_for_search_finished();

-  main()->stopOnPonderhit = stop = false;
-  increaseDepth = true;
-  main()->ponder = ponderMode;
-  Search::Limits = limits;
-  Search::RootMoves rootMoves;
+// Wakes up main thread waiting in idle_loop() and returns immediately.
+// Main thread will wake up other threads and start the search.
+void ThreadPool::start_thinking(const OptionsMap&  options,
+                                Position&          pos,
+                                StateListPtr&      states,
+                                Search::LimitsType limits) {

-  for (const auto& m : MoveList<LEGAL>(pos))
-      if (   limits.searchmoves.empty()
-          || std::count(limits.searchmoves.begin(), limits.searchmoves.end(), m))
-          rootMoves.emplace_back(m);
+    main_thread()->wait_for_search_finished();

-  if (!rootMoves.empty())
-      Tablebases::rank_root_moves(pos, rootMoves);
+    main_manager()->stopOnPonderhit = stop = abortedSearch = false;
+    main_manager()->ponder                                 = limits.ponderMode;

-  // After ownership transfer 'states' becomes empty, so if we stop the search
-  // and call 'go' again without setting a new position states.get() == NULL.
-  assert(states.get() || setupStates.get());
+    increaseDepth = true;

-  if (states.get())
-      setupStates = std::move(states); // Ownership transfer, states is now empty
+    Search::RootMoves rootMoves;
+    const auto        legalmoves = MoveList<LEGAL>(pos);

-  // We use Position::set() to set root position across threads. But there are
-  // some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot
-  // be deduced from a fen string, so set() clears them and they are set from
-  // setupStates->back() later. The rootState is per thread, earlier states are shared
-  // since they are read-only.
-  for (Thread* th : *this)
-  {
-      th->nodes = th->tbHits = th->nmpMinPly = th->bestMoveChanges = 0;
-      th->rootDepth = th->completedDepth = 0;
-      th->rootMoves = rootMoves;
-      th->rootPos.set(pos.fen(), pos.is_chess960(), &th->rootState, th);
-      th->rootState = setupStates->back();
-  }
+    for (const auto& uciMove : limits.searchmoves)
+    {
+        auto move = UCIEngine::to_move(pos, uciMove);

-  main()->start_searching();
+        if (std::find(legalmoves.begin(), legalmoves.end(), move) != legalmoves.end())
+            rootMoves.emplace_back(move);
+    }
+
+    if (rootMoves.empty())
+        for (const auto& m : legalmoves)
+            rootMoves.emplace_back(m);
+
+    Tablebases::Config tbConfig = Tablebases::rank_root_moves(options, pos, rootMoves);
+
+    // After ownership transfer 'states' becomes empty, so if we stop the search
+    // and call 'go' again without setting a new position states.get() == nullptr.
+    assert(states.get() || setupStates.get());
+
+    if (states.get())
+        setupStates = std::move(states);  // Ownership transfer, states is now empty
+
+    // We use Position::set() to set root position across threads. But there are
+    // some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot
+    // be deduced from a fen string, so set() clears them and they are set from
+    // setupStates->back() later. The rootState is per thread, earlier states are
+    // shared since they are read-only.
+    for (auto&& th : threads)
+    {
+        th->run_custom_job([&]() {
+            th->worker->limits = limits;
+            th->worker->nodes = th->worker->tbHits = th->worker->nmpMinPly =
+              th->worker->bestMoveChanges          = 0;
+            th->worker->rootDepth = th->worker->completedDepth = 0;
+            th->worker->rootMoves                              = rootMoves;
+            th->worker->rootPos.set(pos.fen(), pos.is_chess960(), &th->worker->rootState);
+            th->worker->rootState = setupStates->back();
+            th->worker->tbConfig  = tbConfig;
+        });
+    }
+
+    for (auto&& th : threads)
+        th->wait_for_search_finished();
+
+    main_thread()->start_searching();
 }

 Thread* ThreadPool::get_best_thread() const {

-    Thread* bestThread = front();
-    std::map<Move, int64_t> votes;
-    Value minScore = VALUE_NONE;
+    Thread* bestThread = threads.front().get();
+    Value   minScore   = VALUE_NONE;

-    // Find minimum score of all threads
-    for (Thread* th: *this)
-        minScore = std::min(minScore, th->rootMoves[0].score);
+    std::unordered_map<Move, int64_t, Move::MoveHash> votes(
+      2 * std::min(size(), bestThread->worker->rootMoves.size()));
+
+    // Find the minimum score of all threads
+    for (auto&& th : threads)
+        minScore = std::min(minScore, th->worker->rootMoves[0].score);

    // Vote according to score and depth, and select the best thread
-    for (Thread* th : *this)
-    {
-        votes[th->rootMoves[0].pv[0]] +=
-            (th->rootMoves[0].score - minScore + 14) * int(th->completedDepth);
+    auto thread_voting_value = [minScore](Thread* th) {
+        return (th->worker->rootMoves[0].score - minScore + 14) * int(th->worker->completedDepth);
+    };

-        if (abs(bestThread->rootMoves[0].score) >= VALUE_TB_WIN_IN_MAX_PLY)
+    for (auto&& th : threads)
+        votes[th->worker->rootMoves[0].pv[0]] += thread_voting_value(th.get());
+
+    for (auto&& th : threads)
+    {
+        const auto bestThreadScore = bestThread->worker->rootMoves[0].score;
+        const auto newThreadScore  = th->worker->rootMoves[0].score;
+
+        const auto& bestThreadPV = bestThread->worker->rootMoves[0].pv;
+        const auto& newThreadPV  = th->worker->rootMoves[0].pv;
+
+        const auto bestThreadMoveVote = votes[bestThreadPV[0]];
+        const auto newThreadMoveVote  = votes[newThreadPV[0]];
+
+        const bool bestThreadInProvenWin = is_win(bestThreadScore);
+        const bool newThreadInProvenWin  = is_win(newThreadScore);
+
+        const bool bestThreadInProvenLoss =
+          bestThreadScore != -VALUE_INFINITE && is_loss(bestThreadScore);
+        const bool newThreadInProvenLoss =
+          newThreadScore != -VALUE_INFINITE && is_loss(newThreadScore);
+
+        // We make sure not to pick a thread with truncated principal variation
+        const bool betterVotingValue =
+          thread_voting_value(th.get()) * int(newThreadPV.size() > 2)
+          > thread_voting_value(bestThread) * int(bestThreadPV.size() > 2);
+
+        if (bestThreadInProvenWin)
        {
-            // Make sure we pick the shortest mate / TB conversion or stave off mate the longest
-            if (th->rootMoves[0].score > bestThread->rootMoves[0].score)
-                bestThread = th;
+            // Make sure we pick the shortest mate / TB conversion
+            if (newThreadScore > bestThreadScore)
+                bestThread = th.get();
        }
-        else if (   th->rootMoves[0].score >= VALUE_TB_WIN_IN_MAX_PLY
-                 || (   th->rootMoves[0].score > VALUE_TB_LOSS_IN_MAX_PLY
-                     && votes[th->rootMoves[0].pv[0]] > votes[bestThread->rootMoves[0].pv[0]]))
-            bestThread = th;
+        else if (bestThreadInProvenLoss)
+        {
+            // Make sure we pick the shortest mated / TB conversion
+            if (newThreadInProvenLoss && newThreadScore < bestThreadScore)
+                bestThread = th.get();
+        }
+        else if (newThreadInProvenWin || newThreadInProvenLoss
+                 || (!is_loss(newThreadScore)
+                     && (newThreadMoveVote > bestThreadMoveVote
+                         || (newThreadMoveVote == bestThreadMoveVote && betterVotingValue))))
+            bestThread = th.get();
    }

    return bestThread;
 }


-/// Start non-main threads
-
+// Start non-main threads.
+// Will be invoked by main thread after it has started searching.
 void ThreadPool::start_searching() {

-    for (Thread* th : *this)
-        if (th != front())
+    for (auto&& th : threads)
+        if (th != threads.front())
            th->start_searching();
 }


-/// Wait for non-main threads
-
+// Wait for non-main threads
 void ThreadPool::wait_for_search_finished() const {

-    for (Thread* th : *this)
-        if (th != front())
+    for (auto&& th : threads)
+        if (th != threads.front())
            th->wait_for_search_finished();
 }
+
+std::vector<size_t> ThreadPool::get_bound_thread_count_by_numa_node() const {
+    std::vector<size_t> counts;
+
+    if (!boundThreadToNumaNode.empty())
+    {
+        NumaIndex highestNumaNode = 0;
+        for (NumaIndex n : boundThreadToNumaNode)
+            if (n > highestNumaNode)
+                highestNumaNode = n;
+
+        counts.resize(highestNumaNode + 1, 0);
+
+        for (NumaIndex n : boundThreadToNumaNode)
+            counts[n] += 1;
+    }
+
+    return counts;
+}
+
+void ThreadPool::ensure_network_replicated() {
+    for (auto&& th : threads)
+        th->ensure_network_replicated();
+}
+
+}  // namespace Stockfish
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -21,111 +21,158 @@

 #include <atomic>
 #include <condition_variable>
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <memory>
 #include <mutex>
-#include <thread>
 #include <vector>

-#include "material.h"
-#include "movepick.h"
-#include "pawns.h"
+#include "numa.h"
 #include "position.h"
 #include "search.h"
 #include "thread_win32_osx.h"

+namespace Stockfish {

-/// Thread class keeps together all the thread-related stuff. We use
-/// per-thread pawn and material hash tables so that once we get a
-/// pointer to an entry its life time is unlimited and we don't have
-/// to care about someone changing the entry under our feet.

+class OptionsMap;
+using Value = int;
+
+// Callable that yields the NUMA access token for a thread. Binding the thread is
+// optional: when no NumaConfig was supplied the thread is left unbound, yet the
+// caller still receives a token for the requested node, so that structures relying
+// on NUMA node knowledge stay usable. The recipient therefore does not need to know
+// whether the binding happened or not.
+class OptionalThreadToNumaNodeBinder {
+   public:
+    // Non-binding variant: only remembers the node id
+    OptionalThreadToNumaNodeBinder(NumaIndex n) :
+        numaConfig(nullptr),
+        numaId(n) {}
+
+    // Binding variant: cfg is kept by pointer and is used when the binder is invoked
+    OptionalThreadToNumaNodeBinder(const NumaConfig& cfg, NumaIndex n) :
+        numaConfig(&cfg),
+        numaId(n) {}
+
+    NumaReplicatedAccessToken operator()() const {
+        // Without a configuration there is nothing to bind to: hand out a plain token
+        if (numaConfig == nullptr)
+            return NumaReplicatedAccessToken(numaId);
+
+        return numaConfig->bind_current_thread_to_numa_node(numaId);
+    }
+
+   private:
+    const NumaConfig* numaConfig;  // nullptr means "do not bind"
+    NumaIndex         numaId;
+};
+
+// Abstraction of a thread. It owns the search worker (a unique_ptr) and a native thread.
+// After construction, the native thread is started with idle_loop()
+// waiting for a signal to start searching.
+// When the signal is received, the thread starts searching and when
+// the search is finished, it goes back to idle_loop() waiting for a new signal.
 class Thread {
+   public:
+    Thread(Search::SharedState&,
+           std::unique_ptr<Search::ISearchManager>,
+           size_t,
+           OptionalThreadToNumaNodeBinder);
+    virtual ~Thread();

-  std::mutex mutex;
-  std::condition_variable cv;
-  size_t idx;
-  bool exit = false, searching = true; // Set before starting std::thread
-  NativeThread stdThread;
+    void idle_loop();
+    void start_searching();
+    void clear_worker();
+    void run_custom_job(std::function<void()> f);  // NOTE(review): presumably runs f on this thread

-public:
-  explicit Thread(size_t);
-  virtual ~Thread();
-  virtual void search();
-  void clear();
-  void idle_loop();
-  void start_searching();
-  void wait_for_search_finished();
+    void ensure_network_replicated();

-  Pawns::Table pawnsTable;
-  Material::Table materialTable;
-  size_t pvIdx, pvLast;
-  uint64_t ttHitAverage;
-  int selDepth, nmpMinPly;
-  Color nmpColor;
-  std::atomic<uint64_t> nodes, tbHits, bestMoveChanges;
+    // Thread has been slightly altered to allow running custom jobs, so the name
+    // wait_for_search_finished is no longer accurate. However, this class (and ThreadPool)
+    // requires further work to make it properly generic while maintaining
+    // appropriate specificity regarding search, from the point of view of an
+    // outside user, so renaming of this function is left for whenever that happens.
+    void   wait_for_search_finished();
+    size_t id() const { return idx; }

-  Position rootPos;
-  StateInfo rootState;
-  Search::RootMoves rootMoves;
-  Depth rootDepth, completedDepth;
-  CounterMoveHistory counterMoves;
-  ButterflyHistory mainHistory;
-  LowPlyHistory lowPlyHistory;
-  CapturePieceToHistory captureHistory;
-  ContinuationHistory continuationHistory[2][2];
-  Score contempt;
-  int failedHighCnt;
+    std::unique_ptr<Search::Worker> worker;
+    std::function<void()>           jobFunc;  // NOTE(review): presumably set by run_custom_job()
+
+   private:
+    std::mutex                mutex;
+    std::condition_variable   cv;
+    size_t                    idx, nthreads;
+    bool                      exit = false, searching = true;  // Set before starting std::thread
+    NativeThread              stdThread;
+    NumaReplicatedAccessToken numaAccessToken;
 };


-/// MainThread is a derived class specific for main thread
+// The ThreadPool class handles all the threads-related stuff like init, starting,
+// parking and, most importantly, launching a thread. All the access to threads
+// is done through this class.
+class ThreadPool {
+   public:
+    ThreadPool() {}

-struct MainThread : public Thread {
+    ~ThreadPool() {
+        // Wait for the main thread to finish, then destroy any existing thread(s)
+        if (threads.size() > 0)
+        {
+            main_thread()->wait_for_search_finished();

-  using Thread::Thread;
+            threads.clear();
+        }
+    }

-  void search() override;
-  void check_time();
+    ThreadPool(const ThreadPool&) = delete;  // The pool can be neither copied nor moved
+    ThreadPool(ThreadPool&&)      = delete;

-  double previousTimeReduction;
-  Value bestPreviousScore;
-  Value iterValue[4];
-  int callsCnt;
-  bool stopOnPonderhit;
-  std::atomic_bool ponder;
+    ThreadPool& operator=(const ThreadPool&) = delete;
+    ThreadPool& operator=(ThreadPool&&)      = delete;
+
+    void   start_thinking(const OptionsMap&, Position&, StateListPtr&, Search::LimitsType);
+    void   run_on_thread(size_t threadId, std::function<void()> f);
+    void   wait_on_thread(size_t threadId);
+    size_t num_threads() const;
+    void   clear();
+    void   set(const NumaConfig& numaConfig,
+               Search::SharedState,
+               const Search::SearchManager::UpdateContext&);
+
+    Search::SearchManager* main_manager();
+    Thread*                main_thread() const { return threads.front().get(); }
+    uint64_t               nodes_searched() const;
+    uint64_t               tb_hits() const;
+    Thread*                get_best_thread() const;
+    void                   start_searching();
+    void                   wait_for_search_finished() const;
+
+    std::vector<size_t> get_bound_thread_count_by_numa_node() const;
+
+    void ensure_network_replicated();
+
+    std::atomic_bool stop, abortedSearch, increaseDepth;
+
+    auto cbegin() const noexcept { return threads.cbegin(); }
+    auto begin() noexcept { return threads.begin(); }
+    auto end() noexcept { return threads.end(); }
+    auto cend() const noexcept { return threads.cend(); }
+    auto size() const noexcept { return threads.size(); }
+    auto empty() const noexcept { return threads.empty(); }
+
+   private:
+    StateListPtr                         setupStates;
+    std::vector<std::unique_ptr<Thread>> threads;  // threads.front() is the main thread
+    std::vector<NumaIndex>               boundThreadToNumaNode;
+
+    uint64_t accumulate(std::atomic<uint64_t> Search::Worker::*member) const {
+        // Sums the given atomic counter over the workers of all threads (relaxed loads)
+        uint64_t sum = 0;
+        for (auto&& th : threads)
+            sum += (th->worker.get()->*member).load(std::memory_order_relaxed);
+        return sum;
+    }
 };

+}  // namespace Stockfish

-/// ThreadPool struct handles all the threads-related stuff like init, starting,
-/// parking and, most importantly, launching a thread. All the access to threads
-/// is done through this class.
-
-struct ThreadPool : public std::vector<Thread*> {
-
-  void start_thinking(Position&, StateListPtr&, const Search::LimitsType&, bool = false);
-  void clear();
-  void set(size_t);
-
-  MainThread* main()        const { return static_cast<MainThread*>(front()); }
-  uint64_t nodes_searched() const { return accumulate(&Thread::nodes); }
-  uint64_t tb_hits()        const { return accumulate(&Thread::tbHits); }
-  Thread* get_best_thread() const;
-  void start_searching();
-  void wait_for_search_finished() const;
-
-  std::atomic_bool stop, increaseDepth;
-
-private:
-  StateListPtr setupStates;
-
-  uint64_t accumulate(std::atomic<uint64_t> Thread::* member) const {
-
-    uint64_t sum = 0;
-    for (Thread* th : *this)
-        sum += (th->*member).load(std::memory_order_relaxed);
-    return sum;
-  }
-};
-
-extern ThreadPool Threads;
-
-#endif // #ifndef THREAD_H_INCLUDED
+#endif  // #ifndef THREAD_H_INCLUDED
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -21,46 +21,58 @@

 #include <thread>

-/// On OSX threads other than the main thread are created with a reduced stack
-/// size of 512KB by default, this is too low for deep searches, which require
-/// somewhat more than 1MB stack, so adjust it to TH_STACK_SIZE.
-/// The implementation calls pthread_create() with the stack size parameter
-/// equal to the linux 8MB default, on platforms that support it.
+// On OSX threads other than the main thread are created with a reduced stack
+// size of 512KB by default, this is too low for deep searches, which require
+// somewhat more than 1MB stack, so adjust it to TH_STACK_SIZE.
+// The implementation calls pthread_create() with the stack size parameter
+// equal to the Linux 8MB default, on platforms that support it.

 #if defined(__APPLE__) || defined(__MINGW32__) || defined(__MINGW64__) || defined(USE_PTHREADS)

-#include <pthread.h>
+    #include <pthread.h>
+    #include <functional>

-static const size_t TH_STACK_SIZE = 8 * 1024 * 1024;
-
-template <class T, class P = std::pair<T*, void(T::*)()>>
-void* start_routine(void* ptr)
-{
-   P* p = reinterpret_cast<P*>(ptr);
-   (p->first->*(p->second))(); // Call member function pointer
-   delete p;
-   return NULL;
-}
+namespace Stockfish {

 class NativeThread {
+    pthread_t thread;

-   pthread_t thread;
+    static constexpr size_t TH_STACK_SIZE = 8 * 1024 * 1024;

-public:
-  template<class T, class P = std::pair<T*, void(T::*)()>>
-  explicit NativeThread(void(T::*fun)(), T* obj) {
-    pthread_attr_t attr_storage, *attr = &attr_storage;
-    pthread_attr_init(attr);
-    pthread_attr_setstacksize(attr, TH_STACK_SIZE);
-    pthread_create(&thread, attr, start_routine<T>, new P(obj, fun));
-  }
-  void join() { pthread_join(thread, NULL); }
+   public:
+    // Starts a pthread with a stack of TH_STACK_SIZE bytes that runs fun(args...).
+    // The bound call is stored on the heap and released by the new thread once
+    // the call has returned.
+    template<class Function, class... Args>
+    explicit NativeThread(Function&& fun, Args&&... args) {
+        auto func = new std::function<void()>(
+          std::bind(std::forward<Function>(fun), std::forward<Args>(args)...));
+
+        pthread_attr_t attr_storage, *attr = &attr_storage;
+        pthread_attr_init(attr);
+        pthread_attr_setstacksize(attr, TH_STACK_SIZE);
+
+        auto start_routine = [](void* ptr) -> void* {
+            auto f = reinterpret_cast<std::function<void()>*>(ptr);
+            // Call the function
+            (*f)();
+            delete f;
+            return nullptr;
+        };
+
+        // If no thread was created, nobody will ever run (and free) func
+        if (pthread_create(&thread, attr, start_routine, func) != 0)
+            delete func;
+
+        // The attribute object is not needed anymore once pthread_create() has returned
+        pthread_attr_destroy(attr);
+    }
+
+    // Waits for the thread to terminate
+    void join() { pthread_join(thread, nullptr); }
 };

-#else // Default case: use STL classes
+}  // namespace Stockfish

-typedef std::thread NativeThread;
+#else  // Default case: use STL classes
+
+namespace Stockfish {
+
+using NativeThread = std::thread;
+
+}  // namespace Stockfish

 #endif

-#endif // #ifndef THREAD_WIN32_OSX_H_INCLUDED
+#endif  // #ifndef THREAD_WIN32_OSX_H_INCLUDED
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -16,82 +16,128 @@
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

+#include "timeman.h"
+
 #include <algorithm>
-#include <cfloat>
+#include <cassert>
 #include <cmath>
+#include <cstdint>

 #include "search.h"
-#include "timeman.h"
-#include "uci.h"
+#include "ucioption.h"

-TimeManagement Time; // Our global time management object
+namespace Stockfish {

+// Returns the optimum thinking time computed by the last call to init()
+TimePoint TimeManagement::optimum() const {
+    return optimumTime;
+}
+// Returns the maximum thinking time computed by the last call to init()
+TimePoint TimeManagement::maximum() const {
+    return maximumTime;
+}

-/// TimeManagement::init() is called at the beginning of the search and calculates
-/// the bounds of time allowed for the current game ply. We currently support:
+// Resets the node budget of the 'nodes as time' mode. The value -1 is what
+// init() tests for before computing the budget once at game start.
+void TimeManagement::clear() {
+    availableNodes = -1;
+}
+
+// Deducts the given number of nodes from the remaining node budget, never
+// letting the budget drop below zero. Only valid in 'nodes as time' mode.
+void TimeManagement::advance_nodes_time(std::int64_t nodes) {
+    assert(useNodesTime);
+
+    const std::int64_t remaining = availableNodes - nodes;
+    availableNodes               = remaining > 0 ? remaining : 0;
+}
+
+// Called at the beginning of the search. Computes optimumTime and maximumTime, the bounds
+// of thinking time allowed for the current game ply. We currently support:
 //      1) x basetime (+ z increment)
 //      2) x moves in y seconds (+ z increment)
+void TimeManagement::init(Search::LimitsType& limits,
+                          Color               us,
+                          int                 ply,
+                          const OptionsMap&   options,
+                          double&             originalTimeAdjust) {
+    TimePoint npmsec = TimePoint(options["nodestime"]);

-void TimeManagement::init(Search::LimitsType& limits, Color us, int ply) {
+    // If we have no time, we don't need to fully initialize TM.
+    // startTime is used by movetime and useNodesTime is used in elapsed calls.
+    startTime    = limits.startTime;
+    useNodesTime = npmsec != 0;

-  TimePoint moveOverhead    = TimePoint(Options["Move Overhead"]);
-  TimePoint slowMover       = TimePoint(Options["Slow Mover"]);
-  TimePoint npmsec          = TimePoint(Options["nodestime"]);
+    if (limits.time[us] == 0)
+        return;  // optimumTime and maximumTime are left untouched in this case

-  // optScale is a percentage of available time to use for the current move.
-  // maxScale is a multiplier applied to optimumTime.
-  double optScale, maxScale;
+    TimePoint moveOverhead = TimePoint(options["Move Overhead"]);

-  // If we have to play in 'nodes as time' mode, then convert from time
-  // to nodes, and use resulting values in time management formulas.
-  // WARNING: to avoid time losses, the given npmsec (nodes per millisecond)
-  // must be much lower than the real engine speed.
-  if (npmsec)
-  {
-      if (!availableNodes) // Only once at game start
-          availableNodes = npmsec * limits.time[us]; // Time is in msec
+    // optScale is a percentage of available time to use for the current move.
+    // maxScale is a multiplier applied to optimumTime.
+    double optScale, maxScale;

-      // Convert from milliseconds to nodes
-      limits.time[us] = TimePoint(availableNodes);
-      limits.inc[us] *= npmsec;
-      limits.npmsec = npmsec;
-  }
+    // If we have to play in 'nodes as time' mode, then convert from time
+    // to nodes, and use resulting values in time management formulas.
+    // WARNING: to avoid time losses, the given npmsec (nodes per millisecond)
+    // must be much lower than the real engine speed.
+    if (useNodesTime)
+    {
+        if (availableNodes == -1)                       // Only once at game start
+            availableNodes = npmsec * limits.time[us];  // Time is in msec

-  startTime = limits.startTime;
+        // Convert from milliseconds to nodes
+        limits.time[us] = TimePoint(availableNodes);
+        limits.inc[us] *= npmsec;
+        limits.npmsec = npmsec;
+        moveOverhead *= npmsec;
+    }

-  // Maximum move horizon of 50 moves
-  int mtg = limits.movestogo ? std::min(limits.movestogo, 50) : 50;
+    // These numbers are used where multiplications, divisions or comparisons
+    // with constants are involved.
+    const int64_t   scaleFactor = useNodesTime ? npmsec : 1;
+    const TimePoint scaledTime  = limits.time[us] / scaleFactor;
+    const TimePoint scaledInc   = limits.inc[us] / scaleFactor;

-  // Make sure timeLeft is > 0 since we may use it as a divisor
-  TimePoint timeLeft =  std::max(TimePoint(1),
-      limits.time[us] + limits.inc[us] * (mtg - 1) - moveOverhead * (2 + mtg));
+    // Maximum move horizon in hundredths of a move: at most 50 moves, 50.51 if movestogo is 0
+    int centiMTG = limits.movestogo ? std::min(limits.movestogo * 100, 5000) : 5051;

-  // A user may scale time usage by setting UCI option "Slow Mover"
-  // Default is 100 and changing this value will probably lose elo.
-  timeLeft = slowMover * timeLeft / 100;
+    // With less than one second left (and a small enough increment), scale mtg with the time
+    if (scaledTime < 1000 && double(centiMTG) / scaledInc > 5.051)
+    {
+        centiMTG = scaledTime * 5.051;
+    }

-  // x basetime (+ z increment)
-  // If there is a healthy increment, timeLeft can exceed actual available
-  // game time for the current move, so also cap to 20% of available game time.
-  if (limits.movestogo == 0)
-  {
-      optScale = std::min(0.0084 + std::pow(ply + 3.0, 0.5) * 0.0042,
-                           0.2 * limits.time[us] / double(timeLeft));
-      maxScale = std::min(7.0, 4.0 + ply / 12.0);
-  }
+    // Make sure timeLeft is > 0 since we may use it as a divisor
+    TimePoint timeLeft =
+      std::max(TimePoint(1),
+               limits.time[us]
+                 + (limits.inc[us] * (centiMTG - 100) - moveOverhead * (200 + centiMTG)) / 100);

-  // x moves in y seconds (+ z increment)
-  else
-  {
-      optScale = std::min((0.8 + ply / 128.0) / mtg,
-                            0.8 * limits.time[us] / double(timeLeft));
-      maxScale = std::min(6.3, 1.5 + 0.11 * mtg);
-  }
+    // x basetime (+ z increment)
+    // If there is a healthy increment, timeLeft can exceed the actual available
+    // game time for the current move, so also cap to a percentage of available game time.
+    if (limits.movestogo == 0)
+    {
+        // Extra time according to timeLeft (computed only while originalTimeAdjust is negative)
+        if (originalTimeAdjust < 0)
+            originalTimeAdjust = 0.3128 * std::log10(timeLeft) - 0.4354;

-  // Never use more than 80% of the available time for this move
-  optimumTime = TimePoint(optScale * timeLeft);
-  maximumTime = TimePoint(std::min(0.8 * limits.time[us] - moveOverhead, maxScale * optimumTime));
+        // Calculate time constants based on current time left.
+        double logTimeInSec = std::log10(scaledTime / 1000.0);
+        double optConstant  = std::min(0.0032116 + 0.000321123 * logTimeInSec, 0.00508017);
+        double maxConstant  = std::max(3.3977 + 3.03950 * logTimeInSec, 2.94761);

-  if (Options["Ponder"])
-      optimumTime += optimumTime / 4;
+        optScale = std::min(0.0121431 + std::pow(ply + 2.94693, 0.461073) * optConstant,
+                            0.213035 * limits.time[us] / timeLeft)
+                 * originalTimeAdjust;
+
+        maxScale = std::min(6.67704, maxConstant + ply / 11.9847);
+    }
+
+    // x moves in y seconds (+ z increment)
+    else
+    {
+        optScale =
+          std::min((0.88 + ply / 116.4) / (centiMTG / 100.0), 0.88 * limits.time[us] / timeLeft);
+        maxScale = 1.3 + 0.11 * (centiMTG / 100.0);
+    }
+
+    // Cap maximumTime at 82.5179% of the remaining time (less moveOverhead), then subtract 10
+    optimumTime = TimePoint(optScale * timeLeft);
+    maximumTime =
+      TimePoint(std::min(0.825179 * limits.time[us] - moveOverhead, maxScale * optimumTime)) - 10;
+
+    if (options["Ponder"])
+        optimumTime += optimumTime / 4;  // 25% more optimum time when the Ponder option is set
 }
+
+}  // namespace Stockfish
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -19,29 +19,49 @@
 #ifndef TIMEMAN_H_INCLUDED
 #define TIMEMAN_H_INCLUDED

+#include <cstdint>
+
 #include "misc.h"
-#include "search.h"
-#include "thread.h"
+#include "types.h"

-/// The TimeManagement class computes the optimal time to think depending on
-/// the maximum available time, the game move number and other parameters.
+namespace Stockfish {

+class OptionsMap;
+
+namespace Search {
+struct LimitsType;
+}
+
+// The TimeManagement class computes the optimal time to think depending on
+// the maximum available time, the game move number, and other parameters.
 class TimeManagement {
-public:
-  void init(Search::LimitsType& limits, Color us, int ply);
-  TimePoint optimum() const { return optimumTime; }
-  TimePoint maximum() const { return maximumTime; }
-  TimePoint elapsed() const { return Search::Limits.npmsec ?
-                                     TimePoint(Threads.nodes_searched()) : now() - startTime; }
+   public:
+    void init(Search::LimitsType& limits,
+              Color               us,
+              int                 ply,
+              const OptionsMap&   options,
+              double&             originalTimeAdjust);

-  int64_t availableNodes; // When in 'nodes as time' mode
+    TimePoint optimum() const;
+    TimePoint maximum() const;
+    // In 'nodes as time' mode the value returned by nodes() is used as elapsed time
+    template<typename FUNC>
+    TimePoint elapsed(FUNC nodes) const {
+        return useNodesTime ? TimePoint(nodes()) : elapsed_time();
+    }
+    TimePoint elapsed_time() const { return now() - startTime; }

-private:
-  TimePoint startTime;
-  TimePoint optimumTime;
-  TimePoint maximumTime;
+    void clear();
+    void advance_nodes_time(std::int64_t nodes);
+
+   private:
+    // Zero-initialized: init() returns early when no time is given, before
+    // optimumTime and maximumTime are assigned.
+    TimePoint startTime   = 0;
+    TimePoint optimumTime = 0;
+    TimePoint maximumTime = 0;
+
+    std::int64_t availableNodes = -1;     // When in 'nodes as time' mode
+    bool         useNodesTime   = false;  // True if we are in 'nodes as time' mode
 };

-extern TimeManagement Time;
+}  // namespace Stockfish

-#endif // #ifndef TIMEMAN_H_INCLUDED
+#endif  // #ifndef TIMEMAN_H_INCLUDED
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -16,143 +16,236 @@
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

-#include <cstring>   // For std::memset
-#include <iostream>
-#include <thread>
-
-#include "bitboard.h"
-#include "misc.h"
-#include "thread.h"
 #include "tt.h"
-#include "uci.h"

-TranspositionTable TT; // Our global transposition table
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>

-/// TTEntry::save() populates the TTEntry with a new node's data, possibly
-/// overwriting an old position. Update is not atomic and can be racy.
+#include "memory.h"
+#include "misc.h"
+#include "syzygy/tbprobe.h"
+#include "thread.h"

-void TTEntry::save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev) {
+namespace Stockfish {

-  // Preserve any existing move for the same position
-  if (m || (uint16_t)k != key16)
-      move16 = (uint16_t)m;

-  // Overwrite less valuable entries (cheapest checks first)
-  if (b == BOUND_EXACT
-      || (uint16_t)k != key16
-      || d - DEPTH_OFFSET > depth8 - 4)
-  {
-      assert(d > DEPTH_OFFSET);
-      assert(d < 256 + DEPTH_OFFSET);
+// TTEntry struct is the 10 bytes transposition table entry, defined as below:
+//
+// key        16 bit
+// depth       8 bit
+// generation  5 bit
+// pv node     1 bit
+// bound type  2 bit
+// move       16 bit
+// value      16 bit
+// evaluation 16 bit
+//
+// These fields are in the same order as accessed by TT::probe(), since memory is fastest sequentially.
+// Equally, the store order in save() matches this order.

-      key16     = (uint16_t)k;
-      depth8    = (uint8_t)(d - DEPTH_OFFSET);
-      genBound8 = (uint8_t)(TT.generation8 | uint8_t(pv) << 2 | b);
-      value16   = (int16_t)v;
-      eval16    = (int16_t)ev;
-  }
+struct TTEntry {
+
+    // Convert the packed internal fields to external types
+    TTData read() const {
+        return TTData{Move(move16),           Value(value16),
+                      Value(eval16),          Depth(depth8 + DEPTH_ENTRY_OFFSET),
+                      Bound(genBound8 & 0x3), bool(genBound8 & 0x4)};
+    }
+
+    bool is_occupied() const;
+    void save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8);
+    // The returned age is a multiple of GENERATION_DELTA (a constant defined in this file)
+    uint8_t relative_age(const uint8_t generation8) const;
+
+   private:
+    friend class TranspositionTable;
+
+    uint16_t key16;
+    uint8_t  depth8;     // Stored as depth - DEPTH_ENTRY_OFFSET; 0 marks an unoccupied entry
+    uint8_t  genBound8;  // generation (upper 5 bits) | pv flag (bit 2) | bound (lower 2 bits)
+    Move     move16;
+    int16_t  value16;
+    int16_t  eval16;
+};
+
+// `genBound8` is where most of the details are. We use the following constants to manipulate the 5 leading
+// generation bits and the 3 trailing miscellaneous bits (pv flag and bound).
+
+// Number of low bits of genBound8 that are reserved for non-generation data.
+static constexpr unsigned GENERATION_BITS = 3;
+// Increment for the generation field; it leaves the low GENERATION_BITS bits untouched
+static constexpr int GENERATION_DELTA = (1 << GENERATION_BITS);
+// Cycle length, added in relative_age() before subtracting so the difference stays positive
+static constexpr int GENERATION_CYCLE = 255 + GENERATION_DELTA;
+// Mask to pull out the generation number (the upper 5 bits of a byte)
+static constexpr int GENERATION_MASK = (0xFF << GENERATION_BITS) & 0xFF;
+
+// An entry counts as occupied when its stored depth8 is non-zero. This is why
+// DEPTH_ENTRY_OFFSET exists: 1) a zero `depth8` doubles as the "empty" marker, but
+// 2) we need to store negative depths for QS. (`depth8` is the only field with "spare bits":
+// we sacrifice the ability to store depths greater than 1<<8 less the offset, as asserted in `save`.)
+bool TTEntry::is_occupied() const {
+    return depth8 != 0;
+}
+
+// Stores a new node's data in this TTEntry, possibly overwriting
+// an older position. The update is not atomic and can be racy.
+void TTEntry::save(
+  Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8) {
+
+    const bool otherPosition = uint16_t(k) != key16;
+
+    // Keep the old ttmove unless a new move is given or the entry changes position
+    if (m || otherPosition)
+        move16 = m;
+
+    // Only less valuable entries get overwritten (cheapest checks first)
+    const bool overwrite = b == BOUND_EXACT || otherPosition
+                        || d - DEPTH_ENTRY_OFFSET + 2 * pv > depth8 - 4
+                        || relative_age(generation8);
+    if (!overwrite)
+        return;
+
+    assert(d > DEPTH_ENTRY_OFFSET);
+    assert(d < 256 + DEPTH_ENTRY_OFFSET);
+
+    key16     = uint16_t(k);
+    depth8    = uint8_t(d - DEPTH_ENTRY_OFFSET);
+    genBound8 = uint8_t(generation8 | uint8_t(pv) << 2 | b);
+    value16   = int16_t(v);
+    eval16    = int16_t(ev);
 }


-/// TranspositionTable::resize() sets the size of the transposition table,
-/// measured in megabytes. Transposition table consists of a power of 2 number
-/// of clusters and each cluster consists of ClusterSize number of TTEntry.
-
-void TranspositionTable::resize(size_t mbSize) {
-
-  Threads.main()->wait_for_search_finished();
-
-  aligned_large_pages_free(table);
-
-  clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster);
-
-  table = static_cast<Cluster*>(aligned_large_pages_alloc(clusterCount * sizeof(Cluster)));
-  if (!table)
-  {
-      std::cerr << "Failed to allocate " << mbSize
-                << "MB for transposition table." << std::endl;
-      exit(EXIT_FAILURE);
-  }
-
-  clear();
+// Returns how far the entry's generation lies behind generation8, as a
+// multiple of GENERATION_DELTA.
+uint8_t TTEntry::relative_age(const uint8_t generation8) const {
+    // The generation is stored packed and is cyclic. Adding GENERATION_CYCLE
+    // (256 is the modulus, plus what is needed to keep the unrelated lowest
+    // n bits from affecting the result) keeps the difference correct even
+    // after generation8 has overflowed into the next cycle.
+    const int distance = GENERATION_CYCLE + generation8 - genBound8;
+
+    return uint8_t(distance & GENERATION_MASK);
 }


-/// TranspositionTable::clear() initializes the entire transposition table to zero,
-//  in a multi-threaded way.
+// TTWriter is a very thin wrapper: it only remembers the TTEntry pointer it is given
+TTWriter::TTWriter(TTEntry* tte) : entry(tte) {
+}

-void TranspositionTable::clear() {
-
-  std::vector<std::thread> threads;
-
-  for (size_t idx = 0; idx < Options["Threads"]; ++idx)
-  {
-      threads.emplace_back([this, idx]() {
-
-          // Thread binding gives faster search on systems with a first-touch policy
-          if (Options["Threads"] > 8)
-              WinProcGroup::bindThisThread(idx);
-
-          // Each thread will zero its part of the hash table
-          const size_t stride = size_t(clusterCount / Options["Threads"]),
-                       start  = size_t(stride * idx),
-                       len    = idx != Options["Threads"] - 1 ?
-                                stride : clusterCount - start;
-
-          std::memset(&table[start], 0, len * sizeof(Cluster));
-      });
-  }
-
-  for (std::thread& th : threads)
-      th.join();
+// Forwards all arguments unchanged to save() of the wrapped TTEntry
+void TTWriter::write(Key     k,
+                     Value   v,
+                     bool    pv,
+                     Bound   b,
+                     Depth   d,
+                     Move    m,
+                     Value   ev,
+                     uint8_t generation8) {
+    TTEntry& target = *entry;
+    target.save(k, v, pv, b, d, m, ev, generation8);
 }


-/// TranspositionTable::probe() looks up the current position in the transposition
-/// table. It returns true and a pointer to the TTEntry if the position is found.
-/// Otherwise, it returns false and a pointer to an empty or least valuable TTEntry
-/// to be replaced later. The replace value of an entry is calculated as its depth
-/// minus 8 times its relative age. TTEntry t1 is considered more valuable than
-/// TTEntry t2 if its replace value is greater than that of t2.
+// A TranspositionTable is an array of Cluster, of size clusterCount. Each cluster consists of ClusterSize number
+// of TTEntry. Each non-empty TTEntry contains information on exactly one position. The size of a Cluster should
+// divide the size of a cache line for best performance, as the cacheline is prefetched when possible.

-TTEntry* TranspositionTable::probe(const Key key, bool& found) const {
+static constexpr int ClusterSize = 3;

-  TTEntry* const tte = first_entry(key);
-  const uint16_t key16 = (uint16_t)key;  // Use the low 16 bits as key inside the cluster
+struct Cluster {
+    TTEntry entry[ClusterSize];  // ClusterSize (3) entries of 10 bytes each
+    char    padding[2];  // Pad to 32 bytes
+};

-  for (int i = 0; i < ClusterSize; ++i)
-      if (tte[i].key16 == key16 || !tte[i].depth8)
-      {
-          tte[i].genBound8 = uint8_t(generation8 | (tte[i].genBound8 & (GENERATION_DELTA - 1))); // Refresh
+static_assert(sizeof(Cluster) == 32, "Suboptimal Cluster size");

-          return found = (bool)tte[i].depth8, &tte[i];
-      }

-  // Find an entry to be replaced according to the replacement strategy
-  TTEntry* replace = tte;
-  for (int i = 1; i < ClusterSize; ++i)
-      // Due to our packed storage format for generation and its cyclic
-      // nature we add GENERATION_CYCLE (256 is the modulus, plus what
-      // is needed to keep the unrelated lowest n bits from affecting
-      // the result) to calculate the entry age correctly even after
-      // generation8 overflows into the next cycle.
-      if (  replace->depth8 - ((GENERATION_CYCLE + generation8 - replace->genBound8) & GENERATION_MASK)
-          >   tte[i].depth8 - ((GENERATION_CYCLE + generation8 -   tte[i].genBound8) & GENERATION_MASK))
-          replace = &tte[i];
+// Frees the current table and allocates a new one of mbSize megabytes, which is
+// then cleared with the help of the given threads. The table consists of clusters
+// and each cluster consists of ClusterSize number of TTEntry. If the allocation
+// fails, an error is printed and the program exits.
+void TranspositionTable::resize(size_t mbSize, ThreadPool& threads) {
+    aligned_large_pages_free(table);
+
+    clusterCount            = mbSize * 1024 * 1024 / sizeof(Cluster);
+    const size_t tableBytes = clusterCount * sizeof(Cluster);
+
+    auto* const rawMemory = aligned_large_pages_alloc(tableBytes);
+    table                 = static_cast<Cluster*>(rawMemory);
+
+    if (table == nullptr)
+    {
+        std::cerr << "Failed to allocate " << mbSize << "MB for transposition table." << std::endl;
+        exit(EXIT_FAILURE);
+    }
+
+    clear(threads);
 }


-/// TranspositionTable::hashfull() returns an approximation of the hashtable
-/// occupation during a search. The hash is x permill full, as per UCI protocol.
+// Resets the generation counter and zeroes the whole transposition table.
+// The work is split into one contiguous range of clusters per pool thread.
+void TranspositionTable::clear(ThreadPool& threads) {
+    generation8 = 0;
+
+    const size_t workerCount = threads.num_threads();
+
+    for (size_t idx = 0; idx < workerCount; ++idx)
+    {
+        // Every thread gets an equally sized range, the last one also takes the remainder
+        const size_t chunk  = clusterCount / workerCount;
+        const size_t first  = chunk * idx;
+        const bool   isLast = idx + 1 == workerCount;
+        const size_t count  = isLast ? clusterCount - first : chunk;
+
+        threads.run_on_thread(
+          idx, [this, first, count]() { std::memset(&table[first], 0, count * sizeof(Cluster)); });
+    }
+
+    // Do not return before every thread has finished its range
+    for (size_t idx = 0; idx < workerCount; ++idx)
+    {
+        threads.wait_on_thread(idx);
+    }
 }
+
+
+// Returns an approximation of the hashtable occupation during a search.
+// The hash is x permill full, as per UCI protocol. Only the first 1000 clusters
+// are sampled, and only occupied entries whose relative age is at most
+// maxAge generations are counted.
+int TranspositionTable::hashfull(int maxAge) const {
+    // relative_age() reports ages in steps of GENERATION_DELTA, so scale the limit alike
+    const int ageLimit = maxAge << GENERATION_BITS;
+
+    int hits = 0;
+    for (int c = 0; c < 1000; ++c)
+    {
+        for (const TTEntry& e : table[c].entry)
+        {
+            if (e.is_occupied() && e.relative_age(generation8) <= ageLimit)
+                ++hits;
+        }
+    }
+
+    return hits / ClusterSize;
+}
+
+
+// Advances the table generation by one step
+void TranspositionTable::new_search() {
+    // Stepping by GENERATION_DELTA leaves the lower bits as they are
+    generation8 = uint8_t(generation8 + GENERATION_DELTA);
+}
+
+
+// Returns the current value of generation8
+uint8_t TranspositionTable::generation() const {
+    return generation8;
+}
+
+
+// Looks up the current position in the transposition table and returns a (found, data,
+// writer) tuple. If an entry with a matching key16 exists, found is its occupancy flag, data
+// is a copy of it and writer refers to it. Otherwise found is false, data is an empty
+// placeholder and writer refers to the least valuable TTEntry of the cluster. The replace
+// value of an entry is its depth8 minus twice its relative age. TTEntry t1 is considered
+// more valuable than TTEntry t2 if its replace value is greater than that of t2.
+std::tuple<bool, TTData, TTWriter> TranspositionTable::probe(const Key key) const {
+
+    TTEntry* const tte   = first_entry(key);
+    const uint16_t key16 = uint16_t(key);  // Use the low 16 bits as key inside the cluster
+
+    for (int i = 0; i < ClusterSize; ++i)
+        if (tte[i].key16 == key16)
+            // This gap is the main place for read races.
+            // After `read()` completes that copy is final, but may be self-inconsistent.
+            return {tte[i].is_occupied(), tte[i].read(), TTWriter(&tte[i])};
+
+    // Find an entry to be replaced according to the replacement strategy
+    TTEntry* replace = tte;
+    for (int i = 1; i < ClusterSize; ++i)
+        if (replace->depth8 - replace->relative_age(generation8) * 2
+            > tte[i].depth8 - tte[i].relative_age(generation8) * 2)
+            replace = &tte[i];
+
+    return {false,
+            TTData{Move::none(), VALUE_NONE, VALUE_NONE, DEPTH_ENTRY_OFFSET, BOUND_NONE, false},
+            TTWriter(replace)};
+}
+
+
+// Returns a pointer to the first entry of the cluster selected for the given key
+TTEntry* TranspositionTable::first_entry(const Key key) const {
+    const auto clusterIdx = mul_hi64(key, clusterCount);
+
+    return table[clusterIdx].entry;
+}
+
+}  // namespace Stockfish
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -19,85 +19,92 @@
 #ifndef TT_H_INCLUDED
 #define TT_H_INCLUDED

-#include "misc.h"
+#include <cstddef>
+#include <cstdint>
+#include <tuple>
+
+#include "memory.h"
 #include "types.h"

-/// TTEntry struct is the 10 bytes transposition table entry, defined as below:
-///
-/// key        16 bit
-/// depth       8 bit
-/// generation  5 bit
-/// pv node     1 bit
-/// bound type  2 bit
-/// move       16 bit
-/// value      16 bit
-/// eval value 16 bit
+namespace Stockfish {

-struct TTEntry {
+class ThreadPool;
+struct TTEntry;
+struct Cluster;

-  Move  move()  const { return (Move )move16; }
-  Value value() const { return (Value)value16; }
-  Value eval()  const { return (Value)eval16; }
-  Depth depth() const { return (Depth)depth8 + DEPTH_OFFSET; }
-  bool is_pv()  const { return (bool)(genBound8 & 0x4); }
-  Bound bound() const { return (Bound)(genBound8 & 0x3); }
-  void save(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev);
+// There is only one global hash table for the engine and all its threads. For chess in particular, we even allow racy
+// updates between threads to and from the TT, as taking the time to synchronize access would cost thinking time and
+// thus elo. As a hash table, collisions are possible and may cause chess playing issues (bizarre blunders, faulty mate
+// reports, etc). Fixing these also loses elo; however such risk decreases quickly with larger TT size.
+//
+// `probe` is the primary method: given a board position, we look up its entry in the table, and return a tuple of:
+//   1) whether the entry already has this position
+//   2) a copy of the prior data (if any) (may be inconsistent due to read races)
+//   3) a writer object to this entry
+// The copied data and the writer are separated to maintain clear boundaries between local vs global objects.

-private:
-  friend class TranspositionTable;

-  uint16_t key16;
-  uint8_t  depth8;
-  uint8_t  genBound8;
-  uint16_t move16;
-  int16_t  value16;
-  int16_t  eval16;
+// A copy of the data already in the entry (possibly collided). `probe` may be racy, resulting in inconsistent data.
+// The struct is a plain bundle of six values; it holds no pointer back into the table.
+struct TTData {
+    Move  move;
+    Value value, eval;
+    Depth depth;
+    Bound bound;
+    bool  is_pv;
+
+    // No default constructor: all six fields have to be supplied explicitly.
+    TTData() = delete;
+
+    // Member-wise constructor; parameter order is (move, value, eval, depth, bound, is_pv).
+    // clang-format off
+    TTData(Move m, Value v, Value ev, Depth d, Bound b, bool pv) :
+        move(m),
+        value(v),
+        eval(ev),
+        depth(d),
+        bound(b),
+        is_pv(pv) {};
+    // clang-format on
 };


-/// A TranspositionTable is an array of Cluster, of size clusterCount. Each
-/// cluster consists of ClusterSize number of TTEntry. Each non-empty TTEntry
-/// contains information on exactly one position. The size of a Cluster should
-/// divide the size of a cache line for best performance, as the cacheline is
-/// prefetched when possible.
+// This is used to make racy writes to the global TT.
+// A writer wraps a pointer to a single TTEntry. Its constructor is private, so only the
+// friend class TranspositionTable can create one (probe() returns it as third tuple element).
+struct TTWriter {
+   public:
+    // Defined outside this header. Presumably stores the given data into *entry,
+    // tagged with generation8 - confirm against the implementation.
+    void write(Key k, Value v, bool pv, Bound b, Depth d, Move m, Value ev, uint8_t generation8);
+
+   private:
+    friend class TranspositionTable;
+    TTEntry* entry;  // The table entry this writer targets
+    TTWriter(TTEntry* tte);
+};
+

 class TranspositionTable {

-  static constexpr int ClusterSize = 3;
+   public:
+    ~TranspositionTable() { aligned_large_pages_free(table); }

-  struct Cluster {
-    TTEntry entry[ClusterSize];
-    char padding[2]; // Pad to 32 bytes
-  };
+    void resize(size_t mbSize, ThreadPool& threads);  // Set TT size
+    void clear(ThreadPool& threads);                  // Re-initialize memory, multithreaded
+    int  hashfull(int maxAge = 0)
+      const;  // Approximate what fraction of entries (permille) have been written to during this root search

-  static_assert(sizeof(Cluster) == 32, "Unexpected Cluster size");
+    void
+    new_search();  // This must be called at the beginning of each root search to track entry aging
+    uint8_t generation() const;  // The current age, used when writing new data to the TT
+    std::tuple<bool, TTData, TTWriter>
+    probe(const Key key) const;  // The main method, whose retvals separate local vs global objects
+    TTEntry* first_entry(const Key key)
+      const;  // This is the hash function; its only external use is memory prefetching.

-  // Constants used to refresh the hash table periodically
-  static constexpr unsigned GENERATION_BITS  = 3;                                // nb of bits reserved for other things
-  static constexpr int      GENERATION_DELTA = (1 << GENERATION_BITS);           // increment for generation field
-  static constexpr int      GENERATION_CYCLE = 255 + (1 << GENERATION_BITS);     // cycle length
-  static constexpr int      GENERATION_MASK  = (0xFF << GENERATION_BITS) & 0xFF; // mask to pull out generation number
+   private:
+    friend struct TTEntry;

-public:
- ~TranspositionTable() { aligned_large_pages_free(table); }
-  void new_search() { generation8 += GENERATION_DELTA; } // Lower bits are used for other things
-  TTEntry* probe(const Key key, bool& found) const;
-  int hashfull() const;
-  void resize(size_t mbSize);
-  void clear();
+    size_t   clusterCount;
+    Cluster* table = nullptr;

-  TTEntry* first_entry(const Key key) const {
-    return &table[mul_hi64(key, clusterCount)].entry[0];
-  }
-
-private:
-  friend struct TTEntry;
-
-  size_t clusterCount;
-  Cluster* table;
-  uint8_t generation8; // Size must be not bigger than TTEntry::genBound8
+    uint8_t generation8 = 0;  // Size must be not bigger than TTEntry::genBound8
 };

-extern TranspositionTable TT;
+}  // namespace Stockfish

-#endif // #ifndef TT_H_INCLUDED
+#endif  // #ifndef TT_H_INCLUDED
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -16,116 +16,98 @@
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

+#include "tune.h"
+
 #include <algorithm>
 #include <iostream>
+#include <map>
+#include <optional>
 #include <sstream>
+#include <string>

-#include "types.h"
-#include "misc.h"
-#include "uci.h"
+#include "ucioption.h"

 using std::string;

-bool Tune::update_on_last;
-const UCI::Option* LastOption = nullptr;
-BoolConditions Conditions;
-static std::map<std::string, int> TuneResults;
+namespace Stockfish {
+
+bool          Tune::update_on_last;
+const Option* LastOption = nullptr;
+OptionsMap*   Tune::options;
+namespace {
+std::map<std::string, int> TuneResults;
+
+// Option callback installed by Tune::make_option(). Re-reads all tuned parameters,
+// except when updates are deferred (Tune::update_on_last is set) and the option
+// that triggered the call is not the one recorded in LastOption.
+// Always returns std::nullopt.
+std::optional<std::string> on_tune(const Option& o) {
+
+    const bool deferred = Tune::update_on_last && LastOption != &o;
+
+    if (!deferred)
+        Tune::read_options();
+
+    return std::nullopt;
+}
+}
+
+// Adds an option named `n` to `opts` for a tuned parameter with value `v` and range
+// functor `r`, records it as LastOption, and prints one comma-separated line for it.
+void Tune::make_option(OptionsMap* opts, const string& n, int v, const SetRange& r) {
+
+    // Do not generate option when there is nothing to tune (ie. min = max)
+    const auto initial = r(v);
+    if (initial.first == initial.second)
+        return;
+
+    // A value recorded in TuneResults replaces the one passed in
+    const auto tuned = TuneResults.find(n);
+    if (tuned != TuneResults.end())
+        v = tuned->second;
+
+    // Evaluate the range again: it may depend on the (possibly replaced) value
+    const auto bounds = r(v);
+
+    opts->add(n, Option(v, bounds.first, bounds.second, on_tune));
+    LastOption = &((*opts)[n]);
+
+    // Print formatted parameters, ready to be copy-pasted in Fishtest
+    std::cout << n << "," << v << ","                          //
+              << bounds.first << "," << bounds.second << ","   //
+              << (bounds.second - bounds.first) / 20.0 << ","  //
+              << "0.0020" << std::endl;
+}

 string Tune::next(string& names, bool pop) {

-  string name;
+    string name;

-  do {
-      string token = names.substr(0, names.find(','));
+    do
+    {
+        string token = names.substr(0, names.find(','));

-      if (pop)
-          names.erase(0, token.size() + 1);
+        if (pop)
+            names.erase(0, token.size() + 1);

-      std::stringstream ws(token);
-      name += (ws >> token, token); // Remove trailing whitespace
+        std::stringstream ws(token);
+        name += (ws >> token, token);  // Remove trailing whitespace

-  } while (  std::count(name.begin(), name.end(), '(')
-           - std::count(name.begin(), name.end(), ')'));
+    } while (std::count(name.begin(), name.end(), '(') - std::count(name.begin(), name.end(), ')'));

-  return name;
+    return name;
 }

-static void on_tune(const UCI::Option& o) {

-  if (!Tune::update_on_last || LastOption == &o)
-      Tune::read_options();
+// Creates the option for an int parameter by forwarding this entry's name,
+// current value and range to Tune::make_option().
+template<>
+void Tune::Entry<int>::init_option() {
+    Tune::make_option(Tune::options, name, value, range);
 }

-static void make_option(const string& n, int v, const SetRange& r) {
-
-  // Do not generate option when there is nothing to tune (ie. min = max)
-  if (r(v).first == r(v).second)
-      return;
-
-  if (TuneResults.count(n))
-      v = TuneResults[n];
-
-  Options[n] << UCI::Option(v, r(v).first, r(v).second, on_tune);
-  LastOption = &Options[n];
-
-  // Print formatted parameters, ready to be copy-pasted in Fishtest
-  std::cout << n << ","
-            << v << ","
-            << r(v).first << "," << r(v).second << ","
-            << (r(v).second - r(v).first) / 20.0 << ","
-            << "0.0020"
-            << std::endl;
-}
-
-template<> void Tune::Entry<int>::init_option() { make_option(name, value, range); }
-
-template<> void Tune::Entry<int>::read_option() {
-  if (Options.count(name))
-      value = int(Options[name]);
-}
-
-template<> void Tune::Entry<Value>::init_option() { make_option(name, value, range); }
-
-template<> void Tune::Entry<Value>::read_option() {
-  if (Options.count(name))
-      value = Value(int(Options[name]));
-}
-
-template<> void Tune::Entry<Score>::init_option() {
-  make_option("m" + name, mg_value(value), range);
-  make_option("e" + name, eg_value(value), range);
-}
-
-template<> void Tune::Entry<Score>::read_option() {
-  if (Options.count("m" + name))
-      value = make_score(int(Options["m" + name]), eg_value(value));
-
-  if (Options.count("e" + name))
-      value = make_score(mg_value(value), int(Options["e" + name]));
+// Refreshes the tuned int from the option carrying this entry's name.
+// If the options map has no such option, the variable is left untouched.
+template<>
+void Tune::Entry<int>::read_option() {
+    if (!options->count(name))
+        return;
+
+    value = int((*options)[name]);
 }

 // Instead of a variable here we have a PostUpdate function: just call it
-template<> void Tune::Entry<Tune::PostUpdate>::init_option() {}
-template<> void Tune::Entry<Tune::PostUpdate>::read_option() { value(); }
-
-
-// Set binary conditions according to a probability that depends
-// on the corresponding parameter value.
-
-void BoolConditions::set() {
-
-  static PRNG rng(now());
-  static bool startup = true; // To workaround fishtest bench
-
-  for (size_t i = 0; i < binary.size(); i++)
-      binary[i] = !startup && (values[i] + int(rng.rand<unsigned>() % variance) > threshold);
-
-  startup = false;
-
-  for (size_t i = 0; i < binary.size(); i++)
-      sync_cout << binary[i] << sync_endl;
+// A PostUpdate entry creates no option ...
+template<>
+void Tune::Entry<Tune::PostUpdate>::init_option() {}
+// ... and "reading" it simply invokes the stored function.
+template<>
+void Tune::Entry<Tune::PostUpdate>::read_option() {
+    value();
 }

+}  // namespace Stockfish
+

 // Init options with tuning session results instead of default values. Useful to
 // get correct bench signature after a tuning session or to test tuned values.
@@ -136,9 +118,9 @@ void BoolConditions::set() {
 //
 // Then paste the output below, as the function body

-#include <cmath>

-void Tune::read_results() {
+namespace Stockfish {

-  /* ...insert your values here... */
-}
+void Tune::read_results() { /* ...insert your values here... */ }
+
+}  // namespace Stockfish
@@ -1,6 +1,6 @@
 /*
  Stockfish, a UCI chess playing engine derived from Glaurung 2.1
-  Copyright (C) 2004-2021 The Stockfish developers (see AUTHORS file)
+  Copyright (C) 2004-2025 The Stockfish developers (see AUTHORS file)

  Stockfish is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -19,175 +19,165 @@
 #ifndef TUNE_H_INCLUDED
 #define TUNE_H_INCLUDED

+#include <cstddef>
 #include <memory>
 #include <string>
-#include <type_traits>
+#include <type_traits>  // IWYU pragma: keep
+#include <utility>
 #include <vector>

-typedef std::pair<int, int> Range; // Option's min-max values
-typedef Range (RangeFun) (int);
+namespace Stockfish {
+
+class OptionsMap;
+
+using Range    = std::pair<int, int>;  // Option's min-max values
+using RangeFun = Range(int);

 // Default Range function, to calculate Option's min-max values
-inline Range default_range(int v) {
-  return v > 0 ? Range(0, 2 * v) : Range(2 * v, 0);
-}
+inline Range default_range(int v) {
+    // Positive values give the range (0, 2v); zero and negative values give (2v, 0)
+    if (v > 0)
+        return Range(0, 2 * v);
+
+    return Range(2 * v, 0);
+}

 struct SetRange {
-  explicit SetRange(RangeFun f) : fun(f) {}
-  SetRange(int min, int max) : fun(nullptr), range(min, max) {}
-  Range operator()(int v) const { return fun ? fun(v) : range; }
+    explicit SetRange(RangeFun f) :
+        fun(f) {}
+    SetRange(int min, int max) :
+        fun(nullptr),
+        range(min, max) {}
+    Range operator()(int v) const { return fun ? fun(v) : range; }

-  RangeFun* fun;
-  Range range;
+    RangeFun* fun;
+    Range     range;
 };

 #define SetDefaultRange SetRange(default_range)


-/// BoolConditions struct is used to tune boolean conditions in the
-/// code by toggling them on/off according to a probability that
-/// depends on the value of a tuned integer parameter: for high
-/// values of the parameter condition is always disabled, for low
-/// values is always enabled, otherwise it is enabled with a given
-/// probability that depnends on the parameter under tuning.
-
-struct BoolConditions {
-  void init(size_t size) { values.resize(size, defaultValue), binary.resize(size, 0); }
-  void set();
-
-  std::vector<int> binary, values;
-  int defaultValue = 465, variance = 40, threshold = 500;
-  SetRange range = SetRange(0, 1000);
-};
-
-extern BoolConditions Conditions;
-
-inline void set_conditions() { Conditions.set(); }
-
-
-/// Tune class implements the 'magic' code that makes the setup of a fishtest
-/// tuning session as easy as it can be. Mainly you have just to remove const
-/// qualifiers from the variables you want to tune and flag them for tuning, so
-/// if you have:
-///
-///   const Score myScore = S(10, 15);
-///   const Value myValue[][2] = { { V(100), V(20) }, { V(7), V(78) } };
-///
-/// If you have a my_post_update() function to run after values have been updated,
-/// and a my_range() function to set custom Option's min-max values, then you just
-/// remove the 'const' qualifiers and write somewhere below in the file:
-///
-///   TUNE(SetRange(my_range), myScore, myValue, my_post_update);
-///
-/// You can also set the range directly, and restore the default at the end
-///
-///   TUNE(SetRange(-100, 100), myScore, SetDefaultRange);
-///
-/// In case update function is slow and you have many parameters, you can add:
-///
-///   UPDATE_ON_LAST();
-///
-/// And the values update, including post update function call, will be done only
-/// once, after the engine receives the last UCI option, that is the one defined
-/// and created as the last one, so the GUI should send the options in the same
-/// order in which have been defined.
+// Tune class implements the 'magic' code that makes the setup of a fishtest tuning
+// session as easy as it can be. Mainly you have just to remove const qualifiers
+// from the variables you want to tune and flag them for tuning, so if you have:
+//
+//   const Value myValue[][2] = { { V(100), V(20) }, { V(7), V(78) } };
+//
+// If you have a my_post_update() function to run after values have been updated,
+// and a my_range() function to set custom Option's min-max values, then you just
+// remove the 'const' qualifiers and write somewhere below in the file:
+//
+//   TUNE(SetRange(my_range), myValue, my_post_update);
+//
+// You can also set the range directly, and restore the default at the end
+//
+//   TUNE(SetRange(-100, 100), myValue, SetDefaultRange);
+//
+// In case update function is slow and you have many parameters, you can add:
+//
+//   UPDATE_ON_LAST();
+//
+// And the values update, including post update function call, will be done only
+// once, after the engine receives the last UCI option, that is the one defined
+// and created as the last one, so the GUI should send the options in the same
+// order in which they have been defined.

 class Tune {

-  typedef void (PostUpdate) (); // Post-update function
+    using PostUpdate = void();  // Post-update function

-  Tune() { read_results(); }
-  Tune(const Tune&) = delete;
-  void operator=(const Tune&) = delete;
-  void read_results();
+    Tune() { read_results(); }
+    Tune(const Tune&)           = delete;
+    void operator=(const Tune&) = delete;
+    void read_results();

-  static Tune& instance() { static Tune t; return t; } // Singleton
+    static Tune& instance() {
+        static Tune t;
+        return t;
+    }  // Singleton

-  // Use polymorphism to accomodate Entry of different types in the same vector
-  struct EntryBase {
-    virtual ~EntryBase() = default;
-    virtual void init_option() = 0;
-    virtual void read_option() = 0;
-  };
+    // Use polymorphism to accommodate Entry of different types in the same vector
+    struct EntryBase {
+        virtual ~EntryBase()       = default;
+        virtual void init_option() = 0;
+        virtual void read_option() = 0;
+    };

-  template<typename T>
-  struct Entry : public EntryBase {
+    template<typename T>
+    struct Entry: public EntryBase {

-    static_assert(!std::is_const<T>::value, "Parameter cannot be const!");
+        static_assert(!std::is_const_v<T>, "Parameter cannot be const!");

-    static_assert(   std::is_same<T,   int>::value
-                  || std::is_same<T, Value>::value
-                  || std::is_same<T, Score>::value
-                  || std::is_same<T, PostUpdate>::value, "Parameter type not supported!");
+        static_assert(std::is_same_v<T, int> || std::is_same_v<T, PostUpdate>,
+                      "Parameter type not supported!");

-    Entry(const std::string& n, T& v, const SetRange& r) : name(n), value(v), range(r) {}
-    void operator=(const Entry&) = delete; // Because 'value' is a reference
-    void init_option() override;
-    void read_option() override;
+        Entry(const std::string& n, T& v, const SetRange& r) :
+            name(n),
+            value(v),
+            range(r) {}
+        void operator=(const Entry&) = delete;  // Because 'value' is a reference
+        void init_option() override;
+        void read_option() override;

-    std::string name;
-    T& value;
-    SetRange range;
-  };
+        std::string name;
+        T&          value;
+        SetRange    range;
+    };

-  // Our facility to fill the container, each Entry corresponds to a parameter
-  // to tune. We use variadic templates to deal with an unspecified number of
-  // entries, each one of a possible different type.
-  static std::string next(std::string& names, bool pop = true);
+    // Our facility to fill the container, each Entry corresponds to a parameter
+    // to tune. We use variadic templates to deal with an unspecified number of
+    // entries, each one of a possible different type.
+    static std::string next(std::string& names, bool pop = true);

-  int add(const SetRange&, std::string&&) { return 0; }
+    int add(const SetRange&, std::string&&) { return 0; }

-  template<typename T, typename... Args>
-  int add(const SetRange& range, std::string&& names, T& value, Args&&... args) {
-    list.push_back(std::unique_ptr<EntryBase>(new Entry<T>(next(names), value, range)));
-    return add(range, std::move(names), args...);
-  }
+    template<typename T, typename... Args>
+    int add(const SetRange& range, std::string&& names, T& value, Args&&... args) {
+        list.push_back(std::unique_ptr<EntryBase>(new Entry<T>(next(names), value, range)));
+        return add(range, std::move(names), args...);
+    }

-  // Template specialization for arrays: recursively handle multi-dimensional arrays
-  template<typename T, size_t N, typename... Args>
-  int add(const SetRange& range, std::string&& names, T (&value)[N], Args&&... args) {
-    for (size_t i = 0; i < N; i++)
-        add(range, next(names, i == N - 1) + "[" + std::to_string(i) + "]", value[i]);
-    return add(range, std::move(names), args...);
-  }
+    // Template specialization for arrays: recursively handle multi-dimensional arrays
+    template<typename T, size_t N, typename... Args>
+    int add(const SetRange& range, std::string&& names, T (&value)[N], Args&&... args) {
+        for (size_t i = 0; i < N; i++)
+            add(range, next(names, i == N - 1) + "[" + std::to_string(i) + "]", value[i]);
+        return add(range, std::move(names), args...);
+    }

-  // Template specialization for SetRange
-  template<typename... Args>
-  int add(const SetRange&, std::string&& names, SetRange& value, Args&&... args) {
-    return add(value, (next(names), std::move(names)), args...);
-  }
+    // Template specialization for SetRange
+    template<typename... Args>
+    int add(const SetRange&, std::string&& names, SetRange& value, Args&&... args) {
+        return add(value, (next(names), std::move(names)), args...);
+    }

-  // Template specialization for BoolConditions
-  template<typename... Args>
-  int add(const SetRange& range, std::string&& names, BoolConditions& cond, Args&&... args) {
-    for (size_t size = cond.values.size(), i = 0; i < size; i++)
-        add(cond.range, next(names, i == size - 1) + "_" + std::to_string(i), cond.values[i]);
-    return add(range, std::move(names), args...);
-  }
+    static void make_option(OptionsMap* options, const std::string& n, int v, const SetRange& r);

-  std::vector<std::unique_ptr<EntryBase>> list;
+    std::vector<std::unique_ptr<EntryBase>> list;

-public:
-  template<typename... Args>
-  static int add(const std::string& names, Args&&... args) {
-    return instance().add(SetDefaultRange, names.substr(1, names.size() - 2), args...); // Remove trailing parenthesis
-  }
-  static void init() { for (auto& e : instance().list) e->init_option(); read_options(); } // Deferred, due to UCI::Options access
-  static void read_options() { for (auto& e : instance().list) e->read_option(); }
-  static bool update_on_last;
+   public:
+    template<typename... Args>
+    static int add(const std::string& names, Args&&... args) {
+        return instance().add(SetDefaultRange, names.substr(1, names.size() - 2),
+                              args...);  // Remove trailing parenthesis
+    }
+    static void init(OptionsMap& o) {
+        options = &o;
+        for (auto& e : instance().list)
+            e->init_option();
+        read_options();
+    }  // Deferred, due to UCIEngine::Options access
+    static void read_options() {
+        for (auto& e : instance().list)
+            e->read_option();
+    }
+
+    static bool        update_on_last;
+    static OptionsMap* options;
 };

-// Some macro magic :-) we define a dummy int variable that compiler initializes calling Tune::add()
+// Some macro magic :-) we define a dummy int variable that the compiler initializes calling Tune::add()
 #define STRINGIFY(x) #x
-#define UNIQUE2(x, y) x ## y
-#define UNIQUE(x, y) UNIQUE2(x, y) // Two indirection levels to expand __LINE__
+#define UNIQUE2(x, y) x##y
+#define UNIQUE(x, y) UNIQUE2(x, y)  // Two indirection levels to expand __LINE__
 #define TUNE(...) int UNIQUE(p, __LINE__) = Tune::add(STRINGIFY((__VA_ARGS__)), __VA_ARGS__)

 #define UPDATE_ON_LAST() bool UNIQUE(p, __LINE__) = Tune::update_on_last = true

-// Some macro to tune toggling of boolean conditions
-#define CONDITION(x) (Conditions.binary[__COUNTER__] || (x))
-#define TUNE_CONDITIONS() int UNIQUE(c, __LINE__) = (Conditions.init(__COUNTER__), 0); \
-                          TUNE(Conditions, set_conditions)
+}  // namespace Stockfish

-#endif // #ifndef TUNE_H_INCLUDED
+#endif  // #ifndef TUNE_H_INCLUDED
--- a/Show More
+++ b/Show More