From de24fcebc873ce2d65b30e039745dbc2e851f443 Mon Sep 17 00:00:00 2001 From: mstembera Date: Fri, 26 Jun 2020 17:26:46 -0700 Subject: [PATCH 01/86] Fix fragile code to use proper random 64 bit keys. This fixes an old issue where we want to make a position unique but only change a small number of bits in the key instead of all 64 of them randomly. This is fragile and can lead to non uniqueness issues in the TT. Key make_key(uint64_t seed) takes any integer and produces a unique random 64 bit key. It is computationally efficient and is based on a congruential pseudo random number generator using well tested constants by Donald Knuth (see https://en.wikipedia.org/wiki/Linear_congruential_generator) STC https://tests.stockfishchess.org/tests/view/5ef6c78f761b685b4c724bb6 LLR: 2.95 (-2.94,2.94) {-1.50,0.50} Total: 154320 W: 29343 L: 29376 D: 95601 Ptnml(0-2): 2543, 18170, 35891, 17889, 2667 LTC https://tests.stockfishchess.org/tests/view/5ef7d1a9020eec13834a940e LLR: 2.95 (-2.94,2.94) {-1.50,0.50} Total: 53488 W: 6629 L: 6584 D: 40275 Ptnml(0-2): 372, 4878, 16183, 4955, 356 closes https://github.com/official-stockfish/Stockfish/pull/2773 bench: 4626776 --- src/search.cpp | 2 +- src/types.h | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 1e2980cb..0fa39988 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -662,7 +662,7 @@ namespace { // search to overwrite a previous full search TT value, so we use a different // position key in case of an excluded move. excludedMove = ss->excludedMove; - posKey = pos.key() ^ (Key(excludedMove) << 48); // Isn't a very good hash + posKey = excludedMove == MOVE_NONE ? pos.key() : pos.key() ^ make_key(excludedMove); tte = TT.probe(posKey, ttHit); ttValue = ttHit ? value_from_tt(tte->value(), ss->ply, pos.rule50_count()) : VALUE_NONE; ttMove = rootNode ? 
thisThread->rootMoves[thisThread->pvIdx].pv[0] diff --git a/src/types.h b/src/types.h index 0c512f5b..c1598561 100644 --- a/src/types.h +++ b/src/types.h @@ -455,6 +455,11 @@ constexpr bool is_ok(Move m) { return from_sq(m) != to_sq(m); // Catch MOVE_NULL and MOVE_NONE } +/// Based on a congruential pseudo random number generator +constexpr Key make_key(uint64_t seed) { + return seed * 6364136223846793005ULL + 1442695040888963407ULL; +} + #endif // #ifndef TYPES_H_INCLUDED #include "tune.h" // Global visibility to tuning setup From 547c4a216a9931e4d5ff95414f146cb6eb877611 Mon Sep 17 00:00:00 2001 From: mstembera Date: Thu, 25 Jun 2020 22:08:17 -0700 Subject: [PATCH 02/86] Remove old zobrist trick for castling rights Removes an 8 year old micro optimization aimed at 32-bit architectures because back then doing an xor of a Key could not be done in one instruction. See original commit here 821e1c7 STC https://tests.stockfishchess.org/tests/view/5ef5833dde213bf647527d0c LLR: 2.94 (-2.94,2.94) {-1.50,0.50} Total: 162648 W: 31053 L: 31097 D: 100498 Ptnml(0-2): 2841, 18966, 37715, 19000, 2802 LTC https://tests.stockfishchess.org/tests/view/5ef7b1bbf993893290cc1489 LLR: 2.93 (-2.94,2.94) {-1.50,0.50} Total: 62360 W: 7617 L: 7586 D: 47157 Ptnml(0-2): 423, 5662, 18994, 5663, 438 closes https://github.com/official-stockfish/Stockfish/pull/2775 bench: 4591425 --- src/position.cpp | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/position.cpp b/src/position.cpp index 471ef01f..6ef7aedc 100644 --- a/src/position.cpp +++ b/src/position.cpp @@ -119,15 +119,7 @@ void Position::init() { Zobrist::enpassant[f] = rng.rand(); for (int cr = NO_CASTLING; cr <= ANY_CASTLING; ++cr) - { - Zobrist::castling[cr] = 0; - Bitboard b = cr; - while (b) - { - Key k = Zobrist::castling[1ULL << pop_lsb(&b)]; - Zobrist::castling[cr] ^= k ? 
k : rng.rand(); - } - } + Zobrist::castling[cr] = rng.rand(); Zobrist::side = rng.rand(); Zobrist::noPawns = rng.rand(); @@ -780,9 +772,9 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { // Update castling rights if needed if (st->castlingRights && (castlingRightsMask[from] | castlingRightsMask[to])) { - int cr = castlingRightsMask[from] | castlingRightsMask[to]; - k ^= Zobrist::castling[st->castlingRights & cr]; - st->castlingRights &= ~cr; + k ^= Zobrist::castling[st->castlingRights]; + st->castlingRights &= ~(castlingRightsMask[from] | castlingRightsMask[to]); + k ^= Zobrist::castling[st->castlingRights]; } // Move the piece. The tricky Chess960 castling is handled earlier From 2810a1ea85b3fbe62095fcb24442c08306f00af3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ste=CC=81phane=20Nicolet?= Date: Sun, 28 Jun 2020 06:00:28 +0200 Subject: [PATCH 03/86] Increase value of pawns on fifth rank This patch increases the endgame value of pawns on the fifth rank. The increase is very small (+1 evaluation point, about 0.005 pawn) for the pawns on external columns (a-b-c-f-g-h) and a bit bigger (+7 evaluation points, about 0.033 pawn) for the pawns on d5/e5. 
STC: LLR: 2.93 (-2.94,2.94) {-0.50,1.50} Total: 79864 W: 15331 L: 15027 D: 49506 Ptnml(0-2): 1336, 9284, 18433, 9498, 1381 https://tests.stockfishchess.org/tests/view/5ef73e2ef993893290cc0c47 LTC: LLR: 2.94 (-2.94,2.94) {0.25,1.75} Total: 47240 W: 5927 L: 5630 D: 35683 Ptnml(0-2): 320, 4133, 14440, 4384, 343 https://tests.stockfishchess.org/tests/view/5ef7c0c4f993893290cc14b7 closes https://github.com/official-stockfish/Stockfish/pull/2776 Bench: 4794633 --- src/psqt.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/psqt.cpp b/src/psqt.cpp index c5da9785..5e8dd2c7 100644 --- a/src/psqt.cpp +++ b/src/psqt.cpp @@ -92,7 +92,7 @@ constexpr Score PBonus[RANK_NB][FILE_NB] = { S( 3,-10), S( 3, -6), S( 10, 10), S( 19, 0), S( 16, 14), S( 19, 7), S( 7, -5), S( -5,-19) }, { S( -9,-10), S(-15,-10), S( 11,-10), S( 15, 4), S( 32, 4), S( 22, 3), S( 5, -6), S(-22, -4) }, { S( -4, 6), S(-23, -2), S( 6, -8), S( 20, -4), S( 40,-13), S( 17,-12), S( 4,-10), S( -8, -9) }, - { S( 13, 9), S( 0, 4), S(-13, 3), S( 1,-12), S( 11,-12), S( -2, -6), S(-13, 13), S( 5, 8) }, + { S( 13, 10), S( 0, 5), S(-13, 4), S( 1, -5), S( 11, -5), S( -2, -5), S(-13, 14), S( 5, 9) }, { S( 5, 28), S(-12, 20), S( -7, 21), S( 22, 28), S( -8, 30), S( -5, 7), S(-15, 6), S( -8, 13) }, { S( -7, 0), S( 7,-11), S( -3, 12), S(-13, 21), S( 5, 25), S(-16, 19), S( 10, 4), S( -8, 7) } }; From 16836f39b295ec635c9883498400f7006ac2869f Mon Sep 17 00:00:00 2001 From: Stefan Geschwentner Date: Sun, 28 Jun 2020 16:28:55 +0200 Subject: [PATCH 04/86] Scale down eval for drawish rook endgames. 
STC: LLR: 2.96 (-2.94,2.94) {-0.50,1.50} Total: 82136 W: 15694 L: 15407 D: 51035 Ptnml(0-2): 1076, 8960, 20767, 9131, 1134 https://tests.stockfishchess.org/tests/view/5ef86cf8020eec13834a94dd LTC: LLR: 2.93 (-2.94,2.94) {0.25,1.75} Total: 70200 W: 8787 L: 8440 D: 52973 Ptnml(0-2): 325, 5983, 22170, 6264, 358 https://tests.stockfishchess.org/tests/view/5ef88225020eec13834a950a closes https://github.com/official-stockfish/Stockfish/pull/2780 Bench: 4478869 --- src/evaluate.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 60ec9c72..65f7bddc 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -782,6 +782,13 @@ namespace { else sf = 22 + 3 * pos.count(strongSide); } + else if( pos.non_pawn_material(WHITE) == RookValueMg + && pos.non_pawn_material(BLACK) == RookValueMg + && !pe->passed_pawns(strongSide) + && pos.count(strongSide) - pos.count(~strongSide) <= 1 + && bool(KingSide & pos.pieces(strongSide, PAWN)) != bool(QueenSide & pos.pieces(strongSide, PAWN)) + && (attacks_bb(pos.square(~strongSide)) & pos.pieces(~strongSide, PAWN))) + sf = 36; else sf = std::min(sf, 36 + 7 * pos.count(strongSide)); } From c7194bd924a606ab75d582d30cb41749312ea94e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ste=CC=81phane=20Nicolet?= Date: Sun, 28 Jun 2020 22:24:57 +0200 Subject: [PATCH 05/86] Scale down eval for queen imbalance We lower the endgame value of the evaluation when we detect that there is only one queen left on the board (more precisely, we use a scale factor of 37/64, or about 0.58, for the endgame part of the evaluation). Hopefully this helps a little bit for the assessment of positions with queen imbalance, which are one of the well-known Stockfish weaknesses. 
STC: LLR: 2.94 (-2.94,2.94) {-0.50,1.50} Total: 21600 W: 4176 L: 3955 D: 13469 Ptnml(0-2): 351, 2457, 5003, 2598, 391 https://tests.stockfishchess.org/tests/view/5ef871b6020eec13834a94e8 LTC: LLR: 2.97 (-2.94,2.94) {0.25,1.75} Total: 248328 W: 30596 L: 29720 D: 188012 Ptnml(0-2): 1544, 22345, 75665, 22911, 1699 https://tests.stockfishchess.org/tests/view/5ef87aec020eec13834a94fe Closes https://github.com/official-stockfish/Stockfish/pull/2781 Bench: 4441323 --- src/evaluate.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 65f7bddc..d19cf34e 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -767,7 +767,6 @@ namespace { eg += v; // Compute the scale factor for the winning side - Color strongSide = eg > VALUE_DRAW ? WHITE : BLACK; int sf = me->scale_factor(pos, strongSide); @@ -782,13 +781,15 @@ namespace { else sf = 22 + 3 * pos.count(strongSide); } - else if( pos.non_pawn_material(WHITE) == RookValueMg + else if ( pos.non_pawn_material(WHITE) == RookValueMg && pos.non_pawn_material(BLACK) == RookValueMg && !pe->passed_pawns(strongSide) && pos.count(strongSide) - pos.count(~strongSide) <= 1 && bool(KingSide & pos.pieces(strongSide, PAWN)) != bool(QueenSide & pos.pieces(strongSide, PAWN)) && (attacks_bb(pos.square(~strongSide)) & pos.pieces(~strongSide, PAWN))) sf = 36; + else if (pos.count() == 1) + sf = 37; else sf = std::min(sf, 36 + 7 * pos.count(strongSide)); } From 69d3be42a112645a9e599df615f730d61a5dca8c Mon Sep 17 00:00:00 2001 From: Stefan Geschwentner Date: Mon, 29 Jun 2020 19:35:24 +0200 Subject: [PATCH 06/86] Tweak single queen endgame scaling. Increase scaling factor for each minor of the opponent side of the queen. 
STC: LLR: 2.94 (-2.94,2.94) {-0.50,1.50} Total: 14528 W: 2860 L: 2653 D: 9015 Ptnml(0-2): 217, 1632, 3408, 1741, 266 https://tests.stockfishchess.org/tests/view/5ef98384020eec13834a96a0 LTC: LLR: 2.95 (-2.94,2.94) {0.25,1.75} Total: 34584 W: 4371 L: 4111 D: 26102 Ptnml(0-2): 205, 3080, 10501, 3262, 244 https://tests.stockfishchess.org/tests/view/5ef99972020eec13834a96c9 closes https://github.com/official-stockfish/Stockfish/pull/2782 Bench: 4523573 --- src/evaluate.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index d19cf34e..615df1ba 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -789,7 +789,8 @@ namespace { && (attacks_bb(pos.square(~strongSide)) & pos.pieces(~strongSide, PAWN))) sf = 36; else if (pos.count() == 1) - sf = 37; + sf = 37 + 3 * (pos.count(WHITE) == 1 ? pos.count(BLACK) + pos.count(BLACK) + : pos.count(WHITE) + pos.count(WHITE)); else sf = std::min(sf, 36 + 7 * pos.count(strongSide)); } From 110068808b51344ac59f8c6a0846f5dfdf670392 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Sat, 27 Jun 2020 21:29:29 +0200 Subject: [PATCH 07/86] Provide WDL statistics A number of engines, GUIs and tournaments start to report WDL estimates alongside or instead of scores. This patch enables reporting of those stats in a more or less standard way (http://www.talkchess.com/forum3/viewtopic.php?t=72140) The model this reporting uses is based on data derived from a few million fishtest LTC games: given a score and a game ply, it provides a win rate that matches that data rather closely, especially in the intermediate range [0.05, 0.95]. Some data is shown at https://github.com/glinscott/fishtest/wiki/UsefulData#win-loss-draw-statistics-of-ltc-games-on-fishtest Making the conversion game ply dependent is important for a good fit, and is in line with experience that a +1 score in the early midgame is more likely a win than in the late endgame. 
Even when enabled, the printing of the info causes no significant overhead. Passed STC: LLR: 2.94 (-2.94,2.94) {-1.50,0.50} Total: 197112 W: 37226 L: 37347 D: 122539 Ptnml(0-2): 2591, 21025, 51464, 20866, 2610 https://tests.stockfishchess.org/tests/view/5ef79ef4f993893290cc146b closes https://github.com/official-stockfish/Stockfish/pull/2778 No functional change --- Readme.md | 5 +++++ src/search.cpp | 3 +++ src/uci.cpp | 39 +++++++++++++++++++++++++++++++++++++++ src/uci.h | 1 + src/ucioption.cpp | 1 + 5 files changed, 49 insertions(+) diff --git a/Readme.md b/Readme.md index 2b1de86b..e60ac718 100644 --- a/Readme.md +++ b/Readme.md @@ -66,6 +66,11 @@ Currently, Stockfish has the following UCI options: If enabled by UCI_LimitStrength, aim for an engine strength of the given Elo. This Elo rating has been calibrated at a time control of 60s+0.6s and anchored to CCRL 40/4. + * #### UCI_ShowWDL + If enabled, show approximate WDL statistics as part of the engine output. + These WDL numbers model expected game outcomes for a given evaluation and + game ply for engine self-play at fishtest LTC conditions (60+0.6s per game). + * #### Move Overhead Assume a time delay of x ms due to network and GUI overheads. This is useful to avoid losses on time in those cases. diff --git a/src/search.cpp b/src/search.cpp index 0fa39988..f14bdf77 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1835,6 +1835,9 @@ string UCI::pv(const Position& pos, Depth depth, Value alpha, Value beta) { << " multipv " << i + 1 << " score " << UCI::value(v); + if (Options["UCI_ShowWDL"]) + ss << UCI::wdl(v, pos.game_ply()); + if (!tb && i == pvIdx) ss << (v >= beta ? " lowerbound" : v <= alpha ? 
" upperbound" : ""); diff --git a/src/uci.cpp b/src/uci.cpp index 11d5adc6..bb57c80b 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -19,6 +19,7 @@ */ #include +#include #include #include #include @@ -182,6 +183,28 @@ namespace { << "\nNodes/second : " << 1000 * nodes / elapsed << endl; } + // The win rate model returns the probability (per mille) of winning given an eval + // and a game-ply. The model fits rather accurately the LTC fishtest statistics. + int win_rate_model(Value v, int ply) { + + // The model captures only up to 240 plies, so limit input (and rescale) + double m = std::min(240, ply) / 64.0; + + // Coefficients of a 3rd order polynomial fit based on fishtest data + // for two parameters needed to transform eval to the argument of a + // logistic function. + double as[] = {-8.24404295, 64.23892342, -95.73056462, 153.86478679}; + double bs[] = {-3.37154371, 28.44489198, -56.67657741, 72.05858751}; + double a = (((as[0] * m + as[1]) * m + as[2]) * m) + as[3]; + double b = (((bs[0] * m + bs[1]) * m + bs[2]) * m) + bs[3]; + + // Transform eval to centipawns with limited range + double x = Utility::clamp(double(100 * v) / PawnValueEg, -1000.0, 1000.0); + + // Return win rate in per mille (rounded to nearest) + return int(0.5 + 1000 / (1 + std::exp((a - x) / b))); + } + } // namespace @@ -269,6 +292,22 @@ string UCI::value(Value v) { } +/// UCI::wdl() report WDL statistics given an evaluation and a game ply, based on +/// data gathered for fishtest LTC games. + +string UCI::wdl(Value v, int ply) { + + stringstream ss; + + int wdl_w = win_rate_model( v, ply); + int wdl_l = win_rate_model(-v, ply); + int wdl_d = 1000 - wdl_w - wdl_l; + ss << " wdl " << wdl_w << " " << wdl_d << " " << wdl_l; + + return ss.str(); +} + + /// UCI::square() converts a Square to a string in algebraic notation (g1, a7, etc.) 
std::string UCI::square(Square s) { diff --git a/src/uci.h b/src/uci.h index b845889b..ad954d9f 100644 --- a/src/uci.h +++ b/src/uci.h @@ -73,6 +73,7 @@ std::string value(Value v); std::string square(Square s); std::string move(Move m, bool chess960); std::string pv(const Position& pos, Depth depth, Value alpha, Value beta); +std::string wdl(Value v, int ply); Move to_move(const Position& pos, std::string& str); } // namespace UCI diff --git a/src/ucioption.cpp b/src/ucioption.cpp index c268c975..4befa6ac 100644 --- a/src/ucioption.cpp +++ b/src/ucioption.cpp @@ -74,6 +74,7 @@ void init(OptionsMap& o) { o["UCI_AnalyseMode"] << Option(false); o["UCI_LimitStrength"] << Option(false); o["UCI_Elo"] << Option(1350, 1350, 2850); + o["UCI_ShowWDL"] << Option(true); o["SyzygyPath"] << Option("", on_tb_path); o["SyzygyProbeDepth"] << Option(1, 1, 100); o["Syzygy50MoveRule"] << Option(true); From 268c00b648ba4a48be79a849dde5733e6705ddbf Mon Sep 17 00:00:00 2001 From: Alain SAVARD Date: Wed, 1 Jul 2020 02:12:59 -0400 Subject: [PATCH 08/86] Use arrays for safe checks, outposts and king protectors in evaluate.cpp Tested for non regression on the safe checks https://tests.stockfishchess.org/tests/view/5ef8b75c020eec13834a9596 LLR: 2.95 (-2.94,2.94) {-1.50,0.50} Total: 22256 W: 4283 L: 4143 D: 13830 Ptnml(0-2): 291, 2439, 5588, 2459, 351 Tested for non regression on the safe checks, outposts and king protectors https://tests.stockfishchess.org/tests/view/5ef8e543020eec13834a95e7 LLR: 2.95 (-2.94,2.94) {-1.50,0.50} Total: 28400 W: 5382 L: 5253 D: 17765 Ptnml(0-2): 394, 3078, 7119, 3223, 386 closes https://github.com/official-stockfish/Stockfish/pull/2785 No functional change --- src/evaluate.cpp | 76 ++++++++++++++++++++++-------------------------- src/pawns.cpp | 4 ++- 2 files changed, 38 insertions(+), 42 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 615df1ba..48db2b3b 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -80,11 +80,11 @@ namespace { // 
KingAttackWeights[PieceType] contains king attack weights by piece type constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 }; - // Penalties for enemy's safe checks - constexpr int QueenSafeCheck = 772; - constexpr int RookSafeCheck = 1084; - constexpr int BishopSafeCheck = 645; - constexpr int KnightSafeCheck = 792; + // SafeCheck[PieceType][single/multiple] contains safe check bonus by piece type, + // higher if multiple safe checks are possible for that piece type. + constexpr int SafeCheck[][2] = { + {}, {}, {792, 1283}, {645, 967}, {1084, 1897}, {772, 1119} + }; #define S(mg, eg) make_score(mg, eg) @@ -106,6 +106,18 @@ namespace { S(110,182), S(114,182), S(114,192), S(116,219) } }; + // KingProtector[knight/bishop] contains penalty for each distance unit to own king + constexpr Score KingProtector[] = { S(8, 9), S(6, 9) }; + + // Outpost[knight/bishop] contains bonuses for each knight or bishop occupying a + // pawn protected square on rank 4 to 6 which is also safe from a pawn attack. + constexpr Score Outpost[] = { S(56, 36), S(30, 23) }; + + // PassedRank[Rank] contains a bonus according to the rank of a passed pawn + constexpr Score PassedRank[RANK_NB] = { + S(0, 0), S(10, 28), S(17, 33), S(15, 41), S(62, 72), S(168, 177), S(276, 260) + }; + // RookOnFile[semiopen/open] contains bonuses for each rook when there is // no (friendly) pawn on the rook file. 
constexpr Score RookOnFile[] = { S(19, 7), S(48, 29) }; @@ -121,23 +133,14 @@ namespace { S(0, 0), S(3, 46), S(37, 68), S(42, 60), S(0, 38), S(58, 41) }; - // PassedRank[Rank] contains a bonus according to the rank of a passed pawn - constexpr Score PassedRank[RANK_NB] = { - S(0, 0), S(10, 28), S(17, 33), S(15, 41), S(62, 72), S(168, 177), S(276, 260) - }; - // Assorted bonuses and penalties - constexpr Score BishopKingProtector = S( 6, 9); constexpr Score BishopOnKingRing = S( 24, 0); - constexpr Score BishopOutpost = S( 30, 23); constexpr Score BishopPawns = S( 3, 7); constexpr Score BishopXRayPawns = S( 4, 5); constexpr Score CorneredBishop = S( 50, 50); constexpr Score FlankAttacks = S( 8, 0); constexpr Score Hanging = S( 69, 36); - constexpr Score KnightKingProtector = S( 8, 9); constexpr Score KnightOnQueen = S( 16, 11); - constexpr Score KnightOutpost = S( 56, 36); constexpr Score LongDiagonalBishop = S( 45, 0); constexpr Score MinorBehindPawn = S( 18, 3); constexpr Score PassedFile = S( 11, 8); @@ -308,7 +311,7 @@ namespace { // Bonus if piece is on an outpost square or can reach one bb = OutpostRanks & attackedBy[Us][PAWN] & ~pe->pawn_attacks_span(Them); if (bb & s) - score += (Pt == KNIGHT) ? KnightOutpost : BishopOutpost; + score += Outpost[Pt == BISHOP]; else if (Pt == KNIGHT && bb & b & ~pos.pieces(Us)) score += ReachableOutpost; @@ -317,8 +320,7 @@ namespace { score += MinorBehindPawn; // Penalty if the piece is far from the king - score -= (Pt == KNIGHT ? KnightKingProtector - : BishopKingProtector) * distance(pos.square(Us), s); + score -= KingProtector[Pt == BISHOP] * distance(pos.square(Us), s); if (Pt == BISHOP) { @@ -420,41 +422,33 @@ namespace { b2 = attacks_bb(ksq, pos.pieces() ^ pos.pieces(Us, QUEEN)); // Enemy rooks checks - rookChecks = b1 & safe & attackedBy[Them][ROOK]; + rookChecks = b1 & attackedBy[Them][ROOK] & safe; if (rookChecks) - kingDanger += more_than_one(rookChecks) ? 
RookSafeCheck * 175/100 - : RookSafeCheck; + kingDanger += SafeCheck[ROOK][more_than_one(rookChecks)]; else unsafeChecks |= b1 & attackedBy[Them][ROOK]; - // Enemy queen safe checks: we count them only if they are from squares from - // which we can't give a rook check, because rook checks are more valuable. - queenChecks = (b1 | b2) - & attackedBy[Them][QUEEN] - & safe - & ~attackedBy[Us][QUEEN] - & ~rookChecks; + // Enemy queen safe checks: count them only if the checks are from squares from + // which opponent cannot give a rook check, because rook checks are more valuable. + queenChecks = (b1 | b2) & attackedBy[Them][QUEEN] & safe + & ~(attackedBy[Us][QUEEN] | rookChecks); if (queenChecks) - kingDanger += more_than_one(queenChecks) ? QueenSafeCheck * 145/100 - : QueenSafeCheck; + kingDanger += SafeCheck[QUEEN][more_than_one(queenChecks)]; - // Enemy bishops checks: we count them only if they are from squares from - // which we can't give a queen check, because queen checks are more valuable. - bishopChecks = b2 - & attackedBy[Them][BISHOP] - & safe + // Enemy bishops checks: count them only if they are from squares from which + // opponent cannot give a queen check, because queen checks are more valuable. + bishopChecks = b2 & attackedBy[Them][BISHOP] & safe & ~queenChecks; if (bishopChecks) - kingDanger += more_than_one(bishopChecks) ? BishopSafeCheck * 3/2 - : BishopSafeCheck; + kingDanger += SafeCheck[BISHOP][more_than_one(bishopChecks)]; + else unsafeChecks |= b2 & attackedBy[Them][BISHOP]; // Enemy knights checks knightChecks = attacks_bb(ksq) & attackedBy[Them][KNIGHT]; if (knightChecks & safe) - kingDanger += more_than_one(knightChecks & safe) ? 
KnightSafeCheck * 162/100 - : KnightSafeCheck; + kingDanger += SafeCheck[KNIGHT][more_than_one(knightChecks & safe)]; else unsafeChecks |= knightChecks; @@ -464,7 +458,7 @@ namespace { b2 = b1 & attackedBy2[Them]; b3 = attackedBy[Us][ALL_PIECES] & KingFlank[file_of(ksq)] & Camp; - int kingFlankAttack = popcount(b1) + popcount(b2); + int kingFlankAttack = popcount(b1) + popcount(b2); int kingFlankDefense = popcount(b3); kingDanger += kingAttackersCount[Them] * kingAttackersWeight[Them] @@ -741,8 +735,8 @@ namespace { bool almostUnwinnable = outflanking < 0 && !pawnsOnBothFlanks; - bool infiltration = rank_of(pos.square(WHITE)) > RANK_4 - || rank_of(pos.square(BLACK)) < RANK_5; + bool infiltration = rank_of(pos.square(WHITE)) > RANK_4 + || rank_of(pos.square(BLACK)) < RANK_5; // Compute the initiative bonus for the attacking side int complexity = 9 * pe->passed_count() diff --git a/src/pawns.cpp b/src/pawns.cpp index d741b2ef..d365ba12 100644 --- a/src/pawns.cpp +++ b/src/pawns.cpp @@ -38,7 +38,9 @@ namespace { constexpr Score WeakLever = S( 0, 56); constexpr Score WeakUnopposed = S(13, 27); - constexpr Score BlockedStorm[RANK_NB] = {S( 0, 0), S( 0, 0), S( 76, 78), S(-10, 15), S(-7, 10), S(-4, 6), S(-1, 2)}; + constexpr Score BlockedStorm[RANK_NB] = { + S(0, 0), S(0, 0), S(76, 78), S(-10, 15), S(-7, 10), S(-4, 6), S(-1, 2) + }; // Connected pawn bonus constexpr int Connected[RANK_NB] = { 0, 7, 8, 12, 29, 48, 86 }; From fb83da0892c183690ddeb1f7c3dbf6779b12707a Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Thu, 2 Jul 2020 18:58:37 +0200 Subject: [PATCH 09/86] Set UCI_ShowWDL by default to false UCI_ShowWDL might not be shown by GUIs that don't know the option, but crash on the WDL output, effectively making it hard for users to turn it off and run the engine. This sets it by default to false. fixes https://github.com/official-stockfish/Stockfish/issues/2787 closes https://github.com/official-stockfish/Stockfish/pull/2788 No functional change. 
--- src/ucioption.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ucioption.cpp b/src/ucioption.cpp index 4befa6ac..ef54ef4e 100644 --- a/src/ucioption.cpp +++ b/src/ucioption.cpp @@ -74,7 +74,7 @@ void init(OptionsMap& o) { o["UCI_AnalyseMode"] << Option(false); o["UCI_LimitStrength"] << Option(false); o["UCI_Elo"] << Option(1350, 1350, 2850); - o["UCI_ShowWDL"] << Option(true); + o["UCI_ShowWDL"] << Option(false); o["SyzygyPath"] << Option("", on_tb_path); o["SyzygyProbeDepth"] << Option(1, 1, 100); o["Syzygy50MoveRule"] << Option(true); From 67818ee9481ba99369fa8a8d92e5c50428fb300e Mon Sep 17 00:00:00 2001 From: SFisGOD Date: Thu, 2 Jul 2020 00:11:23 +0800 Subject: [PATCH 10/86] Remove passed pawn condition. This will help scale down relatively high eval in drawish rook endgames with passed pawn like in TCEC S18 Superfinal Game 90. Passed STC LLR: 2.94 (-2.94,2.94) {-1.50,0.50} Total: 50456 W: 9644 L: 9540 D: 31272 Ptnml(0-2): 760, 5637, 12332, 5737, 762 https://tests.stockfishchess.org/tests/view/5efcb76e59f6f035328940ed Passed LTC LLR: 2.94 (-2.94,2.94) {-1.50,0.50} Total: 77264 W: 9518 L: 9518 D: 58228 Ptnml(0-2): 402, 6766, 24321, 6716, 427 https://tests.stockfishchess.org/tests/view/5efd2ad759f6f03532894143 closes https://github.com/official-stockfish/Stockfish/pull/2792 Bench: 4431626 --- src/evaluate.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 48db2b3b..bb1724a4 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -777,7 +777,6 @@ namespace { } else if ( pos.non_pawn_material(WHITE) == RookValueMg && pos.non_pawn_material(BLACK) == RookValueMg - && !pe->passed_pawns(strongSide) && pos.count(strongSide) - pos.count(~strongSide) <= 1 && bool(KingSide & pos.pieces(strongSide, PAWN)) != bool(QueenSide & pos.pieces(strongSide, PAWN)) && (attacks_bb(pos.square(~strongSide)) & pos.pieces(~strongSide, PAWN))) From c5b2a92cd17c65a639ec6739dd511767f65e188d Mon Sep 17 00:00:00 2001 From: 
protonspring Date: Tue, 30 Jun 2020 10:17:50 -0600 Subject: [PATCH 11/86] denormalize KRKP. a non-functional code style change that denormalizes the KRKP endgame, making it somewhat easier to read. closes https://github.com/official-stockfish/Stockfish/pull/2786 No functional change --- src/endgame.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/endgame.cpp b/src/endgame.cpp index be0755a8..40f49dce 100644 --- a/src/endgame.cpp +++ b/src/endgame.cpp @@ -181,15 +181,15 @@ Value Endgame::operator()(const Position& pos) const { assert(verify_material(pos, strongSide, RookValueMg, 0)); assert(verify_material(pos, weakSide, VALUE_ZERO, 1)); - Square strongKing = relative_square(strongSide, pos.square(strongSide)); - Square weakKing = relative_square(strongSide, pos.square(weakSide)); - Square strongRook = relative_square(strongSide, pos.square(strongSide)); - Square weakPawn = relative_square(strongSide, pos.square(weakSide)); - Square queeningSquare = make_square(file_of(weakPawn), RANK_1); + Square strongKing = pos.square(strongSide); + Square weakKing = pos.square(weakSide); + Square strongRook = pos.square(strongSide); + Square weakPawn = pos.square(weakSide); + Square queeningSquare = make_square(file_of(weakPawn), relative_rank(weakSide, RANK_8)); Value result; // If the stronger side's king is in front of the pawn, it's a win - if (forward_file_bb(WHITE, strongKing) & weakPawn) + if (forward_file_bb(strongSide, strongKing) & weakPawn) result = RookValueEg - distance(strongKing, weakPawn); // If the weaker side's king is too far from the pawn and the rook, @@ -200,15 +200,15 @@ Value Endgame::operator()(const Position& pos) const { // If the pawn is far advanced and supported by the defending king, // the position is drawish - else if ( rank_of(weakKing) <= RANK_3 + else if ( relative_rank(strongSide, weakKing) <= RANK_3 && distance(weakKing, weakPawn) == 1 - && rank_of(strongKing) >= RANK_4 + && 
relative_rank(strongSide, strongKing) >= RANK_4 && distance(strongKing, weakPawn) > 2 + (pos.side_to_move() == strongSide)) result = Value(80) - 8 * distance(strongKing, weakPawn); else - result = Value(200) - 8 * ( distance(strongKing, weakPawn + SOUTH) - - distance(weakKing, weakPawn + SOUTH) + result = Value(200) - 8 * ( distance(strongKing, weakPawn + pawn_push(weakSide)) + - distance(weakKing, weakPawn + pawn_push(weakSide)) - distance(weakPawn, queeningSquare)); return strongSide == pos.side_to_move() ? result : -result; From 7225d254f90c7b9d64d4adf85ec2d319c6cf75a0 Mon Sep 17 00:00:00 2001 From: Stefan Geschwentner Date: Mon, 6 Jul 2020 09:30:23 +0200 Subject: [PATCH 12/86] Add a rank based bonus for blocked pawns. Fix for overevaluated blocked pawns on the 5th and 6th rank. This is a rewrite of the original idea that uses only two parameters. Thanks to rocky640 for pointing this out. STC: LLR: 2.94 (-2.94,2.94) {-0.50,1.50} Total: 50800 W: 9707 L: 9446 D: 31647 Ptnml(0-2): 831, 5851, 11822, 6018, 878 https://tests.stockfishchess.org/tests/view/5f00b4f359f6f03532894304 LTC: LLR: 2.93 (-2.94,2.94) {0.25,1.75} Total: 52064 W: 6477 L: 6167 D: 39420 Ptnml(0-2): 331, 4628, 15834, 4878, 361 https://tests.stockfishchess.org/tests/view/5f0115fe59f6f03532894345 closes https://github.com/official-stockfish/Stockfish/pull/2794 Bench: 4882833 --- src/pawns.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/pawns.cpp b/src/pawns.cpp index d365ba12..f18e0315 100644 --- a/src/pawns.cpp +++ b/src/pawns.cpp @@ -38,6 +38,9 @@ namespace { constexpr Score WeakLever = S( 0, 56); constexpr Score WeakUnopposed = S(13, 27); + // Bonus for blocked pawns at 5th or 6th rank + constexpr Score BlockedPawn[2] = { S(-10, -3), S(-3, 3) }; + constexpr Score BlockedStorm[RANK_NB] = { S(0, 0), S(0, 0), S(76, 78), S(-10, 15), S(-7, 10), S(-4, 6), S(-1, 2) }; @@ -169,6 +172,9 @@ namespace { if (!support) score -= Doubled * doubled + WeakLever * more_than_one(lever); + + if (blocked 
&& r > RANK_4) + score += BlockedPawn[r-4]; } return score; From 76a039027d14640852f60bda6d62ca16bdac3b9e Mon Sep 17 00:00:00 2001 From: Alain SAVARD Date: Mon, 6 Jul 2020 22:43:54 -0400 Subject: [PATCH 13/86] Clean-up en passant processing the goal of this PR is to better document how we process the ep square (if any) given position fen command, and to output more meaningful (and consistent) debug fen on the "d" command. The implementation follows https://en.wikipedia.org/wiki/X-FEN#Encoding_en-passant following x-fen, it is "valid" to record ep even if ep would put king en prise. fixes #2784 closes https://github.com/official-stockfish/Stockfish/pull/2797 No functional change --- src/position.cpp | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/src/position.cpp b/src/position.cpp index 6ef7aedc..396bff5f 100644 --- a/src/position.cpp +++ b/src/position.cpp @@ -178,9 +178,9 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th 4) En passant target square (in algebraic notation). If there's no en passant target square, this is "-". If a pawn has just made a 2-square move, this - is the position "behind" the pawn. This is recorded only if there is a pawn - in position to make an en passant capture, and if there really is a pawn - that might have advanced two squares. + is the position "behind" the pawn. Following X-FEN standard, this is recorded only + if there is a pawn in position to make an en passant capture, and if there really + is a pawn that might have advanced two squares. 5) Halfmove clock. This is the number of halfmoves since the last pawn advance or capture. This is used to determine if a draw can be claimed under the @@ -251,17 +251,25 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th set_castling_right(c, rsq); } - // 4. En passant square. Ignore if no pawn capture is possible + // 4. En passant square. 
+ // Ignore if square is invalid or not on side to move relative rank 6. + bool enpassant = false; + if ( ((ss >> col) && (col >= 'a' && col <= 'h')) - && ((ss >> row) && (row == '3' || row == '6'))) + && ((ss >> row) && (row == (sideToMove == WHITE ? '6' : '3')))) { st->epSquare = make_square(File(col - 'a'), Rank(row - '1')); - if ( !(attackers_to(st->epSquare) & pieces(sideToMove, PAWN)) - || !(pieces(~sideToMove, PAWN) & (st->epSquare + pawn_push(~sideToMove)))) - st->epSquare = SQ_NONE; + // En passant square will be considered only if + // a) side to move have a pawn threatening epSquare + // b) there is an enemy pawn in front of epSquare + // c) there is no piece on epSquare or behind epSquare + enpassant = pawn_attacks_bb(~sideToMove, st->epSquare) & pieces(sideToMove, PAWN) + && (pieces(~sideToMove, PAWN) & (st->epSquare + pawn_push(~sideToMove))) + && !(pieces() & (st->epSquare | (st->epSquare + pawn_push(sideToMove)))); } - else + + if (!enpassant) st->epSquare = SQ_NONE; // 5-6. Halfmove clock and fullmove number From 804a29c738847b7ea5f8a4bff001964bd234d332 Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Wed, 8 Jul 2020 01:29:03 +0300 Subject: [PATCH 14/86] Connected / blocked pawns simplification There is no need to score blocked pawns at many places. 
The idea originated from: Rocky Tuning and testing by: Fauzi Passed STC: https://tests.stockfishchess.org/tests/view/5f04f8fd59f6f035328945d4 LLR: 2.94 (-2.94,2.94) {-1.50,0.50} Total: 6352 W: 1299 L: 1118 D: 3935 Ptnml(0-2): 89, 695, 1469, 792, 131 Passed LTC: https://tests.stockfishchess.org/tests/view/5f0527bd59f6f035328945e3 LLR: 2.94 (-2.94,2.94) {-1.50,0.50} Total: 27648 W: 3517 L: 3433 D: 20698 Ptnml(0-2): 177, 2561, 8301, 2571, 214 closes https://github.com/official-stockfish/Stockfish/pull/2799 Bench: 4734746 --- src/pawns.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pawns.cpp b/src/pawns.cpp index f18e0315..7f8d451a 100644 --- a/src/pawns.cpp +++ b/src/pawns.cpp @@ -39,7 +39,7 @@ namespace { constexpr Score WeakUnopposed = S(13, 27); // Bonus for blocked pawns at 5th or 6th rank - constexpr Score BlockedPawn[2] = { S(-10, -3), S(-3, 3) }; + constexpr Score BlockedPawn[2] = { S(-11, -4), S(-3, 4) }; constexpr Score BlockedStorm[RANK_NB] = { S(0, 0), S(0, 0), S(76, 78), S(-10, 15), S(-7, 10), S(-4, 6), S(-1, 2) @@ -148,7 +148,7 @@ namespace { // Score this pawn if (support | phalanx) { - int v = Connected[r] * (4 + 2 * bool(phalanx) - 2 * bool(opposed) - bool(blocked)) / 2 + int v = Connected[r] * (2 + bool(phalanx) - bool(opposed)) + 21 * popcount(support); score += make_score(v, v * (r - 2) / 4); From bf5ce1c214f8f8e3f98e5e3ac43db0dd28617e35 Mon Sep 17 00:00:00 2001 From: mstembera Date: Sun, 5 Jul 2020 15:17:04 -0700 Subject: [PATCH 15/86] Simplify make_promotions() Remove special case handling of QUIET_CHECKS in make_promotions() STC https://tests.stockfishchess.org/tests/view/5f055dbb59f6f035328945fb LLR: 2.98 (-2.94,2.94) {-1.50,0.50} Total: 42808 W: 8177 L: 8054 D: 26577 Ptnml(0-2): 665, 4890, 10201, 4953, 695 LTC https://tests.stockfishchess.org/tests/view/5f06231a59f6f03532894661 LLR: 2.96 (-2.94,2.94) {-1.50,0.50} Total: 9616 W: 1214 L: 1111 D: 7291 Ptnml(0-2): 53, 821, 2965, 908, 61 closes 
https://github.com/official-stockfish/Stockfish/pull/2800 Bench: 4576410 --- src/movegen.cpp | 22 ++++++++++------------ src/search.cpp | 4 ++-- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/src/movegen.cpp b/src/movegen.cpp index 17203a95..4ff12fc6 100644 --- a/src/movegen.cpp +++ b/src/movegen.cpp @@ -29,22 +29,20 @@ namespace { ExtMove* make_promotions(ExtMove* moveList, Square to, Square ksq) { if (Type == CAPTURES || Type == EVASIONS || Type == NON_EVASIONS) + { *moveList++ = make(to - D, to, QUEEN); + if (attacks_bb(to) & ksq) + *moveList++ = make(to - D, to, KNIGHT); + } if (Type == QUIETS || Type == EVASIONS || Type == NON_EVASIONS) { *moveList++ = make(to - D, to, ROOK); *moveList++ = make(to - D, to, BISHOP); - *moveList++ = make(to - D, to, KNIGHT); + if (!(attacks_bb(to) & ksq)) + *moveList++ = make(to - D, to, KNIGHT); } - // Knight promotion is the only promotion that can give a direct check - // that's not already included in the queen promotion. - if (Type == QUIET_CHECKS && (attacks_bb(to) & ksq)) - *moveList++ = make(to - D, to, KNIGHT); - else - (void)ksq; // Silence a warning under MSVC - return moveList; } @@ -263,8 +261,8 @@ namespace { } // namespace -/// Generates all pseudo-legal captures and queen promotions -/// Generates all pseudo-legal non-captures and underpromotions +/// Generates all pseudo-legal captures plus queen and checking knight promotions +/// Generates all pseudo-legal non-captures and underpromotions(except checking knight) /// Generates all pseudo-legal captures and non-captures /// /// Returns a pointer to the end of the move list. @@ -287,8 +285,8 @@ template ExtMove* generate(const Position&, ExtMove*); template ExtMove* generate(const Position&, ExtMove*); -/// generate generates all pseudo-legal non-captures and knight -/// underpromotions that give check. Returns a pointer to the end of the move list. +/// generate generates all pseudo-legal non-captures. 
+/// Returns a pointer to the end of the move list. template<> ExtMove* generate(const Position& pos, ExtMove* moveList) { diff --git a/src/search.cpp b/src/search.cpp index f14bdf77..1610c206 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1486,8 +1486,8 @@ moves_loop: // When in check, search starts from here // Initialize a MovePicker object for the current position, and prepare // to search the moves. Because the depth is <= 0 here, only captures, - // queen promotions and checks (only if depth >= DEPTH_QS_CHECKS) will - // be generated. + // queen and checking knight promotions, and other checks(only if depth >= DEPTH_QS_CHECKS) + // will be generated. MovePicker mp(pos, ttMove, depth, &thisThread->mainHistory, &thisThread->captureHistory, contHist, From 4006f2c9132db034a27a94be33070d6aaab75b24 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Thu, 9 Jul 2020 22:01:06 +0200 Subject: [PATCH 16/86] Small cleanups closes https://github.com/official-stockfish/Stockfish/pull/2772 No functional change --- Readme.md | 3 --- src/benchmark.cpp | 2 +- src/bitboard.h | 32 ++++++++++++++++---------------- src/evaluate.cpp | 10 +++++----- src/search.cpp | 14 +++++++++----- 5 files changed, 31 insertions(+), 30 deletions(-) diff --git a/Readme.md b/Readme.md index e60ac718..823518d1 100644 --- a/Readme.md +++ b/Readme.md @@ -75,9 +75,6 @@ Currently, Stockfish has the following UCI options: Assume a time delay of x ms due to network and GUI overheads. This is useful to avoid losses on time in those cases. - * #### Minimum Thinking Time - Search for at least x ms per move. - * #### Slow Mover Lower values will make Stockfish take less time in games, higher values will make it think longer. 
diff --git a/src/benchmark.cpp b/src/benchmark.cpp index f338cdda..3299f373 100644 --- a/src/benchmark.cpp +++ b/src/benchmark.cpp @@ -88,7 +88,7 @@ const vector Defaults = { // Chess 960 "setoption name UCI_Chess960 value true", - "bbqnnrkr/pppppppp/8/8/8/8/PPPPPPPP/BBQNNRKR w KQkq - 0 1 moves g2g3 d7d5 d2d4 c8h3 c1g5 e8d6 g5e7 f7f6", + "bbqnnrkr/pppppppp/8/8/8/8/PPPPPPPP/BBQNNRKR w HFhf - 0 1 moves g2g3 d7d5 d2d4 c8h3 c1g5 e8d6 g5e7 f7f6", "setoption name UCI_Chess960 value false" }; diff --git a/src/bitboard.h b/src/bitboard.h index 1c598108..afeb40ec 100644 --- a/src/bitboard.h +++ b/src/bitboard.h @@ -124,7 +124,7 @@ inline Bitboard operator&(Square s, Bitboard b) { return b & s; } inline Bitboard operator|(Square s, Bitboard b) { return b | s; } inline Bitboard operator^(Square s, Bitboard b) { return b ^ s; } -inline Bitboard operator|(Square s, Square s2) { return square_bb(s) | s2; } +inline Bitboard operator|(Square s1, Square s2) { return square_bb(s1) | s2; } constexpr bool more_than_one(Bitboard b) { return b & (b - 1); @@ -138,19 +138,19 @@ constexpr bool opposite_colors(Square s1, Square s2) { /// rank_bb() and file_bb() return a bitboard representing all the squares on /// the given file or rank. -inline Bitboard rank_bb(Rank r) { +constexpr Bitboard rank_bb(Rank r) { return Rank1BB << (8 * r); } -inline Bitboard rank_bb(Square s) { +constexpr Bitboard rank_bb(Square s) { return rank_bb(rank_of(s)); } -inline Bitboard file_bb(File f) { +constexpr Bitboard file_bb(File f) { return FileABB << f; } -inline Bitboard file_bb(Square s) { +constexpr Bitboard file_bb(Square s) { return file_bb(file_of(s)); } @@ -195,16 +195,16 @@ constexpr Bitboard pawn_double_attacks_bb(Bitboard b) { /// adjacent_files_bb() returns a bitboard representing all the squares on the -/// adjacent files of the given one. +/// adjacent files of a given square. 
-inline Bitboard adjacent_files_bb(Square s) { +constexpr Bitboard adjacent_files_bb(Square s) { return shift(file_bb(s)) | shift(file_bb(s)); } -/// line_bb(Square, Square) returns a bitboard representing an entire line, -/// from board edge to board edge, that intersects the given squares. If the -/// given squares are not on a same file/rank/diagonal, returns 0. For instance, +/// line_bb() returns a bitboard representing an entire line (from board edge +/// to board edge) that intersects the two given squares. If the given squares +/// are not on a same file/rank/diagonal, the function returns 0. For instance, /// line_bb(SQ_C4, SQ_F7) will return a bitboard with the A2-G8 diagonal. inline Bitboard line_bb(Square s1, Square s2) { @@ -215,8 +215,8 @@ inline Bitboard line_bb(Square s1, Square s2) { /// between_bb() returns a bitboard representing squares that are linearly -/// between the given squares (excluding the given squares). If the given -/// squares are not on a same file/rank/diagonal, return 0. For instance, +/// between the two given squares (excluding the given squares). If the given +/// squares are not on a same file/rank/diagonal, we return 0. For instance, /// between_bb(SQ_C4, SQ_F7) will return a bitboard with squares D5 and E6. inline Bitboard between_bb(Square s1, Square s2) { @@ -229,7 +229,7 @@ inline Bitboard between_bb(Square s1, Square s2) { /// in front of the given one, from the point of view of the given color. For instance, /// forward_ranks_bb(BLACK, SQ_D3) will return the 16 squares on ranks 1 and 2. -inline Bitboard forward_ranks_bb(Color c, Square s) { +constexpr Bitboard forward_ranks_bb(Color c, Square s) { return c == WHITE ? 
~Rank1BB << 8 * relative_rank(WHITE, s) : ~Rank8BB >> 8 * relative_rank(BLACK, s); } @@ -238,7 +238,7 @@ inline Bitboard forward_ranks_bb(Color c, Square s) { /// forward_file_bb() returns a bitboard representing all the squares along the /// line in front of the given one, from the point of view of the given color. -inline Bitboard forward_file_bb(Color c, Square s) { +constexpr Bitboard forward_file_bb(Color c, Square s) { return forward_ranks_bb(c, s) & file_bb(s); } @@ -247,7 +247,7 @@ inline Bitboard forward_file_bb(Color c, Square s) { /// be attacked by a pawn of the given color when it moves along its file, starting /// from the given square. -inline Bitboard pawn_attack_span(Color c, Square s) { +constexpr Bitboard pawn_attack_span(Color c, Square s) { return forward_ranks_bb(c, s) & adjacent_files_bb(s); } @@ -255,7 +255,7 @@ inline Bitboard pawn_attack_span(Color c, Square s) { /// passed_pawn_span() returns a bitboard which can be used to test if a pawn of /// the given color and on the given square is a passed pawn. -inline Bitboard passed_pawn_span(Color c, Square s) { +constexpr Bitboard passed_pawn_span(Color c, Square s) { return pawn_attack_span(c, s) | forward_file_bb(c, s); } diff --git a/src/evaluate.cpp b/src/evaluate.cpp index bb1724a4..6f2dd69b 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -719,9 +719,9 @@ namespace { } - // Evaluation::winnable() adjusts the mg and eg score components based on the - // known attacking/defending status of the players. A single value is derived - // by interpolation from the mg and eg values and returned. + // Evaluation::winnable() adjusts the midgame and endgame score components, based on + // the known attacking/defending status of the players. The final value is derived + // by interpolation from the midgame and endgame values. template Value Evaluation::winnable(Score score) const { @@ -764,7 +764,7 @@ namespace { Color strongSide = eg > VALUE_DRAW ? 
WHITE : BLACK; int sf = me->scale_factor(pos, strongSide); - // If scale is not already specific, scale down the endgame via general heuristics + // If scale factor is not already specific, scale down via general heuristics if (sf == SCALE_FACTOR_NORMAL) { if (pos.opposite_bishops()) @@ -779,7 +779,7 @@ namespace { && pos.non_pawn_material(BLACK) == RookValueMg && pos.count(strongSide) - pos.count(~strongSide) <= 1 && bool(KingSide & pos.pieces(strongSide, PAWN)) != bool(QueenSide & pos.pieces(strongSide, PAWN)) - && (attacks_bb(pos.square(~strongSide)) & pos.pieces(~strongSide, PAWN))) + && (attackedBy[~strongSide][KING] & pos.pieces(~strongSide, PAWN))) sf = 36; else if (pos.count() == 1) sf = 37 + 3 * (pos.count(WHITE) == 1 ? pos.count(BLACK) + pos.count(BLACK) diff --git a/src/search.cpp b/src/search.cpp index 1610c206..720a9100 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -263,10 +263,10 @@ void MainThread::search() { Thread* bestThread = this; - if (int(Options["MultiPV"]) == 1 && - !Limits.depth && - !(Skill(Options["Skill Level"]).enabled() || int(Options["UCI_LimitStrength"])) && - rootMoves[0].pv[0] != MOVE_NONE) + if ( int(Options["MultiPV"]) == 1 + && !Limits.depth + && !(Skill(Options["Skill Level"]).enabled() || int(Options["UCI_LimitStrength"])) + && rootMoves[0].pv[0] != MOVE_NONE) bestThread = Threads.get_best_thread(); bestPreviousScore = bestThread->rootMoves[0].score; @@ -670,7 +670,11 @@ namespace { ttPv = PvNode || (ttHit && tte->is_pv()); formerPv = ttPv && !PvNode; - if (ttPv && depth > 12 && ss->ply - 1 < MAX_LPH && !priorCapture && is_ok((ss-1)->currentMove)) + if ( ttPv + && depth > 12 + && ss->ply - 1 < MAX_LPH + && !priorCapture + && is_ok((ss-1)->currentMove)) thisThread->lowPlyHistory[ss->ply - 1][from_to((ss-1)->currentMove)] << stat_bonus(depth - 5); // thisThread->ttHitAverage can be used to approximate the running average of ttHit From 5e91c5dcc8066e9f346a10010ddce70f2d317ef6 Mon Sep 17 00:00:00 2001 From: Vizvezdenec Date: 
Sat, 11 Jul 2020 00:06:55 +0300 Subject: [PATCH 17/86] Maximize usage of transposition table in probcut Probcut is a heuristic that wasn't changed a lot in past years; all attempts to change it using information / writing info to transposition table failed. This patch has a number of differences that can be summarized as follows: * For TT write/read we use depth - 3, because probcut search is depth - 4 but we actually do the move prior to it, so effectively we do a depth - 3 search; * In any case of depth of eval from transposition table being >= depth - 3 we either produce a cutoff or refuse to even do probcut search; this allows us to write info of probcut to transposition table because we know that we wouldn't be overwriting some deeper data with our depth - 3 search - this is an important aspect of this patch; * For some not really known reason this patch completely ignores tte->bound() - which was the case for the previous patch that made probcut interact with TT; maybe the second point above is the reason, although it's unproven. A first version of this patch passed STC and LTC: passed STC https://tests.stockfishchess.org/tests/view/5f05908a59f6f03532894613 LLR: 2.95 (-2.94,2.94) {-0.50,1.50} Total: 95776 W: 18300 L: 17973 D: 59503 Ptnml(0-2): 1646, 10944, 22377, 11279, 1642 passed LTC https://tests.stockfishchess.org/tests/view/5f06b54059f6f035328946bb LLR: 2.94 (-2.94,2.94) {0.25,1.75} Total: 57128 W: 7266 L: 6938 D: 42924 Ptnml(0-2): 372, 5163, 17217, 5389, 423 However, an additional bugfix was needed to avoid checking a condition on ttMove if it was not available. 
This passed non-regression bounds on top of the first version: at STC https://tests.stockfishchess.org/tests/view/5f080e5059f6f03532894766 LLR: 2.94 (-2.94,2.94) {-1.50,0.50} Total: 14096 W: 2800 L: 2628 D: 8668 Ptnml(0-2): 225, 1620, 3238, 1688, 277 at LTC https://tests.stockfishchess.org/tests/view/5f0836a559f6f0353289479c LLR: 2.95 (-2.94,2.94) {-1.50,0.50} Total: 25352 W: 3228 L: 3139 D: 18985 Ptnml(0-2): 175, 2350, 7549, 2415, 187 closes https://github.com/official-stockfish/Stockfish/pull/2804 Bench 4540940 --- src/search.cpp | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 720a9100..6cf2f90d 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -596,7 +596,7 @@ namespace { Key posKey; Move ttMove, move, excludedMove, bestMove; Depth extension, newDepth; - Value bestValue, value, ttValue, eval, maxValue; + Value bestValue, value, ttValue, eval, maxValue, probcutBeta; bool ttHit, ttPv, formerPv, givesCheck, improving, didLMR, priorCapture; bool captureOrPromotion, doFullDepthSearch, moveCountPruning, ttCapture, singularQuietLMR; @@ -871,23 +871,33 @@ namespace { } } + probcutBeta = beta + 176 - 49 * improving; + // Step 10. ProbCut (~10 Elo) // If we have a good enough capture and a reduced search returns a value // much above beta, we can (almost) safely prune the previous move. 
if ( !PvNode && depth > 4 - && abs(beta) < VALUE_TB_WIN_IN_MAX_PLY) + && abs(beta) < VALUE_TB_WIN_IN_MAX_PLY + && !( ttHit + && tte->depth() >= depth - 3 + && ttValue != VALUE_NONE + && ttValue < probcutBeta)) { - Value raisedBeta = beta + 176 - 49 * improving; - assert(raisedBeta < VALUE_INFINITE); - MovePicker mp(pos, ttMove, raisedBeta - ss->staticEval, &captureHistory); + if ( ttHit + && tte->depth() >= depth - 3 + && ttValue != VALUE_NONE + && ttValue >= probcutBeta + && ttMove + && pos.capture_or_promotion(ttMove)) + return probcutBeta; + + assert(probcutBeta < VALUE_INFINITE); + MovePicker mp(pos, ttMove, probcutBeta - ss->staticEval, &captureHistory); int probCutCount = 0; while ( (move = mp.next_move()) != MOVE_NONE - && probCutCount < 2 + 2 * cutNode - && !( move == ttMove - && tte->depth() >= depth - 4 - && ttValue < raisedBeta)) + && probCutCount < 2 + 2 * cutNode) if (move != excludedMove && pos.legal(move)) { assert(pos.capture_or_promotion(move)); @@ -905,16 +915,21 @@ namespace { pos.do_move(move, st); // Perform a preliminary qsearch to verify that the move holds - value = -qsearch(pos, ss+1, -raisedBeta, -raisedBeta+1); + value = -qsearch(pos, ss+1, -probcutBeta, -probcutBeta+1); // If the qsearch held, perform the regular search - if (value >= raisedBeta) - value = -search(pos, ss+1, -raisedBeta, -raisedBeta+1, depth - 4, !cutNode); + if (value >= probcutBeta) + value = -search(pos, ss+1, -probcutBeta, -probcutBeta+1, depth - 4, !cutNode); pos.undo_move(move); - if (value >= raisedBeta) + if (value >= probcutBeta) + { + tte->save(posKey, value_to_tt(value, ss->ply), ttPv, + BOUND_LOWER, + depth - 3, move, ss->staticEval); return value; + } } } From 1f3bd968bb194a1f42af661cca9ec445c13978e8 Mon Sep 17 00:00:00 2001 From: SFisGOD Date: Wed, 8 Jul 2020 10:09:32 +0800 Subject: [PATCH 18/86] Introduce bad outpost penalty In some French games, Stockfish likes to bring the Knight to a bad outpost spot. 
This is evident in TCEC S18 Superfinal Game 63, where there is a Knight outpost on the queenside but is actually useless. Stockfish is effectively playing a piece down while holding ground against Leela's break on the kingside. This patch turns the +56 mg bonus for a Knight outpost into a -7 mg penalty if it satisfies the following conditions: * The outpost square is not on the CenterFiles (i.e. not on files C,D,E and F) * The knight is not attacking non pawn enemies. * The side where the outpost is located contains only few enemies, with a particular conditional_more_than_two() implementation Thank you to apospa...@gmail.com for bringing this to our attention and for providing insights. See https://groups.google.com/forum/?fromgroups=#!topic/fishcooking/dEXNzSIBgZU Reference game: https://tcec-chess.com/#div=sf&game=63&season=18 Passed STC: LLR: 2.93 (-2.94,2.94) {-0.50,1.50} Total: 6960 W: 1454 L: 1247 D: 4259 Ptnml(0-2): 115, 739, 1610, 856, 160 https://tests.stockfishchess.org/tests/view/5f08221059f6f0353289477e Passed LTC: LLR: 2.98 (-2.94,2.94) {0.25,1.75} Total: 21440 W: 2767 L: 2543 D: 16130 Ptnml(0-2): 122, 1904, 6462, 2092, 140 https://tests.stockfishchess.org/tests/view/5f0838ed59f6f035328947a2 various related tests show strong test results, but so far no generalizations or simplifications of conditional_more_than_two() are found. See PR for details. closes https://github.com/official-stockfish/Stockfish/pull/2803 Bench: 4366686 --- src/bitboard.h | 7 +++++++ src/evaluate.cpp | 9 ++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/bitboard.h b/src/bitboard.h index afeb40ec..15ec4153 100644 --- a/src/bitboard.h +++ b/src/bitboard.h @@ -130,6 +130,13 @@ constexpr bool more_than_one(Bitboard b) { return b & (b - 1); } +/// Counts the occupation of the bitboard depending on the occupation of SQ_A1 +/// as in `b & (1ULL << SQ_A1) ? 
more_than_two(b) : more_than_one(b)` + +constexpr bool conditional_more_than_two(Bitboard b) { + return b & (b - 1) & (b - 2); +} + constexpr bool opposite_colors(Square s1, Square s2) { return (s1 + rank_of(s1) + s2 + rank_of(s2)) & 1; } diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 6f2dd69b..ca6ea5c4 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -134,6 +134,7 @@ namespace { }; // Assorted bonuses and penalties + constexpr Score BadOutpost = S( -7, 36); constexpr Score BishopOnKingRing = S( 24, 0); constexpr Score BishopPawns = S( 3, 7); constexpr Score BishopXRayPawns = S( 4, 5); @@ -310,7 +311,13 @@ namespace { { // Bonus if piece is on an outpost square or can reach one bb = OutpostRanks & attackedBy[Us][PAWN] & ~pe->pawn_attacks_span(Them); - if (bb & s) + if ( Pt == KNIGHT + && bb & s & ~CenterFiles + && !(b & pos.pieces(Them) & ~pos.pieces(PAWN)) + && !conditional_more_than_two( + pos.pieces(Them) & ~pos.pieces(PAWN) & (s & QueenSide ? QueenSide : KingSide))) + score += BadOutpost; + else if (bb & s) score += Outpost[Pt == BISHOP]; else if (Pt == KNIGHT && bb & b & ~pos.pieces(Us)) score += ReachableOutpost; From 6c197c3964ca0c637ff1f646dc7e6653b1bb4b45 Mon Sep 17 00:00:00 2001 From: syzygy1 <3028851+syzygy1@users.noreply.github.com> Date: Sat, 11 Jul 2020 16:25:34 +0200 Subject: [PATCH 19/86] Corrects a functional change in a cleanup patch. This corrects a functional change in https://github.com/official-stockfish/Stockfish/commit/ddcbacd04d1c860e808202ce8c1206c8acdca627 changing evaluation of KPPvK. Bench remains unchanged at low depth With this patch, 8/8/5k1p/8/7p/7K/8/8 b - - 1 11 is again correctly evaluated as a draw. 
closes https://github.com/official-stockfish/Stockfish/pull/2807 Bench: 4366686 --- src/endgame.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/endgame.cpp b/src/endgame.cpp index 40f49dce..a8ceb648 100644 --- a/src/endgame.cpp +++ b/src/endgame.cpp @@ -589,8 +589,8 @@ ScaleFactor Endgame::operator()(const Position& pos) const { Bitboard strongPawns = pos.pieces(strongSide, PAWN); // If all pawns are ahead of the king on a single rook file, it's a draw. - if (!((strongPawns & ~FileABB) || (strongPawns & ~FileHBB)) && - !(strongPawns & ~passed_pawn_span(weakSide, weakKing))) + if ( !(strongPawns & ~(FileABB | FileHBB)) + && !(strongPawns & ~passed_pawn_span(weakSide, weakKing))) return SCALE_FACTOR_DRAW; return SCALE_FACTOR_NONE; From c3092c54bc6fb837137365fc60eb57bd188deaca Mon Sep 17 00:00:00 2001 From: mstembera Date: Sun, 12 Jul 2020 13:58:00 -0700 Subject: [PATCH 20/86] Multiple lazy stages. An extension of the lazy eval idea: when the score is sufficiently large we now skip more granular parts of the eval. Inspired by an original patch by Moez Jellouli https://tests.stockfishchess.org/tests/view/5f03b2a159f6f03532894529 Credit to him! 
STC https://tests.stockfishchess.org/tests/view/5f0a862c59f6f03532894924 LLR: 2.95 (-2.94,2.94) {-0.50,1.50} Total: 13504 W: 2684 L: 2472 D: 8348 Ptnml(0-2): 229, 1496, 3111, 1666, 250 LTC https://tests.stockfishchess.org/tests/view/5f0ac0e159f6f0353289495b LLR: 2.94 (-2.94,2.94) {0.25,1.75} Total: 31312 W: 3926 L: 3677 D: 23709 Ptnml(0-2): 185, 2773, 9509, 2986, 203 closes https://github.com/official-stockfish/Stockfish/pull/2814 bench: 4541608 --- src/evaluate.cpp | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index ca6ea5c4..dbb725d4 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -74,7 +74,8 @@ using namespace Trace; namespace { // Threshold for lazy and space evaluation - constexpr Value LazyThreshold = Value(1400); + constexpr Value LazyThreshold1 = Value(1400); + constexpr Value LazyThreshold2 = Value(1300); constexpr Value SpaceThreshold = Value(12222); // KingAttackWeights[PieceType] contains king attack weights by piece type @@ -786,7 +787,7 @@ namespace { && pos.non_pawn_material(BLACK) == RookValueMg && pos.count(strongSide) - pos.count(~strongSide) <= 1 && bool(KingSide & pos.pieces(strongSide, PAWN)) != bool(QueenSide & pos.pieces(strongSide, PAWN)) - && (attackedBy[~strongSide][KING] & pos.pieces(~strongSide, PAWN))) + && (attacks_bb(pos.square(~strongSide)) & pos.pieces(~strongSide, PAWN))) sf = 36; else if (pos.count() == 1) sf = 37 + 3 * (pos.count(WHITE) == 1 ? pos.count(BLACK) + pos.count(BLACK) @@ -837,9 +838,12 @@ namespace { score += pe->pawn_score(WHITE) - pe->pawn_score(BLACK); // Early exit if score is high - Value v = (mg_value(score) + eg_value(score)) / 2; - if (abs(v) > LazyThreshold + pos.non_pawn_material() / 64) - return pos.side_to_move() == WHITE ? 
v : -v; + auto lazy_skip = [&](Value lazyThreshold) { + return abs(mg_value(score) + eg_value(score)) / 2 > lazyThreshold + pos.non_pawn_material() / 64; + }; + + if (lazy_skip(LazyThreshold1)) + goto make_v; // Main evaluation begins here initialize(); @@ -856,12 +860,17 @@ namespace { // More complex interactions that require fully populated attack bitboards score += king< WHITE>() - king< BLACK>() - + threats() - threats() - + passed< WHITE>() - passed< BLACK>() + + passed< WHITE>() - passed< BLACK>(); + + if (lazy_skip(LazyThreshold2)) + goto make_v; + + score += threats() - threats() + space< WHITE>() - space< BLACK>(); +make_v: // Derive single value from mg and eg parts of score - v = winnable(score); + Value v = winnable(score); // In case of tracing add all remaining individual evaluation terms if (T) From d89730d5c8dcf10eb9e1d91a81f903d9fc3c949a Mon Sep 17 00:00:00 2001 From: Vizvezdenec Date: Mon, 13 Jul 2020 20:30:58 +0300 Subject: [PATCH 21/86] Do not overwrite valuable TT data after probcut. This patch allows an engine to write probcut data only in case the probcut search depth is greater than transposition table depth. 
passed STC https://tests.stockfishchess.org/tests/view/5f0b52e959f6f035328949a6 LLR: 2.97 (-2.94,2.94) {-0.50,1.50} Total: 52544 W: 10145 L: 9880 D: 32519 Ptnml(0-2): 853, 6097, 12121, 6334, 867 passed LTC https://tests.stockfishchess.org/tests/view/5f0bd94c59f6f035328949f3 LLR: 2.93 (-2.94,2.94) {0.25,1.75} Total: 49576 W: 6164 L: 5863 D: 37549 Ptnml(0-2): 297, 4371, 15218, 4538, 364 closes https://github.com/official-stockfish/Stockfish/pull/2815 bench 4578298 --- src/search.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 6cf2f90d..17ccab92 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -925,9 +925,12 @@ namespace { if (value >= probcutBeta) { - tte->save(posKey, value_to_tt(value, ss->ply), ttPv, - BOUND_LOWER, - depth - 3, move, ss->staticEval); + if ( !(ttHit + && tte->depth() >= depth - 3 + && ttValue != VALUE_NONE)) + tte->save(posKey, value_to_tt(value, ss->ply), ttPv, + BOUND_LOWER, + depth - 3, move, ss->staticEval); return value; } } From f0abde241d39ee4507778bf41b392492c5391652 Mon Sep 17 00:00:00 2001 From: protonspring Date: Sat, 25 Jul 2020 07:32:19 -0600 Subject: [PATCH 22/86] Remove conditional_more_than_two(). This is a functional simplification that removes the conditional_more_than_two() function, which was quite strange and kooky. Note the very minor change to the bench value. 
See this thread for relevant comments on the passing branch: protonspring/Stockfish@d89730d...ff35b50 STC LLR: 2.95 (-2.94,2.94) {-1.50,0.50} Total: 59760 W: 11411 L: 11311 D: 37038 Ptnml(0-2): 992, 6863, 14044, 7015, 966 https://tests.stockfishchess.org/tests/view/5f179988c09435d870cb9b9a LTC LLR: 2.93 (-2.94,2.94) {-1.50,0.50} Total: 45208 W: 5553 L: 5497 D: 34158 Ptnml(0-2): 315, 4081, 13761, 4127, 320 https://tests.stockfishchess.org/tests/view/5f184847c09435d870cb9bee closes https://github.com/official-stockfish/Stockfish/pull/2826 Bench: 4578290 --- src/bitboard.h | 6 ------ src/evaluate.cpp | 12 +++++++----- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/src/bitboard.h b/src/bitboard.h index 15ec4153..8c95de8c 100644 --- a/src/bitboard.h +++ b/src/bitboard.h @@ -130,12 +130,6 @@ constexpr bool more_than_one(Bitboard b) { return b & (b - 1); } -/// Counts the occupation of the bitboard depending on the occupation of SQ_A1 -/// as in `b & (1ULL << SQ_A1) ? more_than_two(b) : more_than_one(b)` - -constexpr bool conditional_more_than_two(Bitboard b) { - return b & (b - 1) & (b - 2); -} constexpr bool opposite_colors(Square s1, Square s2) { return (s1 + rank_of(s1) + s2 + rank_of(s2)) & 1; diff --git a/src/evaluate.cpp b/src/evaluate.cpp index dbb725d4..d16648a8 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -310,13 +310,15 @@ namespace { if (Pt == BISHOP || Pt == KNIGHT) { - // Bonus if piece is on an outpost square or can reach one + // Bonus if the piece is on an outpost square or can reach one + // Reduced bonus for knights (BadOutpost) if few relevant targets bb = OutpostRanks & attackedBy[Us][PAWN] & ~pe->pawn_attacks_span(Them); + Bitboard targets = pos.pieces(Them) & ~pos.pieces(PAWN); + if ( Pt == KNIGHT - && bb & s & ~CenterFiles - && !(b & pos.pieces(Them) & ~pos.pieces(PAWN)) - && !conditional_more_than_two( - pos.pieces(Them) & ~pos.pieces(PAWN) & (s & QueenSide ? 
QueenSide : KingSide))) + && bb & s & ~CenterFiles // on a side outpost + && !(b & targets) // no relevant attacks + && (!more_than_one(targets & (s & QueenSide ? QueenSide : KingSide)))) score += BadOutpost; else if (bb & s) score += Outpost[Pt == BISHOP]; From 62d3106caa2f5acf5ba32500cc19912b8f10612c Mon Sep 17 00:00:00 2001 From: UnaiCorzo Date: Sat, 25 Jul 2020 22:30:05 +0200 Subject: [PATCH 23/86] Remove late irreversible move extension We simplify away the late irreversible move extension, which does not seem to be necessary in the current master. STC LLR: 2.93 (-2.94,2.94) {-1.50,0.50} Total: 38584 W: 7464 L: 7342 D: 23778 Ptnml(0-2): 581, 4328, 9365, 4424, 594 https://tests.stockfishchess.org/tests/view/5f1c9669c09435d870cb9de9 LTC LLR: 2.94 (-2.94,2.94) {-1.50,0.50} Total: 27840 W: 3417 L: 3353 D: 21070 Ptnml(0-2): 120, 2315, 8994, 2363, 128 https://tests.stockfishchess.org/tests/view/5f1d2e22c09435d870cb9e21 closes https://github.com/official-stockfish/Stockfish/pull/2836 bench: 4829420 --- src/search.cpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 17ccab92..6ec4d803 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1067,7 +1067,7 @@ moves_loop: // When in check, search starts from here // search of (alpha-s, beta-s), and just one fails high on (alpha, beta), // then that move is singular and should be extended. To verify this we do // a reduced search on all the other moves but the ttMove and if the - // result is lower than ttValue minus a margin then we will extend the ttMove. + // result is lower than ttValue minus a margin, then we will extend the ttMove. 
if ( depth >= 6 && move == ttMove && !rootNode @@ -1131,12 +1131,6 @@ moves_loop: // When in check, search starts from here if (type_of(move) == CASTLING) extension = 1; - // Late irreversible move extension - if ( move == ttMove - && pos.rule50_count() > 80 - && (captureOrPromotion || type_of(movedPiece) == PAWN)) - extension = 2; - // Add extension to new depth newDepth += extension; From 33f3cfae0093b934563e1eca78486261f18e4650 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Nicolet?= Date: Tue, 28 Jul 2020 10:08:09 +0200 Subject: [PATCH 24/86] Improve handling of queen imbalance We double the bonus for potential threats by minors and rooks against our queen, in case of "queen vs pieces imbalance". Hopefully this will improve a little bit the evaluation for this well-known Stockfish weakness. passed STC: LLR: 2.93 (-2.94,2.94) {-0.50,1.50} Total: 72976 W: 14003 L: 13710 D: 45263 Ptnml(0-2): 1218, 8370, 17094, 8513, 1293 https://tests.stockfishchess.org/tests/view/5efa50eb020eec13834a977d passed LTC: LLR: 2.93 (-2.94,2.94) {0.25,1.75} Total: 22232 W: 2779 L: 2560 D: 16893 Ptnml(0-2): 129, 1885, 6896, 2050, 156 https://tests.stockfishchess.org/tests/view/5f1fdd2dc09435d870cb9f13 closes https://github.com/official-stockfish/Stockfish/pull/2864 Bench: 4367349 --- src/evaluate.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index d16648a8..b34d82f6 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -579,17 +579,21 @@ namespace { // Bonus for threats on the next moves against enemy queen if (pos.count(Them) == 1) { + bool queenImbalance = pos.count() == 1; + Square s = pos.square(Them); - safe = mobilityArea[Us] & ~stronglyProtected; + safe = mobilityArea[Us] + & ~pos.pieces(Us, PAWN) + & ~stronglyProtected; b = attackedBy[Us][KNIGHT] & attacks_bb(s); - score += KnightOnQueen * popcount(b & safe); + score += KnightOnQueen * popcount(b & safe) * (1 + queenImbalance); b = 
(attackedBy[Us][BISHOP] & attacks_bb(s, pos.pieces())) | (attackedBy[Us][ROOK ] & attacks_bb(s, pos.pieces())); - score += SliderOnQueen * popcount(b & safe & attackedBy2[Us]); + score += SliderOnQueen * popcount(b & safe & attackedBy2[Us]) * (1 + queenImbalance); } if (T) From 9587eeeb5ed29f834d4f956b92e0e732877c47a7 Mon Sep 17 00:00:00 2001 From: Stefan Geschwentner Date: Thu, 30 Jul 2020 18:56:11 +0200 Subject: [PATCH 25/86] Tweak cutnode reduction Less reduction for second move at non-check CUT node with depth <= 10. STC: LLR: 2.94 (-2.94,2.94) {-0.50,1.50} Total: 38680 W: 7490 L: 7245 D: 23945 Ptnml(0-2): 643, 4441, 8967, 4606, 683 https://tests.stockfishchess.org/tests/view/5f21e1782f7e63962b99f451 LTC: LLR: 2.95 (-2.94,2.94) {0.25,1.75} Total: 71976 W: 9003 L: 8636 D: 54337 Ptnml(0-2): 440, 6414, 21972, 6663, 499 https://tests.stockfishchess.org/tests/view/5f2245762f7e63962b99f4bd closes https://github.com/official-stockfish/Stockfish/pull/2868 Bench: 4746616 --- src/search.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/search.cpp b/src/search.cpp index 6ec4d803..91ac60ad 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1167,6 +1167,13 @@ moves_loop: // When in check, search starts from here { Depth r = reduction(improving, depth, moveCount); + // Decrease reduction at non-check cut nodes for second move at low depths + if ( cutNode + && depth <= 10 + && moveCount <= 2 + && !ss->inCheck) + r--; + // Decrease reduction if the ttHit running average is large if (thisThread->ttHitAverage > 473 * TtHitAverageResolution * TtHitAverageWindow / 1024) r--; From 84f3e867903f62480c33243dd0ecbffd342796fc Mon Sep 17 00:00:00 2001 From: nodchip Date: Wed, 5 Aug 2020 17:11:15 +0200 Subject: [PATCH 26/86] Add NNUE evaluation This patch ports the efficiently updatable neural network (NNUE) evaluation to Stockfish. 
Both the NNUE and the classical evaluations are available, and can be used to assign a value to a position that is later used in alpha-beta (PVS) search to find the best move. The classical evaluation computes this value as a function of various chess concepts, handcrafted by experts, tested and tuned using fishtest. The NNUE evaluation computes this value with a neural network based on basic inputs. The network is optimized and trained on the evaluations of millions of positions at moderate search depth. The NNUE evaluation was first introduced in shogi, and ported to Stockfish afterward. It can be evaluated efficiently on CPUs, and exploits the fact that only parts of the neural network need to be updated after a typical chess move. [The nodchip repository](https://github.com/nodchip/Stockfish) provides additional tools to train and develop the NNUE networks. This patch is the result of contributions of various authors, from various communities, including: nodchip, ynasu87, yaneurao (initial port and NNUE authors), domschl, FireFather, rqs, xXH4CKST3RXx, tttak, zz4032, joergoster, mstembera, nguyenpham, erbsenzaehler, dorzechowski, and vondele. This new evaluation needed various changes to fishtest and the corresponding infrastructure, for which tomtor, ppigazzini, noobpwnftw, daylen, and vondele are gratefully acknowledged. The first networks have been provided by gekkehenker and sergiovieri, with the latter net (nn-97f742aaefcd.nnue) being the current default. The evaluation function can be selected at run time with the `Use NNUE` (true/false) UCI option, provided the `EvalFile` option points to the network file (depending on the GUI, with full path).
The performance of the NNUE evaluation relative to the classical evaluation depends somewhat on the hardware, and is expected to improve quickly, but is currently on > 80 Elo on fishtest: 60000 @ 10+0.1 th 1 https://tests.stockfishchess.org/tests/view/5f28fe6ea5abc164f05e4c4c ELO: 92.77 +-2.1 (95%) LOS: 100.0% Total: 60000 W: 24193 L: 8543 D: 27264 Ptnml(0-2): 609, 3850, 9708, 10948, 4885 40000 @ 20+0.2 th 8 https://tests.stockfishchess.org/tests/view/5f290229a5abc164f05e4c58 ELO: 89.47 +-2.0 (95%) LOS: 100.0% Total: 40000 W: 12756 L: 2677 D: 24567 Ptnml(0-2): 74, 1583, 8550, 7776, 2017 At the same time, the impact on the classical evaluation remains minimal, causing no significant regression: sprt @ 10+0.1 th 1 https://tests.stockfishchess.org/tests/view/5f2906a2a5abc164f05e4c5b LLR: 2.94 (-2.94,2.94) {-6.00,-4.00} Total: 34936 W: 6502 L: 6825 D: 21609 Ptnml(0-2): 571, 4082, 8434, 3861, 520 sprt @ 60+0.6 th 1 https://tests.stockfishchess.org/tests/view/5f2906cfa5abc164f05e4c5d LLR: 2.93 (-2.94,2.94) {-6.00,-4.00} Total: 10088 W: 1232 L: 1265 D: 7591 Ptnml(0-2): 49, 914, 3170, 843, 68 The needed networks can be found at https://tests.stockfishchess.org/nns It is recommended to use the default one as indicated by the `EvalFile` UCI option. Guidelines for testing new nets can be found at https://github.com/glinscott/fishtest/wiki/Creating-my-first-test#nnue-net-tests Integration has been discussed in various issues: https://github.com/official-stockfish/Stockfish/issues/2823 https://github.com/official-stockfish/Stockfish/issues/2728 The integration branch will be closed after the merge: https://github.com/official-stockfish/Stockfish/pull/2825 https://github.com/official-stockfish/Stockfish/tree/nnue-player-wip closes https://github.com/official-stockfish/Stockfish/pull/2912 This will be an exciting time for computer chess, looking forward to seeing the evolution of this approach. 
Bench: 4746616 --- .travis.yml | 31 +- AUTHORS | 17 +- Readme.md => README.md | 129 ++++--- appveyor.yml | 17 +- src/Makefile | 227 +++++++++++-- src/benchmark.cpp | 4 +- src/bitbase.cpp | 4 +- src/bitboard.cpp | 4 +- src/bitboard.h | 4 +- src/endgame.cpp | 4 +- src/endgame.h | 4 +- src/evaluate.cpp | 114 +++++-- src/evaluate.h | 24 +- src/main.cpp | 5 +- src/material.cpp | 4 +- src/material.h | 4 +- src/misc.cpp | 62 +++- src/misc.h | 6 +- src/movegen.cpp | 4 +- src/movegen.h | 4 +- src/movepick.cpp | 4 +- src/movepick.h | 4 +- src/nnue/architectures/halfkp_256x2-32-32.h | 54 +++ src/nnue/evaluate_nnue.cpp | 178 ++++++++++ src/nnue/evaluate_nnue.h | 48 +++ src/nnue/features/feature_set.h | 135 ++++++++ src/nnue/features/features_common.h | 45 +++ src/nnue/features/half_kp.cpp | 92 +++++ src/nnue/features/half_kp.h | 67 ++++ src/nnue/features/index_list.h | 64 ++++ src/nnue/layers/affine_transform.h | 215 ++++++++++++ src/nnue/layers/clipped_relu.h | 186 ++++++++++ src/nnue/layers/input_slice.h | 68 ++++ src/nnue/nnue_accumulator.h | 39 +++ src/nnue/nnue_architecture.h | 38 +++ src/nnue/nnue_common.h | 77 +++++ src/nnue/nnue_feature_transformer.h | 355 ++++++++++++++++++++ src/pawns.cpp | 4 +- src/pawns.h | 4 +- src/position.cpp | 108 +++++- src/position.h | 42 ++- src/psqt.cpp | 4 +- src/search.cpp | 6 +- src/search.h | 4 +- src/syzygy/tbprobe.cpp | 3 +- src/syzygy/tbprobe.h | 3 +- src/thread.cpp | 4 +- src/thread.h | 4 +- src/thread_win32_osx.h | 4 +- src/timeman.cpp | 4 +- src/timeman.h | 4 +- src/tt.cpp | 4 +- src/tt.h | 4 +- src/tune.cpp | 4 +- src/tune.h | 4 +- src/types.h | 129 ++++++- src/uci.cpp | 22 +- src/uci.h | 4 +- src/ucioption.cpp | 9 +- 59 files changed, 2474 insertions(+), 245 deletions(-) rename Readme.md => README.md (79%) create mode 100644 src/nnue/architectures/halfkp_256x2-32-32.h create mode 100644 src/nnue/evaluate_nnue.cpp create mode 100644 src/nnue/evaluate_nnue.h create mode 100644 src/nnue/features/feature_set.h create mode 100644 
src/nnue/features/features_common.h create mode 100644 src/nnue/features/half_kp.cpp create mode 100644 src/nnue/features/half_kp.h create mode 100644 src/nnue/features/index_list.h create mode 100644 src/nnue/layers/affine_transform.h create mode 100644 src/nnue/layers/clipped_relu.h create mode 100644 src/nnue/layers/input_slice.h create mode 100644 src/nnue/nnue_accumulator.h create mode 100644 src/nnue/nnue_architecture.h create mode 100644 src/nnue/nnue_common.h create mode 100644 src/nnue/nnue_feature_transformer.h diff --git a/.travis.yml b/.travis.yml index e2ae61be..d563a1e1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,5 @@ language: cpp -dist: xenial +dist: bionic matrix: include: @@ -7,7 +7,6 @@ matrix: compiler: gcc addons: apt: - sources: ['ubuntu-toolchain-r-test'] packages: ['g++-8', 'g++-8-multilib', 'g++-multilib', 'valgrind', 'expect', 'curl'] env: - COMPILER=g++-8 @@ -17,23 +16,23 @@ matrix: compiler: clang addons: apt: - sources: ['ubuntu-toolchain-r-test', 'llvm-toolchain-xenial-6.0'] - packages: ['clang-6.0', 'llvm-6.0-dev', 'g++-multilib', 'valgrind', 'expect', 'curl'] + packages: ['clang-10', 'llvm-10-dev', 'g++-multilib', 'valgrind', 'expect', 'curl'] env: - - COMPILER=clang++-6.0 + - COMPILER=clang++-10 - COMP=clang - - LDFLAGS=-fuse-ld=lld - os: osx + osx_image: xcode12 compiler: gcc env: - COMPILER=g++ - COMP=gcc - os: osx + osx_image: xcode12 compiler: clang env: - - COMPILER=clang++ V='Apple LLVM 9.4.1' # Apple LLVM version 9.1.0 (clang-902.0.39.2) + - COMPILER=clang++ - COMP=clang branches: @@ -48,26 +47,34 @@ script: - git log HEAD | grep "\b[Bb]ench[ :]\+[0-9]\{7\}" | head -n 1 | sed "s/[^0-9]*\([0-9]*\).*/\1/g" > git_sig - export benchref=$(cat git_sig) - echo "Reference bench:" $benchref + + # + # Compiler version string + - $COMPILER -v + # # Verify bench number against various builds - export CXXFLAGS="-Werror -D_GLIBCXX_DEBUG" - make clean && make -j2 ARCH=x86-64 optimize=no debug=yes build && ../tests/signature.sh 
$benchref - - make clean && make -j2 ARCH=x86-32 optimize=no debug=yes build && ../tests/signature.sh $benchref - - make clean && make -j2 ARCH=x86-32 build && ../tests/signature.sh $benchref + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 optimize=no debug=yes build && ../tests/signature.sh $benchref; fi + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 build && ../tests/signature.sh $benchref; fi # # Check perft and reproducible search + - export CXXFLAGS="-Werror" + - make clean && make -j2 ARCH=x86-64 build - ../tests/perft.sh - ../tests/reprosearch.sh + # # Valgrind # - export CXXFLAGS="-O1 -fno-inline" - if [ -x "$(command -v valgrind )" ]; then make clean && make -j2 ARCH=x86-64 debug=yes optimize=no build > /dev/null && ../tests/instrumented.sh --valgrind; fi - if [ -x "$(command -v valgrind )" ]; then ../tests/instrumented.sh --valgrind-thread; fi + # # Sanitizer # - # Use g++-8 as a proxy for having sanitizers, might need revision as they become available for more recent versions of clang/gcc - - if [[ "$COMPILER" == "g++-8" ]]; then make clean && make -j2 ARCH=x86-64 sanitize=undefined optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-undefined; fi - - if [[ "$COMPILER" == "g++-8" ]]; then make clean && make -j2 ARCH=x86-64 sanitize=thread optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-thread; fi + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64 sanitize=undefined optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-undefined; fi + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64 sanitize=thread optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-thread; fi diff --git a/AUTHORS b/AUTHORS index f08d71d3..2e080e61 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,10 +1,17 @@ -# List of authors for Stockfish, as of 
March 30, 2020 +# List of authors for Stockfish, as of August 4, 2020 +# Founders of the Stockfish project and fishtest infrastructure Tord Romstad (romstad) Marco Costalba (mcostalba) Joona Kiiski (zamar) Gary Linscott (glinscott) +# Authors and inventors of NNUE, training, NNUE port +Yu Nasu (ynasu87) +Motohiro Isozaki (yaneurao) +Hisayori Noda (nodchip) + +# all other authors of the code in alphabetical order Aditya (absimaldata) Adrian Petrescu (apetresc) Ajith Chandy Jose (ajithcj) @@ -36,6 +43,7 @@ Dariusz Orzechowski David Zar Daylen Yang (daylen) DiscanX +Dominik Schlösser (domschl) double-beep Eduardo Cáceres (eduherminio) Eelco de Groot (KingDefender) @@ -115,7 +123,8 @@ Nick Pelling (nickpelling) Nicklas Persson (NicklasPersson) Niklas Fiekas (niklasf) Nikolay Kostov (NikolayIT) -Nguyen Pham +Nguyen Pham (nguyenpham) +Norman Schmidt (FireFather) Ondrej Mosnáček (WOnder93) Oskar Werkelin Ahlin Pablo Vazquez @@ -135,6 +144,7 @@ Richard Lloyd Rodrigo Exterckötter Tjäder Ron Britvich (Britvich) Ronald de Man (syzygy1, syzygy) +rqs Ryan Schmitt Ryan Takker Sami Kiminki (skiminki) @@ -143,6 +153,7 @@ Sergei Antonov (saproj) Sergei Ivanov (svivanov72) sf-x Shane Booth (shane31) +Shawn Varghese (xXH4CKST3RXx) Stefan Geschwentner (locutus2) Stefano Cardanobile (Stefano80) Steinar Gunderson (sesse) @@ -155,9 +166,11 @@ Tom Vijlbrief (tomtor) Tomasz Sobczyk (Sopel97) Torsten Franz (torfranz, tfranzer) Tracey Emery (basepr1me) +tttak Unai Corzo (unaiic) Uri Blass (uriblass) Vince Negri (cuddlestmonkey) +zz4032 # Additionally, we acknowledge the authors and maintainers of fishtest, diff --git a/Readme.md b/README.md similarity index 79% rename from Readme.md rename to README.md index 823518d1..f71a8b34 100644 --- a/Readme.md +++ b/README.md @@ -4,7 +4,13 @@ [![Build Status](https://ci.appveyor.com/api/projects/status/github/official-stockfish/Stockfish?branch=master&svg=true)](https://ci.appveyor.com/project/mcostalba/stockfish/branch/master) 
[Stockfish](https://stockfishchess.org) is a free, powerful UCI chess engine -derived from Glaurung 2.1. It is not a complete chess program and requires a +derived from Glaurung 2.1. It features two evaluation functions, the classical +evaluation based on handcrafted terms, and the NNUE evaluation based on +efficiently updateable neural networks. The classical evaluation runs efficiently +on most 64bit CPU architectures, while the NNUE evaluation benefits strongly from the +vector intrinsics available on modern CPUs (avx2 or similar). + +Stockfish is not a complete chess program and requires a UCI-compatible GUI (e.g. XBoard with PolyGlot, Scid, Cute Chess, eboard, Arena, Sigma Chess, Shredder, Chess Partner or Fritz) in order to be used comfortably. Read the documentation for your GUI of choice for information about how to use @@ -22,21 +28,20 @@ This distribution of Stockfish consists of the following files: * src, a subdirectory containing the full source code, including a Makefile that can be used to compile Stockfish on Unix-like systems. +To use the NNUE evaluation an additional data file with neural network parameters +needs to be downloaded. The filename for the default set can be found as the default +value of the `EvalFile` UCI option, with the format +`nn-[SHA256 first 12 digits].nnue` (e.g. nn-c157e0a5755b.nnue). This file can be downloaded from +``` +https://tests.stockfishchess.org/api/nn/[filename] +``` +replacing `[filename]` as needed. -## UCI parameters + +## UCI options Currently, Stockfish has the following UCI options: - * #### Debug Log File - Write all communication to and from the engine into a text file. - - * #### Contempt - A positive value for contempt favors middle game positions and avoids draws. - - * #### Analysis Contempt - By default, contempt is set to prefer the side to move. Set this option to "White" - or "Black" to analyse with contempt for that side, or "Off" to disable contempt. 
- * #### Threads The number of CPU threads used for searching a position. For best performance, set this equal to the number of CPU cores available. @@ -44,9 +49,6 @@ Currently, Stockfish has the following UCI options: * #### Hash The size of the hash table in MB. It is recommended to set Hash after setting Threads. - * #### Clear Hash - Clear the hash table. - * #### Ponder Let Stockfish ponder its next move while the opponent is thinking. @@ -54,10 +56,32 @@ Currently, Stockfish has the following UCI options: Output the N best lines (principal variations, PVs) when searching. Leave at 1 for best performance. - * #### Skill Level - Lower the Skill Level in order to make Stockfish play weaker (see also UCI_LimitStrength). - Internally, MultiPV is enabled, and with a certain probability depending on the Skill Level a - weaker move will be played. + * #### Use NNUE + Toggle between the NNUE and classical evaluation functions. If set to "true", + the network parameters must be available to load from file (see also EvalFile). + + * #### EvalFile + The name of the file of the NNUE evaluation parameters. Depending on the GUI the + filename should include the full path to the folder/directory that contains the file. + + * #### Contempt + A positive value for contempt favors middle game positions and avoids draws, + effective for the classical evaluation only. + + * #### Analysis Contempt + By default, contempt is set to prefer the side to move. Set this option to "White" + or "Black" to analyse with contempt for that side, or "Off" to disable contempt. + + * #### UCI_AnalyseMode + An option handled by your GUI. + + * #### UCI_Chess960 + An option handled by your GUI. If true, Stockfish will play Chess960. + + * #### UCI_ShowWDL + If enabled, show approximate WDL statistics as part of the engine output. + These WDL numbers model expected game outcomes for a given evaluation and + game ply for engine self-play at fishtest LTC conditions (60+0.6s per game).
* #### UCI_LimitStrength Enable weaker play aiming for an Elo rating as set by UCI_Elo. This option overrides Skill Level. @@ -66,28 +90,10 @@ Currently, Stockfish has the following UCI options: If enabled by UCI_LimitStrength, aim for an engine strength of the given Elo. This Elo rating has been calibrated at a time control of 60s+0.6s and anchored to CCRL 40/4. - * #### UCI_ShowWDL - If enabled, show approximate WDL statistics as part of the engine output. - These WDL numbers model expected game outcomes for a given evaluation and - game ply for engine self-play at fishtest LTC conditions (60+0.6s per game). - - * #### Move Overhead - Assume a time delay of x ms due to network and GUI overheads. This is useful to - avoid losses on time in those cases. - - * #### Slow Mover - Lower values will make Stockfish take less time in games, higher values will - make it think longer. - - * #### nodestime - Tells the engine to use nodes searched instead of wall time to account for - elapsed time. Useful for engine testing. - - * #### UCI_Chess960 - An option handled by your GUI. If true, Stockfish will play Chess960. - - * #### UCI_AnalyseMode - An option handled by your GUI. + * #### Skill Level + Lower the Skill Level in order to make Stockfish play weaker (see also UCI_LimitStrength). + Internally, MultiPV is enabled, and with a certain probability depending on the Skill Level a + weaker move will be played. * #### SyzygyPath Path to the folders/directories storing the Syzygy tablebase files. Multiple @@ -114,6 +120,47 @@ Currently, Stockfish has the following UCI options: Limit Syzygy tablebase probing to positions with at most this many pieces left (including kings and pawns). + * #### Move Overhead + Assume a time delay of x ms due to network and GUI overheads. This is useful to + avoid losses on time in those cases. + + * #### Slow Mover + Lower values will make Stockfish take less time in games, higher values will + make it think longer. 
+ + * #### nodestime + Tells the engine to use nodes searched instead of wall time to account for + elapsed time. Useful for engine testing. + + * #### Clear Hash + Clear the hash table. + + * #### Debug Log File + Write all communication to and from the engine into a text file. + +## classical and NNUE evaluation + +Both approaches assign a value to a position that is used in alpha-beta (PVS) search +to find the best move. The classical evaluation computes this value as a function +of various chess concepts, handcrafted by experts, tested and tuned using fishtest. +The NNUE evaluation computes this value with a neural network based on basic +inputs (e.g. piece positions only). The network is optimized and trained +on the evaluations of millions of positions at moderate search depth. + +The NNUE evaluation was first introduced in shogi, and ported to Stockfish afterward. +It can be evaluated efficiently on CPUs, and exploits the fact that only parts +of the neural network need to be updated after a typical chess move. +[The nodchip repository](https://github.com/nodchip/Stockfish) provides additional +tools to train and develop the NNUE networks. + +On CPUs supporting modern vector instructions (avx2 and similar), the NNUE evaluation +results in stronger playing strength, even if the nodes per second computed by the engine +is somewhat lower (roughly 60% of nps is typical). + +Note that the NNUE evaluation depends on the Stockfish binary and the network parameter +file (see EvalFile). Not every parameter file is compatible with a given Stockfish binary. +The default value of the EvalFile UCI option is the name of a network that is guaranteed +to be compatible with that binary. ## What to expect from Syzygybases? 
diff --git a/appveyor.yml b/appveyor.yml index 21f3bbe3..d356ba2f 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -4,10 +4,9 @@ clone_depth: 50 branches: only: - master - - appveyor # Operating system (build VM template) -os: Visual Studio 2017 +os: Visual Studio 2019 # Build platform, i.e. x86, x64, AnyCPU. This setting is optional. platform: @@ -36,8 +35,11 @@ before_build: $src = $src.Replace("\", "/") # Build CMakeLists.txt - $t = 'cmake_minimum_required(VERSION 3.8)', + $t = 'cmake_minimum_required(VERSION 3.17)', 'project(Stockfish)', + 'set(CMAKE_CXX_STANDARD 17)', + 'set(CMAKE_CXX_STANDARD_REQUIRED ON)', + 'set (CMAKE_CXX_EXTENSIONS OFF)', 'set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/src)', 'set(source_files', $src, ')', 'add_executable(stockfish ${source_files})' @@ -51,10 +53,11 @@ before_build: $b = git log HEAD | sls "\b[Bb]ench[ :]+[0-9]{7}" | select -first 1 $bench = $b -match '\D+(\d+)' | % { $matches[1] } Write-Host "Reference bench:" $bench - $g = "Visual Studio 15 2017" - If (${env:PLATFORM} -eq 'x64') { $g = $g + ' Win64' } - cmake -G "${g}" . - Write-Host "Generated files for: " $g + $g = "Visual Studio 16 2019" + If (${env:PLATFORM} -eq 'x64') { $a = "x64" } + If (${env:PLATFORM} -eq 'x86') { $a = "Win32" } + cmake -G "${g}" -A ${a} . + Write-Host "Generated files for: " $g $a build_script: - cmake --build . 
--config %CONFIGURATION% -- /verbosity:minimal diff --git a/src/Makefile b/src/Makefile index c3660a20..4741e722 100644 --- a/src/Makefile +++ b/src/Makefile @@ -38,11 +38,12 @@ PGOBENCH = ./$(EXE) bench ### Source and object files SRCS = benchmark.cpp bitbase.cpp bitboard.cpp endgame.cpp evaluate.cpp main.cpp \ material.cpp misc.cpp movegen.cpp movepick.cpp pawns.cpp position.cpp psqt.cpp \ - search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp + search.cpp thread.cpp timeman.cpp tt.cpp uci.cpp ucioption.cpp tune.cpp syzygy/tbprobe.cpp \ + nnue/evaluate_nnue.cpp nnue/features/half_kp.cpp OBJS = $(notdir $(SRCS:.cpp=.o)) -VPATH = syzygy +VPATH = syzygy:nnue:nnue/features ### Establish the operating system name KERNEL = $(shell uname -s) @@ -67,7 +68,14 @@ endif # prefetch = yes/no --- -DUSE_PREFETCH --- Use prefetch asm-instruction # popcnt = yes/no --- -DUSE_POPCNT --- Use popcnt asm-instruction # sse = yes/no --- -msse --- Use Intel Streaming SIMD Extensions +# sse3 = yes/no --- -msse3 --- Use Intel Streaming SIMD Extensions 3 +# ssse3 = yes/no --- -mssse3 --- Use Intel Supplemental Streaming SIMD Extensions 3 +# sse41 = yes/no --- -msse4.1 --- Use Intel Streaming SIMD Extensions 4.1 +# sse42 = yes/no --- -msse4.2 --- Use Intel Streaming SIMD Extensions 4.2 +# avx2 = yes/no --- -mavx2 --- Use Intel Advanced Vector Extensions 2 # pext = yes/no --- -DUSE_PEXT --- Use pext x86_64 asm-instruction +# avx512 = yes/no --- -mavx512bw --- Use Intel Advanced Vector Extensions 512 +# neon = yes/no --- -DUSE_NEON --- Use ARM SIMD architecture # # Note that Makefile is space sensitive, so when adding new architectures # or modifying existing flags, you have to make sure there are no extra spaces @@ -81,7 +89,15 @@ bits = 64 prefetch = no popcnt = no sse = no +sse3 = no +ssse3 = no +sse41 = no +sse42 = no +avx2 = no pext = no +avx512 = no +neon = no +ARCH = x86-64-modern ### 2.2 Architecture specific ifeq ($(ARCH),general-32) @@ -111,11 
+127,70 @@ ifeq ($(ARCH),x86-64) sse = yes endif +ifeq ($(ARCH),x86-64-sse3) + arch = x86_64 + prefetch = yes + sse = yes + sse3 = yes +endif + +ifeq ($(ARCH),x86-64-sse3-popcnt) + arch = x86_64 + prefetch = yes + sse = yes + sse3 = yes + popcnt = yes +endif + +ifeq ($(ARCH),x86-64-ssse3) + arch = x86_64 + prefetch = yes + sse = yes + sse3 = yes + ssse3 = yes +endif + +ifeq ($(ARCH),x86-64-sse41) + arch = x86_64 + prefetch = yes + popcnt = yes + sse = yes + sse3 = yes + ssse3 = yes + sse41 = yes +endif + ifeq ($(ARCH),x86-64-modern) arch = x86_64 prefetch = yes popcnt = yes sse = yes + sse3 = yes + ssse3 = yes + sse41 = yes +endif + +ifeq ($(ARCH),x86-64-sse42) + arch = x86_64 + prefetch = yes + popcnt = yes + sse = yes + sse3 = yes + ssse3 = yes + sse41 = yes + sse42 = yes +endif + +ifeq ($(ARCH),x86-64-avx2) + arch = x86_64 + prefetch = yes + popcnt = yes + sse = yes + sse3 = yes + ssse3 = yes + sse41 = yes + sse42 = yes + avx2 = yes endif ifeq ($(ARCH),x86-64-bmi2) @@ -123,9 +198,28 @@ ifeq ($(ARCH),x86-64-bmi2) prefetch = yes popcnt = yes sse = yes + sse3 = yes + ssse3 = yes + sse41 = yes + sse42 = yes + avx2 = yes pext = yes endif +ifeq ($(ARCH),x86-64-avx512) + arch = x86_64 + prefetch = yes + popcnt = yes + sse = yes + sse3 = yes + ssse3 = yes + sse41 = yes + sse42 = yes + avx2 = yes + pext = yes + avx512 = yes +endif + ifeq ($(ARCH),armv7) arch = armv7 prefetch = yes @@ -136,6 +230,14 @@ ifeq ($(ARCH),armv8) arch = armv8-a prefetch = yes popcnt = yes + neon = yes +endif + +ifeq ($(ARCH),apple-silicon) + arch = arm64 + prefetch = yes + popcnt = yes + neon = yes endif ifeq ($(ARCH),ppc-32) @@ -154,8 +256,8 @@ endif ### ========================================================================== ### 3.1 Selecting compiler (default = gcc) -CXXFLAGS += -Wall -Wcast-qual -fno-exceptions -std=c++11 $(EXTRACXXFLAGS) -DEPENDFLAGS += -std=c++11 +CXXFLAGS += -Wall -Wcast-qual -fno-exceptions -std=c++17 $(EXTRACXXFLAGS) +DEPENDFLAGS += -std=c++17 LDFLAGS += 
$(EXTRALDFLAGS) ifeq ($(COMP),) @@ -249,8 +351,8 @@ endif endif ifeq ($(KERNEL),Darwin) - CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.9 - LDFLAGS += -arch $(arch) -mmacosx-version-min=10.9 + CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.15 + LDFLAGS += -arch $(arch) -mmacosx-version-min=10.15 endif ### Travis CI script uses COMPILER to overwrite CXX @@ -283,8 +385,8 @@ endif ### 3.2.2 Debugging with undefined behavior sanitizers ifneq ($(sanitize),no) - CXXFLAGS += -g3 -fsanitize=$(sanitize) -fuse-ld=gold - LDFLAGS += -fsanitize=$(sanitize) -fuse-ld=gold + CXXFLAGS += -g3 -fsanitize=$(sanitize) + LDFLAGS += -fsanitize=$(sanitize) endif ### 3.3 Optimization @@ -322,7 +424,7 @@ endif ### 3.6 popcnt ifeq ($(popcnt),yes) - ifeq ($(arch),$(filter $(arch),ppc64 armv8-a)) + ifeq ($(arch),$(filter $(arch),ppc64 armv8-a arm64)) CXXFLAGS += -DUSE_POPCNT else ifeq ($(comp),icc) CXXFLAGS += -msse3 -DUSE_POPCNT @@ -331,11 +433,61 @@ ifeq ($(popcnt),yes) endif endif +ifeq ($(avx2),yes) + CXXFLAGS += -DUSE_AVX2 + ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + CXXFLAGS += -mavx2 + endif +endif + +ifeq ($(avx512),yes) + CXXFLAGS += -DUSE_AVX512 + ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + CXXFLAGS += -mavx512bw + endif +endif + +ifeq ($(sse42),yes) + CXXFLAGS += -DUSE_SSE42 + ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + CXXFLAGS += -msse4.2 + endif +endif + +ifeq ($(sse41),yes) + CXXFLAGS += -DUSE_SSE41 + ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + CXXFLAGS += -msse4.1 + endif +endif + +ifeq ($(ssse3),yes) + CXXFLAGS += -DUSE_SSSE3 + ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + CXXFLAGS += -mssse3 + endif +endif + +ifeq ($(sse3),yes) + CXXFLAGS += -DUSE_SSE3 + ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + CXXFLAGS += -msse3 + endif +endif + +ifeq ($(neon),yes) + CXXFLAGS += -DUSE_NEON +endif + +ifeq ($(arch),x86_64) + CXXFLAGS += -DUSE_SSE2 +endif + ### 3.7 pext ifeq ($(pext),yes) CXXFLAGS += -DUSE_PEXT ifeq ($(comp),$(filter 
$(comp),gcc clang mingw)) - CXXFLAGS += -msse4 -mbmi2 + CXXFLAGS += -mbmi2 endif endif @@ -381,15 +533,23 @@ help: @echo "Supported targets:" @echo "" @echo "build > Standard build" - @echo "profile-build > PGO build" + @echo "profile-build > Standard build with PGO" @echo "strip > Strip executable" @echo "install > Install executable" @echo "clean > Clean up" + @echo "net > Download the default nnue net" @echo "" @echo "Supported archs:" @echo "" - @echo "x86-64-bmi2 > x86 64-bit with pext support (also enables SSE4)" - @echo "x86-64-modern > x86 64-bit with popcnt support (also enables SSE3)" + @echo "x86-64-avx512 > x86 64-bit with avx512 support" + @echo "x86-64-bmi2 > x86 64-bit with bmi2 support" + @echo "x86-64-avx2 > x86 64-bit with avx2 support" + @echo "x86-64-sse42 > x86 64-bit with sse42 support" + @echo "x86-64-modern > x86 64-bit with sse41 support (x86-64-sse41)" + @echo "x86-64-sse41 > x86 64-bit with sse41 support" + @echo "x86-64-ssse3 > x86 64-bit with ssse3 support" + @echo "x86-64-sse3-popcnt > x86 64-bit with sse3 and popcnt support" + @echo "x86-64-sse3 > x86 64-bit with sse3 support" @echo "x86-64 > x86 64-bit generic" @echo "x86-32 > x86 32-bit (also enables SSE)" @echo "x86-32-old > x86 32-bit fall back for old hardware" @@ -397,6 +557,7 @@ help: @echo "ppc-32 > PPC 32-bit" @echo "armv7 > ARMv7 32-bit" @echo "armv8 > ARMv8 64-bit" + @echo "apple-silicon > Apple silicon ARM64" @echo "general-64 > unspecified 64-bit" @echo "general-32 > unspecified 32-bit" @echo "" @@ -409,17 +570,20 @@ help: @echo "" @echo "Simple examples. 
If you don't know what to do, you likely want to run: " @echo "" - @echo "make build ARCH=x86-64 (This is for 64-bit systems)" - @echo "make build ARCH=x86-32 (This is for 32-bit systems)" + @echo "make -j build ARCH=x86-64 (This is for 64-bit systems)" + @echo "make -j build ARCH=x86-32 (This is for 32-bit systems)" @echo "" @echo "Advanced examples, for experienced users: " @echo "" - @echo "make build ARCH=x86-64 COMP=clang" - @echo "make profile-build ARCH=x86-64-bmi2 COMP=gcc COMPCXX=g++-4.8" + @echo "make -j build ARCH=x86-64-modern COMP=clang" + @echo "make -j profile-build ARCH=x86-64-bmi2 COMP=gcc COMPCXX=g++-4.8" @echo "" + @echo "The selected architecture $(ARCH) enables the following configuration: " + @echo "" + @$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity -.PHONY: help build profile-build strip install clean objclean profileclean \ +.PHONY: help build profile-build strip install clean net objclean profileclean \ config-sanity icc-profile-use icc-profile-make gcc-profile-use gcc-profile-make \ clang-profile-use clang-profile-make @@ -453,14 +617,21 @@ install: clean: objclean profileclean @rm -f .depend *~ core +net: + $(eval nnuenet := $(shell grep EvalFile ucioption.cpp | grep Option | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/')) + @echo "Default net: $(nnuenet)" + $(eval nnuedownloadurl := https://tests.stockfishchess.org/api/nn/$(nnuenet)) + $(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -sL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi)) + @if test -f "$(nnuenet)"; then echo "Already available."; else echo "Downloading $(nnuedownloadurl)"; $(curl_or_wget) $(nnuedownloadurl) > $(nnuenet); fi + # clean binaries and objects objclean: - @rm -f $(EXE) *.o ./syzygy/*.o + @rm -f $(EXE) *.o ./syzygy/*.o ./nnue/*.o ./nnue/features/*.o # clean auxiliary profiling files profileclean: @rm -rf profdir - @rm -f bench.txt *.gcda *.gcno + @rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./nnue/*.gcda ./nnue/features/*.gcda 
@rm -f stockfish.profdata *.profraw default: @@ -485,7 +656,14 @@ config-sanity: @echo "prefetch: '$(prefetch)'" @echo "popcnt: '$(popcnt)'" @echo "sse: '$(sse)'" + @echo "sse3: '$(sse3)'" + @echo "ssse3: '$(ssse3)'" + @echo "sse41: '$(sse41)'" + @echo "sse42: '$(sse42)'" + @echo "avx2: '$(avx2)'" @echo "pext: '$(pext)'" + @echo "avx512: '$(avx512)'" + @echo "neon: '$(neon)'" @echo "" @echo "Flags:" @echo "CXX: $(CXX)" @@ -499,12 +677,19 @@ config-sanity: @test "$(optimize)" = "yes" || test "$(optimize)" = "no" @test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \ test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || \ - test "$(arch)" = "armv7" || test "$(arch)" = "armv8-a" + test "$(arch)" = "armv7" || test "$(arch)" = "armv8-a" || test "$(arch)" = "arm64" @test "$(bits)" = "32" || test "$(bits)" = "64" @test "$(prefetch)" = "yes" || test "$(prefetch)" = "no" @test "$(popcnt)" = "yes" || test "$(popcnt)" = "no" @test "$(sse)" = "yes" || test "$(sse)" = "no" + @test "$(sse3)" = "yes" || test "$(sse3)" = "no" + @test "$(ssse3)" = "yes" || test "$(ssse3)" = "no" + @test "$(sse41)" = "yes" || test "$(sse41)" = "no" + @test "$(sse42)" = "yes" || test "$(sse42)" = "no" + @test "$(avx2)" = "yes" || test "$(avx2)" = "no" @test "$(pext)" = "yes" || test "$(pext)" = "no" + @test "$(avx512)" = "yes" || test "$(avx512)" = "no" + @test "$(neon)" = "yes" || test "$(neon)" = "no" @test "$(comp)" = "gcc" || test "$(comp)" = "icc" || test "$(comp)" = "mingw" || test "$(comp)" = "clang" $(EXE): $(OBJS) diff --git a/src/benchmark.cpp b/src/benchmark.cpp index 3299f373..6041d642 100644 --- a/src/benchmark.cpp +++ b/src/benchmark.cpp @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The 
Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/bitbase.cpp b/src/bitbase.cpp index 7e27eb96..bbe8e9a7 100644 --- a/src/bitbase.cpp +++ b/src/bitbase.cpp @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/bitboard.cpp b/src/bitboard.cpp index 0bf7eef9..f531010c 100644 --- a/src/bitboard.cpp +++ b/src/bitboard.cpp @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/bitboard.h b/src/bitboard.h index 8c95de8c..a899d879 100644 --- a/src/bitboard.h +++ b/src/bitboard.h @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU 
General Public License as published by diff --git a/src/endgame.cpp b/src/endgame.cpp index a8ceb648..c8be2198 100644 --- a/src/endgame.cpp +++ b/src/endgame.cpp @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/endgame.h b/src/endgame.h index fd1aba2d..1351d88a 100644 --- a/src/endgame.h +++ b/src/endgame.h @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/evaluate.cpp b/src/evaluate.cpp index b34d82f6..f43c62d6 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,15 +18,50 @@ #include #include +#include #include // For std::memset #include #include 
+#include #include "bitboard.h" #include "evaluate.h" #include "material.h" #include "pawns.h" #include "thread.h" +#include "uci.h" + +namespace Eval { + + bool useNNUE; + std::string eval_file_loaded="None"; + + void init_NNUE() { + + useNNUE = Options["Use NNUE"]; + std::string eval_file = std::string(Options["EvalFile"]); + if (useNNUE && eval_file_loaded != eval_file) + if (Eval::NNUE::load_eval_file(eval_file)) + eval_file_loaded = eval_file; + } + + void verify_NNUE() { + + std::string eval_file = std::string(Options["EvalFile"]); + if (useNNUE && eval_file_loaded != eval_file) + { + std::cerr << "Use of NNUE evaluation, but the file " << eval_file << " was not loaded successfully. " + << "These network evaluation parameters must be available, compatible with this version of the code. " + << "The UCI option EvalFile might need to specify the full path, including the directory/folder name, to the file." << std::endl; + std::exit(EXIT_FAILURE); + } + + if (useNNUE) + sync_cout << "info string NNUE evaluation using " << eval_file << " enabled." << sync_endl; + else + sync_cout << "info string classical evaluation enabled." << sync_endl; + } +} namespace Trace { @@ -906,47 +939,62 @@ make_v: /// evaluation of the position from the point of view of the side to move. Value Eval::evaluate(const Position& pos) { - return Evaluation(pos).value(); -} + if (Eval::useNNUE) + return NNUE::evaluate(pos); + else + return Evaluation(pos).value(); +} /// trace() is like evaluate(), but instead of returning a value, it returns /// a string (suitable for outputting to stdout) that contains the detailed /// descriptions and values of each evaluation term. Useful for debugging. 
+/// Trace scores are from white's point of view std::string Eval::trace(const Position& pos) { if (pos.checkers()) - return "Total evaluation: none (in check)"; - - std::memset(scores, 0, sizeof(scores)); - - pos.this_thread()->contempt = SCORE_ZERO; // Reset any dynamic contempt - - Value v = Evaluation(pos).value(); - - v = pos.side_to_move() == WHITE ? v : -v; // Trace scores are from white's point of view + return "Final evaluation: none (in check)"; std::stringstream ss; - ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2) - << " Term | White | Black | Total \n" - << " | MG EG | MG EG | MG EG \n" - << " ------------+-------------+-------------+------------\n" - << " Material | " << Term(MATERIAL) - << " Imbalance | " << Term(IMBALANCE) - << " Pawns | " << Term(PAWN) - << " Knights | " << Term(KNIGHT) - << " Bishops | " << Term(BISHOP) - << " Rooks | " << Term(ROOK) - << " Queens | " << Term(QUEEN) - << " Mobility | " << Term(MOBILITY) - << " King safety | " << Term(KING) - << " Threats | " << Term(THREAT) - << " Passed | " << Term(PASSED) - << " Space | " << Term(SPACE) - << " Winnable | " << Term(WINNABLE) - << " ------------+-------------+-------------+------------\n" - << " Total | " << Term(TOTAL); + ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2); + + Value v; + + if (Eval::useNNUE) + { + v = NNUE::evaluate(pos); + } + else + { + std::memset(scores, 0, sizeof(scores)); + + pos.this_thread()->contempt = SCORE_ZERO; // Reset any dynamic contempt + + v = Evaluation(pos).value(); + + ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2) + << " Term | White | Black | Total \n" + << " | MG EG | MG EG | MG EG \n" + << " ------------+-------------+-------------+------------\n" + << " Material | " << Term(MATERIAL) + << " Imbalance | " << Term(IMBALANCE) + << " Pawns | " << Term(PAWN) + << " Knights | " << Term(KNIGHT) + << " Bishops | " << Term(BISHOP) + << " Rooks | " << Term(ROOK) + 
<< " Queens | " << Term(QUEEN) + << " Mobility | " << Term(MOBILITY) + << " King safety | " << Term(KING) + << " Threats | " << Term(THREAT) + << " Passed | " << Term(PASSED) + << " Space | " << Term(SPACE) + << " Winnable | " << Term(WINNABLE) + << " ------------+-------------+-------------+------------\n" + << " Total | " << Term(TOTAL); + } + + v = pos.side_to_move() == WHITE ? v : -v; ss << "\nFinal evaluation: " << to_cp(v) << " (white side)\n"; diff --git a/src/evaluate.h b/src/evaluate.h index 7c8a2a6f..e808068d 100644 --- a/src/evaluate.h +++ b/src/evaluate.h @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -29,9 +27,23 @@ class Position; namespace Eval { -std::string trace(const Position& pos); + std::string trace(const Position& pos); + Value evaluate(const Position& pos); -Value evaluate(const Position& pos); -} + extern bool useNNUE; + extern std::string eval_file_loaded; + void init_NNUE(); + void verify_NNUE(); + + namespace NNUE { + + Value evaluate(const Position& pos); + Value compute_eval(const Position& pos); + void update_eval(const Position& pos); + bool load_eval_file(const std::string& evalFile); + + } // namespace NNUE + +} // namespace Eval #endif // #ifndef EVALUATE_H_INCLUDED diff --git a/src/main.cpp b/src/main.cpp index fafefee2..fbad6622 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - 
Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -46,6 +44,7 @@ int main(int argc, char* argv[]) { Endgames::init(); Threads.set(size_t(Options["Threads"])); Search::clear(); // After threads are up + Eval::init_NNUE(); UCI::loop(argc, argv); diff --git a/src/material.cpp b/src/material.cpp index bb25d3ca..0ef9926f 100644 --- a/src/material.cpp +++ b/src/material.cpp @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/material.h b/src/material.h index 21647f23..80d01655 100644 --- a/src/material.h +++ b/src/material.h @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/misc.cpp b/src/misc.cpp index 2bc05c5b..3d7c75e5 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 
2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -46,6 +44,7 @@ typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY); #include #include #include +#include #if defined(__linux__) && !defined(__ANDROID__) #include @@ -147,10 +146,8 @@ const string engine_info(bool to_uci) { ss << setw(2) << day << setw(2) << (1 + months.find(month) / 4) << year.substr(2); } - ss << (Is64Bit ? " 64" : "") - << (HasPext ? " BMI2" : (HasPopCnt ? " POPCNT" : "")) - << (to_uci ? "\nid author ": " by ") - << "T. Romstad, M. Costalba, J. Kiiski, G. Linscott"; + ss << (to_uci ? "\nid author ": " by ") + << "the Stockfish developers (see AUTHORS file)"; return ss.str(); } @@ -215,7 +212,33 @@ const std::string compiler_info() { compiler += " on unknown system"; #endif - compiler += "\n __VERSION__ macro expands to: "; + compiler += "\nCompilation settings include: "; + compiler += (Is64Bit ? " 64bit" : " 32bit"); + #if defined(USE_AVX512) + compiler += " AVX512"; + #endif + #if defined(USE_AVX2) + compiler += " AVX2"; + #endif + #if defined(USE_SSE42) + compiler += " SSE42"; + #endif + #if defined(USE_SSE41) + compiler += " SSE41"; + #endif + #if defined(USE_SSSE3) + compiler += " SSSE3"; + #endif + #if defined(USE_SSE3) + compiler += " SSE3"; + #endif + compiler += (HasPext ? " BMI2" : ""); + compiler += (HasPopCnt ? " POPCNT" : ""); + #if !defined(NDEBUG) + compiler += " DEBUG"; + #endif + + compiler += "\n__VERSION__ macro expands to: "; #ifdef __VERSION__ compiler += __VERSION__; #else @@ -293,6 +316,29 @@ void prefetch(void* addr) { #endif +/// Wrappers for systems where the c++17 implementation doesn't guarantee the availability of aligned_alloc. 
+/// Memory allocated with std_aligned_alloc must be freed with std_aligned_free. +/// + +void* std_aligned_alloc(size_t alignment, size_t size) { +#if defined(__APPLE__) + return aligned_alloc(alignment, size); +#elif defined(_WIN32) + return _mm_malloc(size, alignment); +#else + return std::aligned_alloc(alignment, size); +#endif +} + +void std_aligned_free(void* ptr) { +#if defined(__APPLE__) + free(ptr); +#elif defined(_WIN32) + _mm_free(ptr); +#else + free(ptr); +#endif +} /// aligned_ttmem_alloc() will return suitably aligned memory, and if possible use large pages. /// The returned pointer is the aligned one, while the mem argument is the one that needs diff --git a/src/misc.h b/src/misc.h index 373f1b77..eb4e05c0 100644 --- a/src/misc.h +++ b/src/misc.h @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -33,6 +31,8 @@ const std::string engine_info(bool to_uci = false); const std::string compiler_info(); void prefetch(void* addr); void start_logger(const std::string& fname); +void* std_aligned_alloc(size_t alignment, size_t size); +void std_aligned_free(void* ptr); void* aligned_ttmem_alloc(size_t size, void*& mem); void aligned_ttmem_free(void* mem); // nop if mem == nullptr diff --git a/src/movegen.cpp b/src/movegen.cpp index 4ff12fc6..d74df4c3 100644 --- a/src/movegen.cpp +++ b/src/movegen.cpp @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 
2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/movegen.h b/src/movegen.h index c2e7c3f1..fb616d00 100644 --- a/src/movegen.h +++ b/src/movegen.h @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/movepick.cpp b/src/movepick.cpp index 5775f810..96a44449 100644 --- a/src/movepick.cpp +++ b/src/movepick.cpp @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/movepick.h b/src/movepick.h index aaff388f..f080935a 100644 --- a/src/movepick.h +++ b/src/movepick.h @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) 
Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/nnue/architectures/halfkp_256x2-32-32.h b/src/nnue/architectures/halfkp_256x2-32-32.h new file mode 100644 index 00000000..9216bd41 --- /dev/null +++ b/src/nnue/architectures/halfkp_256x2-32-32.h @@ -0,0 +1,54 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/ + +// Definition of input features and network structure used in NNUE evaluation function + +#ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED +#define NNUE_HALFKP_256X2_32_32_H_INCLUDED + +#include "../features/feature_set.h" +#include "../features/half_kp.h" + +#include "../layers/input_slice.h" +#include "../layers/affine_transform.h" +#include "../layers/clipped_relu.h" + +namespace Eval::NNUE { + +// Input features used in evaluation function +using RawFeatures = Features::FeatureSet< + Features::HalfKP<Features::Side::kFriend>>; + +// Number of input feature dimensions after conversion +constexpr IndexType kTransformedFeatureDimensions = 256; + +namespace Layers { + +// Define network structure +using InputLayer = InputSlice<kTransformedFeatureDimensions * 2>; +using HiddenLayer1 = ClippedReLU<AffineTransform<InputLayer, 32>>; +using HiddenLayer2 = ClippedReLU<AffineTransform<HiddenLayer1, 32>>; +using OutputLayer = AffineTransform<HiddenLayer2, 1>; + +} // namespace Layers + +using Network = Layers::OutputLayer; + +} // namespace Eval::NNUE + +#endif // #ifndef NNUE_HALFKP_256X2_32_32_H_INCLUDED diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp new file mode 100644 index 00000000..af0894b2 --- /dev/null +++ b/src/nnue/evaluate_nnue.cpp @@ -0,0 +1,178 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/ + +// Code for calculating NNUE evaluation function + +#include +#include +#include + +#include "../evaluate.h" +#include "../position.h" +#include "../misc.h" +#include "../uci.h" + +#include "evaluate_nnue.h" + +ExtPieceSquare kpp_board_index[PIECE_NB] = { + // convention: W - us, B - them + // viewed from other side, W and B are reversed + { PS_NONE, PS_NONE }, + { PS_W_PAWN, PS_B_PAWN }, + { PS_W_KNIGHT, PS_B_KNIGHT }, + { PS_W_BISHOP, PS_B_BISHOP }, + { PS_W_ROOK, PS_B_ROOK }, + { PS_W_QUEEN, PS_B_QUEEN }, + { PS_W_KING, PS_B_KING }, + { PS_NONE, PS_NONE }, + { PS_NONE, PS_NONE }, + { PS_B_PAWN, PS_W_PAWN }, + { PS_B_KNIGHT, PS_W_KNIGHT }, + { PS_B_BISHOP, PS_W_BISHOP }, + { PS_B_ROOK, PS_W_ROOK }, + { PS_B_QUEEN, PS_W_QUEEN }, + { PS_B_KING, PS_W_KING }, + { PS_NONE, PS_NONE } +}; + + +namespace Eval::NNUE { + + // Input feature converter + AlignedPtr feature_transformer; + + // Evaluation function + AlignedPtr network; + + // Evaluation function file name + std::string fileName; + + namespace Detail { + + // Initialize the evaluation function parameters + template + void Initialize(AlignedPtr& pointer) { + + pointer.reset(reinterpret_cast(std_aligned_alloc(alignof(T), sizeof(T)))); + std::memset(pointer.get(), 0, sizeof(T)); + } + + // Read evaluation function parameters + template + bool ReadParameters(std::istream& stream, const AlignedPtr& pointer) { + + std::uint32_t header; + stream.read(reinterpret_cast(&header), sizeof(header)); + if (!stream || header != T::GetHashValue()) return false; + return pointer->ReadParameters(stream); + } + + } // namespace Detail + + // Initialize the evaluation function parameters + void Initialize() { + + Detail::Initialize(feature_transformer); + Detail::Initialize(network); + } + + // Read network header + bool ReadHeader(std::istream& stream, + std::uint32_t* hash_value, std::string* architecture) { + + std::uint32_t version, size; + stream.read(reinterpret_cast(&version), sizeof(version)); + 
stream.read(reinterpret_cast(hash_value), sizeof(*hash_value)); + stream.read(reinterpret_cast(&size), sizeof(size)); + if (!stream || version != kVersion) return false; + architecture->resize(size); + stream.read(&(*architecture)[0], size); + return !stream.fail(); + } + + // Read network parameters + bool ReadParameters(std::istream& stream) { + + std::uint32_t hash_value; + std::string architecture; + if (!ReadHeader(stream, &hash_value, &architecture)) return false; + if (hash_value != kHashValue) return false; + if (!Detail::ReadParameters(stream, feature_transformer)) return false; + if (!Detail::ReadParameters(stream, network)) return false; + return stream && stream.peek() == std::ios::traits_type::eof(); + } + + // Proceed with the difference calculation if possible + static void UpdateAccumulatorIfPossible(const Position& pos) { + + feature_transformer->UpdateAccumulatorIfPossible(pos); + } + + // Calculate the evaluation value + static Value ComputeScore(const Position& pos, bool refresh) { + + auto& accumulator = pos.state()->accumulator; + if (!refresh && accumulator.computed_score) { + return accumulator.score; + } + + alignas(kCacheLineSize) TransformedFeatureType + transformed_features[FeatureTransformer::kBufferSize]; + feature_transformer->Transform(pos, transformed_features, refresh); + alignas(kCacheLineSize) char buffer[Network::kBufferSize]; + const auto output = network->Propagate(transformed_features, buffer); + + auto score = static_cast(output[0] / FV_SCALE); + + accumulator.score = score; + accumulator.computed_score = true; + return accumulator.score; + } + + // Load the evaluation function file + bool load_eval_file(const std::string& evalFile) { + + Initialize(); + fileName = evalFile; + + std::ifstream stream(evalFile, std::ios::binary); + + const bool result = ReadParameters(stream); + + return result; + } + + // Evaluation function. Perform differential calculation. 
+ Value evaluate(const Position& pos) { + Value v = ComputeScore(pos, false); + v = Utility::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); + + return v; + } + + // Evaluation function. Perform full calculation. + Value compute_eval(const Position& pos) { + return ComputeScore(pos, true); + } + + // Proceed with the difference calculation if possible + void update_eval(const Position& pos) { + UpdateAccumulatorIfPossible(pos); + } + +} // namespace Eval::NNUE diff --git a/src/nnue/evaluate_nnue.h b/src/nnue/evaluate_nnue.h new file mode 100644 index 00000000..5f0d1855 --- /dev/null +++ b/src/nnue/evaluate_nnue.h @@ -0,0 +1,48 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/ + +// header used in NNUE evaluation function + +#ifndef NNUE_EVALUATE_NNUE_H_INCLUDED +#define NNUE_EVALUATE_NNUE_H_INCLUDED + +#include "nnue_feature_transformer.h" + +#include <memory> + +namespace Eval::NNUE { + + // Hash value of evaluation function structure + constexpr std::uint32_t kHashValue = + FeatureTransformer::GetHashValue() ^ Network::GetHashValue(); + + // Deleter for automating release of memory area + template <typename T> + struct AlignedDeleter { + void operator()(T* ptr) const { + ptr->~T(); + std_aligned_free(ptr); + } + }; + + template <typename T> + using AlignedPtr = std::unique_ptr<T, AlignedDeleter<T>>; + +} // namespace Eval::NNUE + +#endif // #ifndef NNUE_EVALUATE_NNUE_H_INCLUDED diff --git a/src/nnue/features/feature_set.h b/src/nnue/features/feature_set.h new file mode 100644 index 00000000..79ca83ae --- /dev/null +++ b/src/nnue/features/feature_set.h @@ -0,0 +1,135 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/ + +// A class template that represents the input feature set of the NNUE evaluation function + +#ifndef NNUE_FEATURE_SET_H_INCLUDED +#define NNUE_FEATURE_SET_H_INCLUDED + +#include "features_common.h" +#include + +namespace Eval::NNUE::Features { + + // Class template that represents a list of values + template + struct CompileTimeList; + + template + struct CompileTimeList { + static constexpr bool Contains(T value) { + return value == First || CompileTimeList::Contains(value); + } + static constexpr std::array + kValues = {{First, Remaining...}}; + }; + + // Base class of feature set + template + class FeatureSetBase { + + public: + // Get a list of indices for active features + template + static void AppendActiveIndices( + const Position& pos, TriggerEvent trigger, IndexListType active[2]) { + + for (Color perspective : { WHITE, BLACK }) { + Derived::CollectActiveIndices( + pos, trigger, perspective, &active[perspective]); + } + } + + // Get a list of indices for recently changed features + template + static void AppendChangedIndices( + const PositionType& pos, TriggerEvent trigger, + IndexListType removed[2], IndexListType added[2], bool reset[2]) { + + const auto& dp = pos.state()->dirtyPiece; + if (dp.dirty_num == 0) return; + + for (Color perspective : { WHITE, BLACK }) { + reset[perspective] = false; + switch (trigger) { + case TriggerEvent::kFriendKingMoved: + reset[perspective] = + dp.pieceId[0] == PIECE_ID_KING + perspective; + break; + default: + assert(false); + break; + } + if (reset[perspective]) { + Derived::CollectActiveIndices( + pos, trigger, perspective, &added[perspective]); + } else { + Derived::CollectChangedIndices( + pos, trigger, perspective, + &removed[perspective], &added[perspective]); + } + } + } + }; + + // Class template that represents the feature set + template + class FeatureSet : public FeatureSetBase> { + + public: + // Hash value embedded in the evaluation file + static constexpr std::uint32_t kHashValue = 
FeatureType::kHashValue; + // Number of feature dimensions + static constexpr IndexType kDimensions = FeatureType::kDimensions; + // Maximum number of simultaneously active features + static constexpr IndexType kMaxActiveDimensions = + FeatureType::kMaxActiveDimensions; + // Trigger for full calculation instead of difference calculation + using SortedTriggerSet = + CompileTimeList; + static constexpr auto kRefreshTriggers = SortedTriggerSet::kValues; + + private: + // Get a list of indices for active features + static void CollectActiveIndices( + const Position& pos, const TriggerEvent trigger, const Color perspective, + IndexList* const active) { + if (FeatureType::kRefreshTrigger == trigger) { + FeatureType::AppendActiveIndices(pos, perspective, active); + } + } + + // Get a list of indices for recently changed features + static void CollectChangedIndices( + const Position& pos, const TriggerEvent trigger, const Color perspective, + IndexList* const removed, IndexList* const added) { + + if (FeatureType::kRefreshTrigger == trigger) { + FeatureType::AppendChangedIndices(pos, perspective, removed, added); + } + } + + // Make the base class and the class template that recursively uses itself a friend + friend class FeatureSetBase; + template + friend class FeatureSet; + }; + +} // namespace Eval::NNUE::Features + +#endif // #ifndef NNUE_FEATURE_SET_H_INCLUDED diff --git a/src/nnue/features/features_common.h b/src/nnue/features/features_common.h new file mode 100644 index 00000000..d00a35df --- /dev/null +++ b/src/nnue/features/features_common.h @@ -0,0 +1,45 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +//Common header of input features of NNUE evaluation function + +#ifndef NNUE_FEATURES_COMMON_H_INCLUDED +#define NNUE_FEATURES_COMMON_H_INCLUDED + +#include "../../evaluate.h" +#include "../nnue_common.h" + +namespace Eval::NNUE::Features { + + class IndexList; + + template + class FeatureSet; + + // Trigger to perform full calculations instead of difference only + enum class TriggerEvent { + kFriendKingMoved // calculate full evaluation when own king moves + }; + + enum class Side { + kFriend // side to move + }; + +} // namespace Eval::NNUE::Features + +#endif // #ifndef NNUE_FEATURES_COMMON_H_INCLUDED diff --git a/src/nnue/features/half_kp.cpp b/src/nnue/features/half_kp.cpp new file mode 100644 index 00000000..628add6e --- /dev/null +++ b/src/nnue/features/half_kp.cpp @@ -0,0 +1,92 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +//Definition of input features HalfKP of NNUE evaluation function + +#include "half_kp.h" +#include "index_list.h" + +namespace Eval::NNUE::Features { + + // Find the index of the feature quantity from the king position and PieceSquare + template + inline IndexType HalfKP::MakeIndex(Square sq_k, PieceSquare p) { + return static_cast(PS_END) * static_cast(sq_k) + p; + } + + // Get pieces information + template + inline void HalfKP::GetPieces( + const Position& pos, Color perspective, + PieceSquare** pieces, Square* sq_target_k) { + + *pieces = (perspective == BLACK) ? + pos.eval_list()->piece_list_fb() : + pos.eval_list()->piece_list_fw(); + const PieceId target = (AssociatedKing == Side::kFriend) ? + static_cast(PIECE_ID_KING + perspective) : + static_cast(PIECE_ID_KING + ~perspective); + *sq_target_k = static_cast(((*pieces)[target] - PS_W_KING) % SQUARE_NB); + } + + // Get a list of indices for active features + template + void HalfKP::AppendActiveIndices( + const Position& pos, Color perspective, IndexList* active) { + + // Do nothing if array size is small to avoid compiler warning + if (RawFeatures::kMaxActiveDimensions < kMaxActiveDimensions) return; + + PieceSquare* pieces; + Square sq_target_k; + GetPieces(pos, perspective, &pieces, &sq_target_k); + for (PieceId i = PIECE_ID_ZERO; i < PIECE_ID_KING; ++i) { + if (pieces[i] != PS_NONE) { + active->push_back(MakeIndex(sq_target_k, pieces[i])); + } + } + } + + // Get a list of indices for recently changed features + template + void HalfKP::AppendChangedIndices( + const Position& pos, Color perspective, + IndexList* removed, IndexList* added) { + + PieceSquare* pieces; + Square sq_target_k; + GetPieces(pos, perspective, &pieces, &sq_target_k); + const auto& dp = pos.state()->dirtyPiece; + for (int i = 0; i < dp.dirty_num; ++i) { + if (dp.pieceId[i] >= PIECE_ID_KING) continue; + const auto old_p = static_cast( + dp.old_piece[i].from[perspective]); + if (old_p != PS_NONE) { + 
removed->push_back(MakeIndex(sq_target_k, old_p)); + } + const auto new_p = static_cast( + dp.new_piece[i].from[perspective]); + if (new_p != PS_NONE) { + added->push_back(MakeIndex(sq_target_k, new_p)); + } + } + } + + template class HalfKP; + +} // namespace Eval::NNUE::Features diff --git a/src/nnue/features/half_kp.h b/src/nnue/features/half_kp.h new file mode 100644 index 00000000..99842eea --- /dev/null +++ b/src/nnue/features/half_kp.h @@ -0,0 +1,67 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +//Definition of input features HalfKP of NNUE evaluation function + +#ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED +#define NNUE_FEATURES_HALF_KP_H_INCLUDED + +#include "../../evaluate.h" +#include "features_common.h" + +namespace Eval::NNUE::Features { + + // Feature HalfKP: Combination of the position of own king + // and the position of pieces other than kings + template + class HalfKP { + + public: + // Feature name + static constexpr const char* kName = "HalfKP(Friend)"; + // Hash value embedded in the evaluation file + static constexpr std::uint32_t kHashValue = + 0x5D69D5B9u ^ (AssociatedKing == Side::kFriend); + // Number of feature dimensions + static constexpr IndexType kDimensions = + static_cast(SQUARE_NB) * static_cast(PS_END); + // Maximum number of simultaneously active features + static constexpr IndexType kMaxActiveDimensions = PIECE_ID_KING; + // Trigger for full calculation instead of difference calculation + static constexpr TriggerEvent kRefreshTrigger = TriggerEvent::kFriendKingMoved; + + // Get a list of indices for active features + static void AppendActiveIndices(const Position& pos, Color perspective, + IndexList* active); + + // Get a list of indices for recently changed features + static void AppendChangedIndices(const Position& pos, Color perspective, + IndexList* removed, IndexList* added); + + // Index of a feature for a given king position and another piece on some square + static IndexType MakeIndex(Square sq_k, PieceSquare p); + + private: + // Get pieces information + static void GetPieces(const Position& pos, Color perspective, + PieceSquare** pieces, Square* sq_target_k); + }; + +} // namespace Eval::NNUE::Features + +#endif // #ifndef NNUE_FEATURES_HALF_KP_H_INCLUDED diff --git a/src/nnue/features/index_list.h b/src/nnue/features/index_list.h new file mode 100644 index 00000000..d9ad680a --- /dev/null +++ b/src/nnue/features/index_list.h @@ -0,0 +1,64 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + 
Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +// Definition of index list of input features + +#ifndef NNUE_FEATURES_INDEX_LIST_H_INCLUDED +#define NNUE_FEATURES_INDEX_LIST_H_INCLUDED + +#include "../../position.h" +#include "../nnue_architecture.h" + +namespace Eval::NNUE::Features { + + // Class template used for feature index list + template + class ValueList { + + public: + std::size_t size() const { return size_; } + void resize(std::size_t size) { size_ = size; } + void push_back(const T& value) { values_[size_++] = value; } + T& operator[](std::size_t index) { return values_[index]; } + T* begin() { return values_; } + T* end() { return values_ + size_; } + const T& operator[](std::size_t index) const { return values_[index]; } + const T* begin() const { return values_; } + const T* end() const { return values_ + size_; } + + void swap(ValueList& other) { + const std::size_t max_size = std::max(size_, other.size_); + for (std::size_t i = 0; i < max_size; ++i) { + std::swap(values_[i], other.values_[i]); + } + std::swap(size_, other.size_); + } + + private: + T values_[MaxSize]; + std::size_t size_ = 0; + }; + + //Type of feature index list + class IndexList + : public ValueList { + }; + +} // namespace Eval::NNUE::Features + +#endif // NNUE_FEATURES_INDEX_LIST_H_INCLUDED diff --git a/src/nnue/layers/affine_transform.h 
b/src/nnue/layers/affine_transform.h new file mode 100644 index 00000000..b585bc87 --- /dev/null +++ b/src/nnue/layers/affine_transform.h @@ -0,0 +1,215 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +// Definition of layer AffineTransform of NNUE evaluation function + +#ifndef NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED +#define NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED + +#include +#include "../nnue_common.h" + +namespace Eval::NNUE::Layers { + + // Affine transformation layer + template + class AffineTransform { + public: + // Input/output type + using InputType = typename PreviousLayer::OutputType; + using OutputType = std::int32_t; + static_assert(std::is_same::value, ""); + + // Number of input/output dimensions + static constexpr IndexType kInputDimensions = + PreviousLayer::kOutputDimensions; + static constexpr IndexType kOutputDimensions = OutputDimensions; + static constexpr IndexType kPaddedInputDimensions = + CeilToMultiple(kInputDimensions, kMaxSimdWidth); + + // Size of forward propagation buffer used in this layer + static constexpr std::size_t kSelfBufferSize = + CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize); + + // Size of the forward propagation buffer used from the input layer to this layer + static constexpr std::size_t kBufferSize = + 
PreviousLayer::kBufferSize + kSelfBufferSize; + + // Hash value embedded in the evaluation file + static constexpr std::uint32_t GetHashValue() { + std::uint32_t hash_value = 0xCC03DAE4u; + hash_value += kOutputDimensions; + hash_value ^= PreviousLayer::GetHashValue() >> 1; + hash_value ^= PreviousLayer::GetHashValue() << 31; + return hash_value; + } + + // Read network parameters + bool ReadParameters(std::istream& stream) { + if (!previous_layer_.ReadParameters(stream)) return false; + stream.read(reinterpret_cast(biases_), + kOutputDimensions * sizeof(BiasType)); + stream.read(reinterpret_cast(weights_), + kOutputDimensions * kPaddedInputDimensions * + sizeof(WeightType)); + return !stream.fail(); + } + + // Forward propagation + const OutputType* Propagate( + const TransformedFeatureType* transformed_features, char* buffer) const { + const auto input = previous_layer_.Propagate( + transformed_features, buffer + kSelfBufferSize); + const auto output = reinterpret_cast(buffer); + + #if defined(USE_AVX512) + constexpr IndexType kNumChunks = kPaddedInputDimensions / (kSimdWidth * 2); + const __m512i kOnes = _mm512_set1_epi16(1); + const auto input_vector = reinterpret_cast(input); + + #elif defined(USE_AVX2) + constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; + const __m256i kOnes = _mm256_set1_epi16(1); + const auto input_vector = reinterpret_cast(input); + + #elif defined(USE_SSSE3) + constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; + const __m128i kOnes = _mm_set1_epi16(1); + const auto input_vector = reinterpret_cast(input); + + #elif defined(USE_NEON) + constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; + const auto input_vector = reinterpret_cast(input); + #endif + + for (IndexType i = 0; i < kOutputDimensions; ++i) { + const IndexType offset = i * kPaddedInputDimensions; + + #if defined(USE_AVX512) + __m512i sum = _mm512_setzero_si512(); + const auto row = reinterpret_cast(&weights_[offset]); + for 
(IndexType j = 0; j < kNumChunks; ++j) { + + #if defined(__MINGW32__) || defined(__MINGW64__) + __m512i product = _mm512_maddubs_epi16(_mm512_loadu_si512(&input_vector[j]), _mm512_load_si512(&row[j])); + #else + __m512i product = _mm512_maddubs_epi16(_mm512_load_si512(&input_vector[j]), _mm512_load_si512(&row[j])); + #endif + + product = _mm512_madd_epi16(product, kOnes); + sum = _mm512_add_epi32(sum, product); + } + output[i] = _mm512_reduce_add_epi32(sum) + biases_[i]; + + // Note: Changing kMaxSimdWidth from 32 to 64 breaks loading existing networks. + // As a result kPaddedInputDimensions may not be an even multiple of 64(512bit) + // and we have to do one more 256bit chunk. + if (kPaddedInputDimensions != kNumChunks * kSimdWidth * 2) + { + const auto iv_256 = reinterpret_cast(input); + const auto row_256 = reinterpret_cast(&weights_[offset]); + int j = kNumChunks * 2; + + #if defined(__MINGW32__) || defined(__MINGW64__) // See HACK comment below in AVX2. + __m256i sum256 = _mm256_maddubs_epi16(_mm256_loadu_si256(&iv_256[j]), _mm256_load_si256(&row_256[j])); + #else + __m256i sum256 = _mm256_maddubs_epi16(_mm256_load_si256(&iv_256[j]), _mm256_load_si256(&row_256[j])); + #endif + + sum256 = _mm256_madd_epi16(sum256, _mm256_set1_epi16(1)); + sum256 = _mm256_hadd_epi32(sum256, sum256); + sum256 = _mm256_hadd_epi32(sum256, sum256); + const __m128i lo = _mm256_extracti128_si256(sum256, 0); + const __m128i hi = _mm256_extracti128_si256(sum256, 1); + output[i] += _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi); + } + + #elif defined(USE_AVX2) + __m256i sum = _mm256_setzero_si256(); + const auto row = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + __m256i product = _mm256_maddubs_epi16( + + #if defined(__MINGW32__) || defined(__MINGW64__) + // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. 
Because the binary + // compiled with g++ in MSYS2 crashes here because the output memory is not aligned + // even though alignas is specified. + _mm256_loadu_si256 + #else + _mm256_load_si256 + #endif + + (&input_vector[j]), _mm256_load_si256(&row[j])); + product = _mm256_madd_epi16(product, kOnes); + sum = _mm256_add_epi32(sum, product); + } + sum = _mm256_hadd_epi32(sum, sum); + sum = _mm256_hadd_epi32(sum, sum); + const __m128i lo = _mm256_extracti128_si256(sum, 0); + const __m128i hi = _mm256_extracti128_si256(sum, 1); + output[i] = _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi) + biases_[i]; + + #elif defined(USE_SSSE3) + __m128i sum = _mm_cvtsi32_si128(biases_[i]); + const auto row = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + __m128i product = _mm_maddubs_epi16( + _mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j])); + product = _mm_madd_epi16(product, kOnes); + sum = _mm_add_epi32(sum, product); + } + sum = _mm_hadd_epi32(sum, sum); + sum = _mm_hadd_epi32(sum, sum); + output[i] = _mm_cvtsi128_si32(sum); + + #elif defined(USE_NEON) + int32x4_t sum = {biases_[i]}; + const auto row = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + int16x8_t product = vmull_s8(input_vector[j * 2], row[j * 2]); + product = vmlal_s8(product, input_vector[j * 2 + 1], row[j * 2 + 1]); + sum = vpadalq_s16(sum, product); + } + output[i] = sum[0] + sum[1] + sum[2] + sum[3]; + + #else + OutputType sum = biases_[i]; + for (IndexType j = 0; j < kInputDimensions; ++j) { + sum += weights_[offset + j] * input[j]; + } + output[i] = sum; + #endif + + } + return output; + } + + private: + using BiasType = OutputType; + using WeightType = std::int8_t; + + PreviousLayer previous_layer_; + + alignas(kCacheLineSize) BiasType biases_[kOutputDimensions]; + alignas(kCacheLineSize) + WeightType weights_[kOutputDimensions * kPaddedInputDimensions]; + }; + +} // namespace Eval::NNUE::Layers + +#endif // #ifndef 
NNUE_LAYERS_AFFINE_TRANSFORM_H_INCLUDED diff --git a/src/nnue/layers/clipped_relu.h b/src/nnue/layers/clipped_relu.h new file mode 100644 index 00000000..7ade598f --- /dev/null +++ b/src/nnue/layers/clipped_relu.h @@ -0,0 +1,186 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +// Definition of layer ClippedReLU of NNUE evaluation function + +#ifndef NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED +#define NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED + +#include "../nnue_common.h" + +namespace Eval::NNUE::Layers { + + // Clipped ReLU + template + class ClippedReLU { + public: + // Input/output type + using InputType = typename PreviousLayer::OutputType; + using OutputType = std::uint8_t; + static_assert(std::is_same::value, ""); + + // Number of input/output dimensions + static constexpr IndexType kInputDimensions = + PreviousLayer::kOutputDimensions; + static constexpr IndexType kOutputDimensions = kInputDimensions; + + // Size of forward propagation buffer used in this layer + static constexpr std::size_t kSelfBufferSize = + CeilToMultiple(kOutputDimensions * sizeof(OutputType), kCacheLineSize); + + // Size of the forward propagation buffer used from the input layer to this layer + static constexpr std::size_t kBufferSize = + PreviousLayer::kBufferSize + kSelfBufferSize; + + // Hash value embedded 
in the evaluation file + static constexpr std::uint32_t GetHashValue() { + std::uint32_t hash_value = 0x538D24C7u; + hash_value += PreviousLayer::GetHashValue(); + return hash_value; + } + + // Read network parameters + bool ReadParameters(std::istream& stream) { + return previous_layer_.ReadParameters(stream); + } + + // Forward propagation + const OutputType* Propagate( + const TransformedFeatureType* transformed_features, char* buffer) const { + const auto input = previous_layer_.Propagate( + transformed_features, buffer + kSelfBufferSize); + const auto output = reinterpret_cast(buffer); + + #if defined(USE_AVX2) + constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth; + const __m256i kZero = _mm256_setzero_si256(); + const __m256i kOffsets = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0); + const auto in = reinterpret_cast(input); + const auto out = reinterpret_cast<__m256i*>(output); + for (IndexType i = 0; i < kNumChunks; ++i) { + const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32( + + #if defined(__MINGW32__) || defined(__MINGW64__) + // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary + // compiled with g++ in MSYS2 crashes here because the output memory is not aligned + // even though alignas is specified. 
+ _mm256_loadu_si256 + #else + _mm256_load_si256 + #endif + + (&in[i * 4 + 0]), + + #if defined(__MINGW32__) || defined(__MINGW64__) + _mm256_loadu_si256 + #else + _mm256_load_si256 + #endif + + (&in[i * 4 + 1])), kWeightScaleBits); + const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32( + + #if defined(__MINGW32__) || defined(__MINGW64__) + _mm256_loadu_si256 + #else + _mm256_load_si256 + #endif + + (&in[i * 4 + 2]), + + #if defined(__MINGW32__) || defined(__MINGW64__) + _mm256_loadu_si256 + #else + _mm256_load_si256 + #endif + + (&in[i * 4 + 3])), kWeightScaleBits); + + #if defined(__MINGW32__) || defined(__MINGW64__) + _mm256_storeu_si256 + #else + _mm256_store_si256 + #endif + + (&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8( + _mm256_packs_epi16(words0, words1), kZero), kOffsets)); + } + constexpr IndexType kStart = kNumChunks * kSimdWidth; + + #elif defined(USE_SSSE3) + constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth; + + #ifdef USE_SSE41 + const __m128i kZero = _mm_setzero_si128(); + #else + const __m128i k0x80s = _mm_set1_epi8(-128); + #endif + + const auto in = reinterpret_cast(input); + const auto out = reinterpret_cast<__m128i*>(output); + for (IndexType i = 0; i < kNumChunks; ++i) { + const __m128i words0 = _mm_srai_epi16(_mm_packs_epi32( + _mm_load_si128(&in[i * 4 + 0]), + _mm_load_si128(&in[i * 4 + 1])), kWeightScaleBits); + const __m128i words1 = _mm_srai_epi16(_mm_packs_epi32( + _mm_load_si128(&in[i * 4 + 2]), + _mm_load_si128(&in[i * 4 + 3])), kWeightScaleBits); + const __m128i packedbytes = _mm_packs_epi16(words0, words1); + _mm_store_si128(&out[i], + + #ifdef USE_SSE41 + _mm_max_epi8(packedbytes, kZero) + #else + _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s) + #endif + + ); + } + constexpr IndexType kStart = kNumChunks * kSimdWidth; + + #elif defined(USE_NEON) + constexpr IndexType kNumChunks = kInputDimensions / (kSimdWidth / 2); + const int8x8_t kZero = {0}; + const auto in = reinterpret_cast(input); + 
const auto out = reinterpret_cast(output); + for (IndexType i = 0; i < kNumChunks; ++i) { + int16x8_t shifted; + const auto pack = reinterpret_cast(&shifted); + pack[0] = vqshrn_n_s32(in[i * 2 + 0], kWeightScaleBits); + pack[1] = vqshrn_n_s32(in[i * 2 + 1], kWeightScaleBits); + out[i] = vmax_s8(vqmovn_s16(shifted), kZero); + } + constexpr IndexType kStart = kNumChunks * (kSimdWidth / 2); + #else + constexpr IndexType kStart = 0; + #endif + + for (IndexType i = kStart; i < kInputDimensions; ++i) { + output[i] = static_cast( + std::max(0, std::min(127, input[i] >> kWeightScaleBits))); + } + return output; + } + + private: + PreviousLayer previous_layer_; + }; + +} // namespace Eval::NNUE::Layers + +#endif // NNUE_LAYERS_CLIPPED_RELU_H_INCLUDED diff --git a/src/nnue/layers/input_slice.h b/src/nnue/layers/input_slice.h new file mode 100644 index 00000000..afca14c8 --- /dev/null +++ b/src/nnue/layers/input_slice.h @@ -0,0 +1,68 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +// NNUE evaluation function layer InputSlice definition + +#ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED +#define NNUE_LAYERS_INPUT_SLICE_H_INCLUDED + +#include "../nnue_common.h" + +namespace Eval::NNUE::Layers { + +// Input layer +template +class InputSlice { + public: + // Need to maintain alignment + static_assert(Offset % kMaxSimdWidth == 0, ""); + + // Output type + using OutputType = TransformedFeatureType; + + // Output dimensionality + static constexpr IndexType kOutputDimensions = OutputDimensions; + + // Size of forward propagation buffer used from the input layer to this layer + static constexpr std::size_t kBufferSize = 0; + + // Hash value embedded in the evaluation file + static constexpr std::uint32_t GetHashValue() { + std::uint32_t hash_value = 0xEC42E90Du; + hash_value ^= kOutputDimensions ^ (Offset << 10); + return hash_value; + } + + // Read network parameters + bool ReadParameters(std::istream& /*stream*/) { + return true; + } + + // Forward propagation + const OutputType* Propagate( + const TransformedFeatureType* transformed_features, + char* /*buffer*/) const { + return transformed_features + Offset; + } + + private: +}; + +} // namespace Layers + +#endif // #ifndef NNUE_LAYERS_INPUT_SLICE_H_INCLUDED diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h new file mode 100644 index 00000000..2a354a3c --- /dev/null +++ b/src/nnue/nnue_accumulator.h @@ -0,0 +1,39 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +// Class for difference calculation of NNUE evaluation function + +#ifndef NNUE_ACCUMULATOR_H_INCLUDED +#define NNUE_ACCUMULATOR_H_INCLUDED + +#include "nnue_architecture.h" + +namespace Eval::NNUE { + + // Class that holds the result of affine transformation of input features + struct alignas(32) Accumulator { + std::int16_t + accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions]; + Value score; + bool computed_accumulation; + bool computed_score; + }; + +} // namespace Eval::NNUE + +#endif // NNUE_ACCUMULATOR_H_INCLUDED diff --git a/src/nnue/nnue_architecture.h b/src/nnue/nnue_architecture.h new file mode 100644 index 00000000..91cdc4bd --- /dev/null +++ b/src/nnue/nnue_architecture.h @@ -0,0 +1,38 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +// Input features and network structure used in NNUE evaluation function + +#ifndef NNUE_ARCHITECTURE_H_INCLUDED +#define NNUE_ARCHITECTURE_H_INCLUDED + +// Defines the network structure +#include "architectures/halfkp_256x2-32-32.h" + +namespace Eval::NNUE { + + static_assert(kTransformedFeatureDimensions % kMaxSimdWidth == 0, ""); + static_assert(Network::kOutputDimensions == 1, ""); + static_assert(std::is_same::value, ""); + + // Trigger for full calculation instead of difference calculation + constexpr auto kRefreshTriggers = RawFeatures::kRefreshTriggers; + +} // namespace Eval::NNUE + +#endif // #ifndef NNUE_ARCHITECTURE_H_INCLUDED diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h new file mode 100644 index 00000000..972ef3e5 --- /dev/null +++ b/src/nnue/nnue_common.h @@ -0,0 +1,77 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+*/ + +// Constants used in NNUE evaluation function + +#ifndef NNUE_COMMON_H_INCLUDED +#define NNUE_COMMON_H_INCLUDED + +#if defined(USE_AVX2) +#include + +#elif defined(USE_SSE41) +#include + +#elif defined(USE_SSSE3) +#include + +#elif defined(USE_SSE2) +#include + +#elif defined(USE_NEON) +#include +#endif + +namespace Eval::NNUE { + + // Version of the evaluation file + constexpr std::uint32_t kVersion = 0x7AF32F16u; + + // Constant used in evaluation value calculation + constexpr int FV_SCALE = 16; + constexpr int kWeightScaleBits = 6; + + // Size of cache line (in bytes) + constexpr std::size_t kCacheLineSize = 64; + + // SIMD width (in bytes) + #if defined(USE_AVX2) + constexpr std::size_t kSimdWidth = 32; + + #elif defined(USE_SSE2) + constexpr std::size_t kSimdWidth = 16; + + #elif defined(USE_NEON) + constexpr std::size_t kSimdWidth = 16; + #endif + + constexpr std::size_t kMaxSimdWidth = 32; + + // Type of input feature after conversion + using TransformedFeatureType = std::uint8_t; + using IndexType = std::uint32_t; + + // Round n up to be a multiple of base + template + constexpr IntType CeilToMultiple(IntType n, IntType base) { + return (n + base - 1) / base * base; + } + +} // namespace Eval::NNUE + +#endif // #ifndef NNUE_COMMON_H_INCLUDED diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h new file mode 100644 index 00000000..1cfebbe4 --- /dev/null +++ b/src/nnue/nnue_feature_transformer.h @@ -0,0 +1,355 @@ +/* + Stockfish, a UCI chess playing engine derived from Glaurung 2.1 + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) + + Stockfish is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + Stockfish is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +// A class that converts the input features of the NNUE evaluation function + +#ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED +#define NNUE_FEATURE_TRANSFORMER_H_INCLUDED + +#include "nnue_common.h" +#include "nnue_architecture.h" +#include "features/index_list.h" + +#include // std::memset() + +namespace Eval::NNUE { + + // Input feature converter + class FeatureTransformer { + + private: + // Number of output dimensions for one side + static constexpr IndexType kHalfDimensions = kTransformedFeatureDimensions; + + public: + // Output type + using OutputType = TransformedFeatureType; + + // Number of input/output dimensions + static constexpr IndexType kInputDimensions = RawFeatures::kDimensions; + static constexpr IndexType kOutputDimensions = kHalfDimensions * 2; + + // Size of forward propagation buffer + static constexpr std::size_t kBufferSize = + kOutputDimensions * sizeof(OutputType); + + // Hash value embedded in the evaluation file + static constexpr std::uint32_t GetHashValue() { + return RawFeatures::kHashValue ^ kOutputDimensions; + } + + // Read network parameters + bool ReadParameters(std::istream& stream) { + stream.read(reinterpret_cast(biases_), + kHalfDimensions * sizeof(BiasType)); + stream.read(reinterpret_cast(weights_), + kHalfDimensions * kInputDimensions * sizeof(WeightType)); + return !stream.fail(); + } + + // Proceed with the difference calculation if possible + bool UpdateAccumulatorIfPossible(const Position& pos) const { + const auto now = pos.state(); + if (now->accumulator.computed_accumulation) { + return true; + } + const auto prev = now->previous; + if (prev && 
prev->accumulator.computed_accumulation) { + UpdateAccumulator(pos); + return true; + } + return false; + } + + // Convert input features + void Transform(const Position& pos, OutputType* output, bool refresh) const { + if (refresh || !UpdateAccumulatorIfPossible(pos)) { + RefreshAccumulator(pos); + } + const auto& accumulation = pos.state()->accumulator.accumulation; + + #if defined(USE_AVX2) + constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; + constexpr int kControl = 0b11011000; + const __m256i kZero = _mm256_setzero_si256(); + + #elif defined(USE_SSSE3) + constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; + + #ifdef USE_SSE41 + const __m128i kZero = _mm_setzero_si128(); + #else + const __m128i k0x80s = _mm_set1_epi8(-128); + #endif + + #elif defined(USE_NEON) + constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); + const int8x8_t kZero = {0}; + #endif + + const Color perspectives[2] = {pos.side_to_move(), ~pos.side_to_move()}; + for (IndexType p = 0; p < 2; ++p) { + const IndexType offset = kHalfDimensions * p; + + #if defined(USE_AVX2) + auto out = reinterpret_cast<__m256i*>(&output[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + __m256i sum0 = + + #if defined(__MINGW32__) || defined(__MINGW64__) + // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary + // compiled with g++ in MSYS2 crashes here because the output memory is not aligned + // even though alignas is specified. 
+ _mm256_loadu_si256 + #else + _mm256_load_si256 + #endif + + (&reinterpret_cast( + accumulation[perspectives[p]][0])[j * 2 + 0]); + __m256i sum1 = + + #if defined(__MINGW32__) || defined(__MINGW64__) + _mm256_loadu_si256 + #else + _mm256_load_si256 + #endif + + (&reinterpret_cast( + accumulation[perspectives[p]][0])[j * 2 + 1]); + + #if defined(__MINGW32__) || defined(__MINGW64__) + _mm256_storeu_si256 + #else + _mm256_store_si256 + #endif + + (&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8( + _mm256_packs_epi16(sum0, sum1), kZero), kControl)); + } + + #elif defined(USE_SSSE3) + auto out = reinterpret_cast<__m128i*>(&output[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + __m128i sum0 = _mm_load_si128(&reinterpret_cast( + accumulation[perspectives[p]][0])[j * 2 + 0]); + __m128i sum1 = _mm_load_si128(&reinterpret_cast( + accumulation[perspectives[p]][0])[j * 2 + 1]); + const __m128i packedbytes = _mm_packs_epi16(sum0, sum1); + + _mm_store_si128(&out[j], + + #ifdef USE_SSE41 + _mm_max_epi8(packedbytes, kZero) + #else + _mm_subs_epi8(_mm_adds_epi8(packedbytes, k0x80s), k0x80s) + #endif + + ); + } + + #elif defined(USE_NEON) + const auto out = reinterpret_cast(&output[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + int16x8_t sum = reinterpret_cast( + accumulation[perspectives[p]][0])[j]; + out[j] = vmax_s8(vqmovn_s16(sum), kZero); + } + + #else + for (IndexType j = 0; j < kHalfDimensions; ++j) { + BiasType sum = accumulation[static_cast(perspectives[p])][0][j]; + output[offset + j] = static_cast( + std::max(0, std::min(127, sum))); + } + #endif + + } + } + + private: + // Calculate cumulative value without using difference calculation + void RefreshAccumulator(const Position& pos) const { + auto& accumulator = pos.state()->accumulator; + IndexType i = 0; + Features::IndexList active_indices[2]; + RawFeatures::AppendActiveIndices(pos, kRefreshTriggers[i], + active_indices); + for (Color perspective : { WHITE, BLACK }) { + 
std::memcpy(accumulator.accumulation[perspective][i], biases_, + kHalfDimensions * sizeof(BiasType)); + for (const auto index : active_indices[perspective]) { + const IndexType offset = kHalfDimensions * index; + + #if defined(USE_AVX2) + auto accumulation = reinterpret_cast<__m256i*>( + &accumulator.accumulation[perspective][i][0]); + auto column = reinterpret_cast(&weights_[offset]); + constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); + for (IndexType j = 0; j < kNumChunks; ++j) { + #if defined(__MINGW32__) || defined(__MINGW64__) + _mm256_storeu_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadu_si256(&accumulation[j]), column[j])); + #else + accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]); + #endif + } + + #elif defined(USE_SSE2) + auto accumulation = reinterpret_cast<__m128i*>( + &accumulator.accumulation[perspective][i][0]); + auto column = reinterpret_cast(&weights_[offset]); + constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); + } + + #elif defined(USE_NEON) + auto accumulation = reinterpret_cast( + &accumulator.accumulation[perspective][i][0]); + auto column = reinterpret_cast(&weights_[offset]); + constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = vaddq_s16(accumulation[j], column[j]); + } + + #else + for (IndexType j = 0; j < kHalfDimensions; ++j) { + accumulator.accumulation[perspective][i][j] += weights_[offset + j]; + } + #endif + + } + } + + accumulator.computed_accumulation = true; + accumulator.computed_score = false; + } + + // Calculate cumulative value using difference calculation + void UpdateAccumulator(const Position& pos) const { + const auto prev_accumulator = pos.state()->previous->accumulator; + auto& accumulator = pos.state()->accumulator; + IndexType i = 0; + Features::IndexList 
removed_indices[2], added_indices[2]; + bool reset[2]; + RawFeatures::AppendChangedIndices(pos, kRefreshTriggers[i], + removed_indices, added_indices, reset); + for (Color perspective : { WHITE, BLACK }) { + + #if defined(USE_AVX2) + constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); + auto accumulation = reinterpret_cast<__m256i*>( + &accumulator.accumulation[perspective][i][0]); + + #elif defined(USE_SSE2) + constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); + auto accumulation = reinterpret_cast<__m128i*>( + &accumulator.accumulation[perspective][i][0]); + + #elif defined(USE_NEON) + constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); + auto accumulation = reinterpret_cast( + &accumulator.accumulation[perspective][i][0]); + #endif + + if (reset[perspective]) { + std::memcpy(accumulator.accumulation[perspective][i], biases_, + kHalfDimensions * sizeof(BiasType)); + } else { + std::memcpy(accumulator.accumulation[perspective][i], + prev_accumulator.accumulation[perspective][i], + kHalfDimensions * sizeof(BiasType)); + // Difference calculation for the deactivated features + for (const auto index : removed_indices[perspective]) { + const IndexType offset = kHalfDimensions * index; + + #if defined(USE_AVX2) + auto column = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = _mm256_sub_epi16(accumulation[j], column[j]); + } + + #elif defined(USE_SSE2) + auto column = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]); + } + + #elif defined(USE_NEON) + auto column = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = vsubq_s16(accumulation[j], column[j]); + } + + #else + for (IndexType j = 0; j < kHalfDimensions; ++j) { + accumulator.accumulation[perspective][i][j] -= + weights_[offset + j]; + } + #endif + + } + } + { // 
Difference calculation for the activated features + for (const auto index : added_indices[perspective]) { + const IndexType offset = kHalfDimensions * index; + + #if defined(USE_AVX2) + auto column = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]); + } + + #elif defined(USE_SSE2) + auto column = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); + } + + #elif defined(USE_NEON) + auto column = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = vaddq_s16(accumulation[j], column[j]); + } + + #else + for (IndexType j = 0; j < kHalfDimensions; ++j) { + accumulator.accumulation[perspective][i][j] += + weights_[offset + j]; + } + #endif + + } + } + } + + accumulator.computed_accumulation = true; + accumulator.computed_score = false; + } + + using BiasType = std::int16_t; + using WeightType = std::int16_t; + + alignas(kCacheLineSize) BiasType biases_[kHalfDimensions]; + alignas(kCacheLineSize) + WeightType weights_[kHalfDimensions * kInputDimensions]; + }; + +} // namespace Eval::NNUE + +#endif // #ifndef NNUE_FEATURE_TRANSFORMER_H_INCLUDED diff --git a/src/pawns.cpp b/src/pawns.cpp index 7f8d451a..73682529 100644 --- a/src/pawns.cpp +++ b/src/pawns.cpp @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/pawns.h b/src/pawns.h index e6098069..5499826e 100644 --- 
a/src/pawns.h +++ b/src/pawns.h @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/position.cpp b/src/position.cpp index 396bff5f..46e5d78b 100644 --- a/src/position.cpp +++ b/src/position.cpp @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -200,6 +198,9 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th std::fill_n(&pieceList[0][0], sizeof(pieceList) / sizeof(Square), SQ_NONE); st = si; + // Each piece on board gets a unique ID used to track the piece later + PieceId piece_id, next_piece_id = PIECE_ID_ZERO; + ss >> std::noskipws; // 1. Piece placement @@ -213,7 +214,19 @@ Position& Position::set(const string& fenStr, bool isChess960, StateInfo* si, Th else if ((idx = PieceToChar.find(token)) != string::npos) { - put_piece(Piece(idx), sq); + auto pc = Piece(idx); + put_piece(pc, sq); + + if (Eval::useNNUE) + { + // Kings get a fixed ID, other pieces get ID in order of placement + piece_id = + (idx == W_KING) ? PIECE_ID_WKING : + (idx == B_KING) ? 
PIECE_ID_BKING : + next_piece_id++; + evalList.put_piece(piece_id, sq, pc); + } + ++sq; } } @@ -705,6 +718,14 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { ++st->rule50; ++st->pliesFromNull; + // Used by NNUE + st->accumulator.computed_accumulation = false; + st->accumulator.computed_score = false; + PieceId dp0 = PIECE_ID_NONE; + PieceId dp1 = PIECE_ID_NONE; + auto& dp = st->dirtyPiece; + dp.dirty_num = 1; + Color us = sideToMove; Color them = ~us; Square from = from_sq(m); @@ -752,6 +773,16 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { else st->nonPawnMaterial[them] -= PieceValue[MG][captured]; + if (Eval::useNNUE) + { + dp.dirty_num = 2; // 2 pieces moved + dp1 = piece_id_on(capsq); + dp.pieceId[1] = dp1; + dp.old_piece[1] = evalList.piece_with_id(dp1); + evalList.put_piece(dp1, capsq, NO_PIECE); + dp.new_piece[1] = evalList.piece_with_id(dp1); + } + // Update board and piece lists remove_piece(capsq); @@ -787,7 +818,18 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { // Move the piece. 
The tricky Chess960 castling is handled earlier if (type_of(m) != CASTLING) + { + if (Eval::useNNUE) + { + dp0 = piece_id_on(from); + dp.pieceId[0] = dp0; + dp.old_piece[0] = evalList.piece_with_id(dp0); + evalList.put_piece(dp0, to, pc); + dp.new_piece[0] = evalList.piece_with_id(dp0); + } + move_piece(from, to); + } // If the moving piece is a pawn do some special extra work if (type_of(pc) == PAWN) @@ -810,6 +852,13 @@ void Position::do_move(Move m, StateInfo& newSt, bool givesCheck) { remove_piece(to); put_piece(promotion, to); + if (Eval::useNNUE) + { + dp0 = piece_id_on(to); + evalList.put_piece(dp0, to, promotion); + dp.new_piece[0] = evalList.piece_with_id(dp0); + } + // Update hash keys k ^= Zobrist::psq[pc][to] ^ Zobrist::psq[promotion][to]; st->pawnKey ^= Zobrist::psq[pc][to]; @@ -901,6 +950,12 @@ void Position::undo_move(Move m) { { move_piece(to, from); // Put the piece back at the source square + if (Eval::useNNUE) + { + PieceId dp0 = st->dirtyPiece.pieceId[0]; + evalList.put_piece(dp0, from, pc); + } + if (st->capturedPiece) { Square capsq = to; @@ -917,6 +972,14 @@ void Position::undo_move(Move m) { } put_piece(st->capturedPiece, capsq); // Restore the captured piece + + if (Eval::useNNUE) + { + PieceId dp1 = st->dirtyPiece.pieceId[1]; + assert(evalList.piece_with_id(dp1).from[WHITE] == PS_NONE); + assert(evalList.piece_with_id(dp1).from[BLACK] == PS_NONE); + evalList.put_piece(dp1, capsq, st->capturedPiece); + } } } @@ -938,6 +1001,34 @@ void Position::do_castling(Color us, Square from, Square& to, Square& rfrom, Squ rto = relative_square(us, kingSide ? SQ_F1 : SQ_D1); to = relative_square(us, kingSide ? 
SQ_G1 : SQ_C1); + if (Eval::useNNUE) + { + PieceId dp0, dp1; + auto& dp = st->dirtyPiece; + dp.dirty_num = 2; // 2 pieces moved + + if (Do) + { + dp0 = piece_id_on(from); + dp1 = piece_id_on(rfrom); + dp.pieceId[0] = dp0; + dp.old_piece[0] = evalList.piece_with_id(dp0); + evalList.put_piece(dp0, to, make_piece(us, KING)); + dp.new_piece[0] = evalList.piece_with_id(dp0); + dp.pieceId[1] = dp1; + dp.old_piece[1] = evalList.piece_with_id(dp1); + evalList.put_piece(dp1, rto, make_piece(us, ROOK)); + dp.new_piece[1] = evalList.piece_with_id(dp1); + } + else + { + dp0 = piece_id_on(to); + dp1 = piece_id_on(rto); + evalList.put_piece(dp0, from, make_piece(us, KING)); + evalList.put_piece(dp1, rfrom, make_piece(us, ROOK)); + } + } + // Remove both pieces first since squares could overlap in Chess960 remove_piece(Do ? from : to); remove_piece(Do ? rfrom : rto); @@ -955,7 +1046,14 @@ void Position::do_null_move(StateInfo& newSt) { assert(!checkers()); assert(&newSt != st); - std::memcpy(&newSt, st, sizeof(StateInfo)); + if (Eval::useNNUE) + { + std::memcpy(&newSt, st, sizeof(StateInfo)); + st->accumulator.computed_score = false; + } + else + std::memcpy(&newSt, st, offsetof(StateInfo, accumulator)); + newSt.previous = st; st = &newSt; diff --git a/src/position.h b/src/position.h index 8cfa3920..a77050eb 100644 --- a/src/position.h +++ b/src/position.h @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -27,8 +25,11 @@ #include #include "bitboard.h" +#include "evaluate.h" #include "types.h" +#include "nnue/nnue_accumulator.h" + /// 
StateInfo struct stores information needed to restore a Position object to /// its previous state when we retract a move. Whenever a move is made on the @@ -54,6 +55,10 @@ struct StateInfo { Bitboard pinners[COLOR_NB]; Bitboard checkSquares[PIECE_TYPE_NB]; int repetition; + + // Used by NNUE + Eval::NNUE::Accumulator accumulator; + DirtyPiece dirtyPiece; }; @@ -163,6 +168,10 @@ public: bool pos_is_ok() const; void flip(); + // Used by NNUE + StateInfo* state() const; + const EvalList* eval_list() const; + private: // Initialization helpers (used while setting up a position) void set_castling_right(Color c, Square rfrom); @@ -176,6 +185,9 @@ private: template void do_castling(Color us, Square from, Square& to, Square& rfrom, Square& rto); + // ID of a piece on a given square + PieceId piece_id_on(Square sq) const; + // Data members Piece board[SQUARE_NB]; Bitboard byTypeBB[PIECE_TYPE_NB]; @@ -192,6 +204,9 @@ private: Thread* thisThread; StateInfo* st; bool chess960; + + // List of pieces used in NNUE evaluation function + EvalList evalList; }; namespace PSQT { @@ -426,4 +441,25 @@ inline void Position::do_move(Move m, StateInfo& newSt) { do_move(m, newSt, gives_check(m)); } +inline StateInfo* Position::state() const { + + return st; +} + +inline const EvalList* Position::eval_list() const { + + return &evalList; +} + +inline PieceId Position::piece_id_on(Square sq) const +{ + + assert(piece_on(sq) != NO_PIECE); + + PieceId pid = evalList.piece_id_list[sq]; + assert(is_ok(pid)); + + return pid; +} + #endif // #ifndef POSITION_H_INCLUDED diff --git a/src/psqt.cpp b/src/psqt.cpp index 5e8dd2c7..eb36e75e 100644 --- a/src/psqt.cpp +++ b/src/psqt.cpp @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The 
Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/search.cpp b/src/search.cpp index 91ac60ad..d1dc4489 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -227,6 +225,8 @@ void MainThread::search() { Time.init(Limits, us, rootPos.game_ply()); TT.new_search(); + Eval::verify_NNUE(); + if (rootMoves.empty()) { rootMoves.emplace_back(MOVE_NONE); diff --git a/src/search.h b/src/search.h index 3e855c8b..2554f3fb 100644 --- a/src/search.h +++ b/src/search.h @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/syzygy/tbprobe.cpp b/src/syzygy/tbprobe.cpp index 95d58945..20215b96 100644 --- a/src/syzygy/tbprobe.cpp +++ b/src/syzygy/tbprobe.cpp @@ -1,7 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (c) 2013 Ronald de Man - Copyright (C) 2016-2020 Marco Costalba, Lucas Braesch + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish 
is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/syzygy/tbprobe.h b/src/syzygy/tbprobe.h index df3ca4fe..b998989b 100644 --- a/src/syzygy/tbprobe.h +++ b/src/syzygy/tbprobe.h @@ -1,7 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (c) 2013 Ronald de Man - Copyright (C) 2016-2020 Marco Costalba, Lucas Braesch + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/thread.cpp b/src/thread.cpp index a0ee2b25..44aea14e 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/thread.h b/src/thread.h index a69e1d10..46da1e34 100644 --- a/src/thread.h +++ b/src/thread.h @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/thread_win32_osx.h b/src/thread_win32_osx.h index 0ef5c981..c4b55a48 100644 --- a/src/thread_win32_osx.h +++ 
b/src/thread_win32_osx.h @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/timeman.cpp b/src/timeman.cpp index 546eadd2..df4ba9b2 100644 --- a/src/timeman.cpp +++ b/src/timeman.cpp @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/timeman.h b/src/timeman.h index 9301dc94..5ad72b32 100644 --- a/src/timeman.h +++ b/src/timeman.h @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/tt.cpp b/src/tt.cpp index 34590903..d494c27d 100644 --- a/src/tt.cpp +++ b/src/tt.cpp @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) 
- Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/tt.h b/src/tt.h index e18db8ce..c177ca52 100644 --- a/src/tt.h +++ b/src/tt.h @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/tune.cpp b/src/tune.cpp index c1b1c76b..e94f67f8 100644 --- a/src/tune.cpp +++ b/src/tune.cpp @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/tune.h b/src/tune.h index 27c3f961..1489fa32 100644 --- a/src/tune.h +++ b/src/tune.h @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2017 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2018 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish 
developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/types.h b/src/types.h index c1598561..379859f7 100644 --- a/src/types.h +++ b/src/types.h @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -203,6 +201,22 @@ enum Piece { PIECE_NB = 16 }; +// An ID used to track the pieces. Max. 32 pieces on board. +enum PieceId { + PIECE_ID_ZERO = 0, + PIECE_ID_KING = 30, + PIECE_ID_WKING = 30, + PIECE_ID_BKING = 31, + PIECE_ID_NONE = 32 +}; + +inline PieceId operator++(PieceId& d, int) { + + PieceId x = d; + d = PieceId(int(d) + 1); + return x; +} + constexpr Value PieceValue[PHASE_NB][PIECE_NB] = { { VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO, VALUE_ZERO, PawnValueMg, KnightValueMg, BishopValueMg, RookValueMg, QueenValueMg, VALUE_ZERO, VALUE_ZERO }, @@ -232,7 +246,8 @@ enum Square : int { SQ_A8, SQ_B8, SQ_C8, SQ_D8, SQ_E8, SQ_F8, SQ_G8, SQ_H8, SQ_NONE, - SQUARE_NB = 64 + SQUARE_ZERO = 0, + SQUARE_NB = 64 }; enum Direction : int { @@ -255,6 +270,94 @@ enum Rank : int { RANK_1, RANK_2, RANK_3, RANK_4, RANK_5, RANK_6, RANK_7, RANK_8, RANK_NB }; +// unique number for each piece type on each square +enum PieceSquare : uint32_t { + PS_NONE = 0, + PS_W_PAWN = 1, + PS_B_PAWN = 1 * SQUARE_NB + 1, + PS_W_KNIGHT = 2 * SQUARE_NB + 1, + PS_B_KNIGHT = 3 * SQUARE_NB + 1, + PS_W_BISHOP = 4 * SQUARE_NB + 1, + PS_B_BISHOP = 5 * SQUARE_NB + 1, + PS_W_ROOK = 6 * 
SQUARE_NB + 1, + PS_B_ROOK = 7 * SQUARE_NB + 1, + PS_W_QUEEN = 8 * SQUARE_NB + 1, + PS_B_QUEEN = 9 * SQUARE_NB + 1, + PS_W_KING = 10 * SQUARE_NB + 1, + PS_END = PS_W_KING, // pieces without kings (pawns included) + PS_B_KING = 11 * SQUARE_NB + 1, + PS_END2 = 12 * SQUARE_NB + 1 +}; + +struct ExtPieceSquare { + PieceSquare from[COLOR_NB]; +}; + +// Array for finding the PieceSquare corresponding to the piece on the board +extern ExtPieceSquare kpp_board_index[PIECE_NB]; + +constexpr bool is_ok(PieceId pid); +constexpr Square rotate180(Square sq); + +// Structure holding which tracked piece (PieceId) is where (PieceSquare) +class EvalList { + +public: + // Max. number of pieces without kings is 30 but must be a multiple of 4 in AVX2 + static const int MAX_LENGTH = 32; + + // Array that holds the piece id for the pieces on the board + PieceId piece_id_list[SQUARE_NB]; + + // List of pieces, separate from White and Black POV + PieceSquare* piece_list_fw() const { return const_cast(pieceListFw); } + PieceSquare* piece_list_fb() const { return const_cast(pieceListFb); } + + // Place the piece pc with piece_id on the square sq on the board + void put_piece(PieceId piece_id, Square sq, Piece pc) + { + assert(is_ok(piece_id)); + if (pc != NO_PIECE) + { + pieceListFw[piece_id] = PieceSquare(kpp_board_index[pc].from[WHITE] + sq); + pieceListFb[piece_id] = PieceSquare(kpp_board_index[pc].from[BLACK] + rotate180(sq)); + piece_id_list[sq] = piece_id; + } + else + { + pieceListFw[piece_id] = PS_NONE; + pieceListFb[piece_id] = PS_NONE; + piece_id_list[sq] = piece_id; + } + } + + // Convert the specified piece_id piece to ExtPieceSquare type and return it + ExtPieceSquare piece_with_id(PieceId piece_id) const + { + ExtPieceSquare eps; + eps.from[WHITE] = pieceListFw[piece_id]; + eps.from[BLACK] = pieceListFb[piece_id]; + return eps; + } + +private: + PieceSquare pieceListFw[MAX_LENGTH]; + PieceSquare pieceListFb[MAX_LENGTH]; +}; + +// For differential evaluation of pieces that 
changed since last turn +struct DirtyPiece { + + // Number of changed pieces + int dirty_num; + + // The ids of changed pieces, max. 2 pieces can change in one move + PieceId pieceId[2]; + + // What changed from the piece with that piece number + ExtPieceSquare old_piece[2]; + ExtPieceSquare new_piece[2]; +}; /// Score enum stores a middlegame and an endgame value in a single integer (enum). /// The least significant 16 bits are used to store the middlegame value and the @@ -280,10 +383,10 @@ inline Value mg_value(Score s) { } #define ENABLE_BASE_OPERATORS_ON(T) \ -constexpr T operator+(T d1, int d2) { return T(int(d1) + d2); } \ -constexpr T operator-(T d1, int d2) { return T(int(d1) - d2); } \ +constexpr T operator+(T d1, int d2) { return T(int(d1) + d2); } \ +constexpr T operator-(T d1, int d2) { return T(int(d1) - d2); } \ constexpr T operator-(T d) { return T(-int(d)); } \ -inline T& operator+=(T& d1, int d2) { return d1 = d1 + d2; } \ +inline T& operator+=(T& d1, int d2) { return d1 = d1 + d2; } \ inline T& operator-=(T& d1, int d2) { return d1 = d1 - d2; } #define ENABLE_INCR_OPERATORS_ON(T) \ @@ -302,6 +405,9 @@ inline T& operator/=(T& d, int i) { return d = T(int(d) / i); } ENABLE_FULL_OPERATORS_ON(Value) ENABLE_FULL_OPERATORS_ON(Direction) +ENABLE_INCR_OPERATORS_ON(Piece) +ENABLE_INCR_OPERATORS_ON(PieceSquare) +ENABLE_INCR_OPERATORS_ON(PieceId) ENABLE_INCR_OPERATORS_ON(PieceType) ENABLE_INCR_OPERATORS_ON(Square) ENABLE_INCR_OPERATORS_ON(File) @@ -390,6 +496,10 @@ inline Color color_of(Piece pc) { return Color(pc >> 3); } +constexpr bool is_ok(PieceId pid) { + return pid < PIECE_ID_NONE; +} + constexpr bool is_ok(Square s) { return s >= SQ_A1 && s <= SQ_H8; } @@ -426,6 +536,11 @@ constexpr Square to_sq(Move m) { return Square(m & 0x3F); } +// Return relative square when turning the board 180 degrees +constexpr Square rotate180(Square sq) { + return (Square)(sq ^ 0x3F); +} + constexpr int from_to(Move m) { return m & 0xFFF; } diff --git a/src/uci.cpp 
b/src/uci.cpp index bb57c80b..d6486320 100644 --- a/src/uci.cpp +++ b/src/uci.cpp @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -78,6 +76,20 @@ namespace { } } + // trace_eval() prints the evaluation for the current position, consistent with the UCI + // options set so far. + + void trace_eval(Position& pos) { + + StateListPtr states(new std::deque(1)); + Position p; + p.set(pos.fen(), Options["UCI_Chess960"], &states->back(), Threads.main()); + + Eval::verify_NNUE(); + + sync_cout << "\n" << Eval::trace(p) << sync_endl; + } + // setoption() is called when engine receives the "setoption" UCI command. The // function updates the UCI option ("name") to the given value ("value"). 
@@ -166,7 +178,7 @@ namespace { nodes += Threads.nodes_searched(); } else - sync_cout << "\n" << Eval::trace(pos) << sync_endl; + trace_eval(pos); } else if (token == "setoption") setoption(is); else if (token == "position") position(pos, is, states); @@ -261,7 +273,7 @@ void UCI::loop(int argc, char* argv[]) { else if (token == "flip") pos.flip(); else if (token == "bench") bench(pos, is, states); else if (token == "d") sync_cout << pos << sync_endl; - else if (token == "eval") sync_cout << Eval::trace(pos) << sync_endl; + else if (token == "eval") trace_eval(pos); else if (token == "compiler") sync_cout << compiler_info() << sync_endl; else sync_cout << "Unknown command: " << cmd << sync_endl; diff --git a/src/uci.h b/src/uci.h index ad954d9f..eb0b390b 100644 --- a/src/uci.h +++ b/src/uci.h @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/ucioption.cpp b/src/ucioption.cpp index ef54ef4e..788aed17 100644 --- a/src/ucioption.cpp +++ b/src/ucioption.cpp @@ -1,8 +1,6 @@ /* Stockfish, a UCI chess playing engine derived from Glaurung 2.1 - Copyright (C) 2004-2008 Tord Romstad (Glaurung author) - Copyright (C) 2008-2015 Marco Costalba, Joona Kiiski, Tord Romstad - Copyright (C) 2015-2020 Marco Costalba, Joona Kiiski, Gary Linscott, Tord Romstad + Copyright (C) 2004-2020 The Stockfish developers (see AUTHORS file) Stockfish is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -42,7 +40,8 @@ void on_hash_size(const Option& o) { 
TT.resize(size_t(o)); } void on_logger(const Option& o) { start_logger(o); } void on_threads(const Option& o) { Threads.set(size_t(o)); } void on_tb_path(const Option& o) { Tablebases::init(o); } - +void on_use_NNUE(const Option& ) { Eval::init_NNUE(); } +void on_eval_file(const Option& ) { Eval::init_NNUE(); } /// Our case insensitive less() function as required by UCI protocol bool CaseInsensitiveLess::operator() (const string& s1, const string& s2) const { @@ -79,6 +78,8 @@ void init(OptionsMap& o) { o["SyzygyProbeDepth"] << Option(1, 1, 100); o["Syzygy50MoveRule"] << Option(true); o["SyzygyProbeLimit"] << Option(7, 0, 7); + o["Use NNUE"] << Option(false, on_use_NNUE); + o["EvalFile"] << Option("nn-97f742aaefcd.nnue", on_eval_file); } From 3dca13a958cd0dfea1cdea91da230c5aac9e322f Mon Sep 17 00:00:00 2001 From: MJZ1977 <37274752+MJZ1977@users.noreply.github.com> Date: Thu, 6 Aug 2020 17:39:10 +0200 Subject: [PATCH 27/86] NNUE evaluation threshold The idea is to use NNUE only on quite balanced material positions. This brings a big speedup in search, since NNUE eval is slower than classical eval on most hardware, especially on unbalanced positions with LazyEval.
STC: https://tests.stockfishchess.org/tests/view/5f2c2680b3ebe5cbfee85b61 LLR: 2.95 (-2.94,2.94) {-0.50,1.50} Total: 3168 W: 560 L: 400 D: 2208 Ptnml(0-2): 21, 294, 819, 404, 46 LTC: https://tests.stockfishchess.org/tests/view/5f2c2ca6b3ebe5cbfee85b69 LLR: 2.98 (-2.94,2.94) {0.25,1.75} Total: 3200 W: 287 L: 183 D: 2730 Ptnml(0-2): 4, 149, 1191, 251, 5 closes https://github.com/official-stockfish/Stockfish/pull/2916 Bench 4746616 --- src/evaluate.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index f43c62d6..09496fdc 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -107,9 +107,10 @@ using namespace Trace; namespace { // Threshold for lazy and space evaluation - constexpr Value LazyThreshold1 = Value(1400); - constexpr Value LazyThreshold2 = Value(1300); + constexpr Value LazyThreshold1 = Value(1400); + constexpr Value LazyThreshold2 = Value(1300); constexpr Value SpaceThreshold = Value(12222); + constexpr Value NNUEThreshold = Value(500); // KingAttackWeights[PieceType] contains king attack weights by piece type constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 }; @@ -941,9 +942,14 @@ make_v: Value Eval::evaluate(const Position& pos) { if (Eval::useNNUE) - return NNUE::evaluate(pos); - else - return Evaluation(pos).value(); + { + Value balance = pos.non_pawn_material(WHITE) - pos.non_pawn_material(BLACK); + balance += 200 * (pos.count(WHITE) - pos.count(BLACK)); + // Take NNUE eval only on balanced positions + if (abs(balance) < NNUEThreshold) + return NNUE::evaluate(pos); + } + return Evaluation(pos).value(); } /// trace() is like evaluate(), but instead of returning a value, it returns From 8b8412ef87825d8e341e160585307dc89843b7f6 Mon Sep 17 00:00:00 2001 From: Stefan Geschwentner Date: Fri, 7 Aug 2020 01:08:15 +0200 Subject: [PATCH 28/86] Add tempo also to NNUE eval. 
STC: LLR: 2.93 (-2.94,2.94) {-0.50,1.50} Total: 10608 W: 1507 L: 1358 D: 7743 Ptnml(0-2): 94, 945, 3074, 1100, 91 https://tests.stockfishchess.org/tests/view/5f2c5921b3ebe5cbfee85b8b LTC: LLR: 2.94 (-2.94,2.94) {0.25,1.75} Total: 7536 W: 556 L: 448 D: 6532 Ptnml(0-2): 9, 383, 2881, 481, 14 https://tests.stockfishchess.org/tests/view/5f2c6f4461e3b6af64881e95 closes https://github.com/official-stockfish/Stockfish/pull/2919 Bench: 4746616 --- src/evaluate.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 09496fdc..015efa48 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -947,7 +947,7 @@ Value Eval::evaluate(const Position& pos) { balance += 200 * (pos.count(WHITE) - pos.count(BLACK)); // Take NNUE eval only on balanced positions if (abs(balance) < NNUEThreshold) - return NNUE::evaluate(pos); + return NNUE::evaluate(pos) + Tempo; } return Evaluation(pos).value(); } From af935365e3e528f445c1c0f48bb43b8cf685719c Mon Sep 17 00:00:00 2001 From: FauziAkram Date: Thu, 6 Aug 2020 17:37:54 -0700 Subject: [PATCH 29/86] Tuned pawn values Passed STC: https://tests.stockfishchess.org/tests/view/5f2aa49fa5abc164f05e4d1b LLR: 2.95 (-2.94,2.94) {-0.50,1.50} Total: 40888 W: 7977 L: 7726 D: 25185 Ptnml(0-2): 665, 4806, 9333, 4893, 747 Passed LTC: https://tests.stockfishchess.org/tests/view/5f2b1059b3ebe5cbfee85ae7 LLR: 2.98 (-2.94,2.94) {0.25,1.75} Total: 51264 W: 6445 L: 6134 D: 38685 Ptnml(0-2): 328, 4564, 15580, 4789, 371 closes https://github.com/official-stockfish/Stockfish/pull/2920 bench: 4314943 --- src/pawns.cpp | 14 +++++++------- src/types.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/pawns.cpp b/src/pawns.cpp index 73682529..868d0c8e 100644 --- a/src/pawns.cpp +++ b/src/pawns.cpp @@ -30,21 +30,21 @@ namespace { #define S(mg, eg) make_score(mg, eg) // Pawn penalties - constexpr Score Backward = S( 9, 24); - constexpr Score Doubled = S(11, 56); - constexpr Score Isolated = S( 5, 
15); - constexpr Score WeakLever = S( 0, 56); - constexpr Score WeakUnopposed = S(13, 27); + constexpr Score Backward = S( 8, 27); + constexpr Score Doubled = S(11, 55); + constexpr Score Isolated = S( 5, 17); + constexpr Score WeakLever = S( 2, 54); + constexpr Score WeakUnopposed = S(15, 25); // Bonus for blocked pawns at 5th or 6th rank - constexpr Score BlockedPawn[2] = { S(-11, -4), S(-3, 4) }; + constexpr Score BlockedPawn[2] = { S(-13, -4), S(-4, 3) }; constexpr Score BlockedStorm[RANK_NB] = { S(0, 0), S(0, 0), S(76, 78), S(-10, 15), S(-7, 10), S(-4, 6), S(-1, 2) }; // Connected pawn bonus - constexpr int Connected[RANK_NB] = { 0, 7, 8, 12, 29, 48, 86 }; + constexpr int Connected[RANK_NB] = { 0, 7, 8, 11, 24, 45, 85 }; // Strength of pawn shelter for our king by [distance from edge][rank]. // RANK_1 = 0 is used for files where we have no pawn, or pawn is behind our king. diff --git a/src/types.h b/src/types.h index 379859f7..73da41e2 100644 --- a/src/types.h +++ b/src/types.h @@ -178,7 +178,7 @@ enum Value : int { VALUE_MATE_IN_MAX_PLY = VALUE_MATE - MAX_PLY, VALUE_MATED_IN_MAX_PLY = -VALUE_MATE_IN_MAX_PLY, - PawnValueMg = 124, PawnValueEg = 206, + PawnValueMg = 126, PawnValueEg = 208, KnightValueMg = 781, KnightValueEg = 854, BishopValueMg = 825, BishopValueEg = 915, RookValueMg = 1276, RookValueEg = 1380, From 7f336dd59b3b1365943d73ee706a9610e18108bb Mon Sep 17 00:00:00 2001 From: UnaiCorzo Date: Tue, 4 Aug 2020 14:32:52 +0200 Subject: [PATCH 30/86] Remove QueenInfiltration STC https://tests.stockfishchess.org/tests/view/5f2955b1a5abc164f05e4c85 LLR: 2.96 (-2.94,2.94) {-1.50,0.50} Total: 29216 W: 5560 L: 5416 D: 18240 Ptnml(0-2): 466, 3329, 6902, 3417, 494 LTC https://tests.stockfishchess.org/tests/view/5f299154a5abc164f05e4ca1 LLR: 2.92 (-2.94,2.94) {-1.50,0.50} Total: 54144 W: 6635 L: 6594 D: 40915 Ptnml(0-2): 372, 4859, 16536, 4966, 339 closes https://github.com/official-stockfish/Stockfish/pull/2910 Bench: 4609008 --- src/evaluate.cpp | 5 ----- 1 file 
changed, 5 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 015efa48..d20c7b70 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -181,7 +181,6 @@ namespace { constexpr Score MinorBehindPawn = S( 18, 3); constexpr Score PassedFile = S( 11, 8); constexpr Score PawnlessFlank = S( 17, 95); - constexpr Score QueenInfiltration = S( -2, 14); constexpr Score ReachableOutpost = S( 31, 22); constexpr Score RestrictedPiece = S( 7, 7); constexpr Score RookOnKingRing = S( 16, 0); @@ -423,10 +422,6 @@ namespace { Bitboard queenPinners; if (pos.slider_blockers(pos.pieces(Them, ROOK, BISHOP), s, queenPinners)) score -= WeakQueen; - - // Bonus for queen on weak square in enemy camp - if (relative_rank(Us, s) > RANK_4 && (~pe->pawn_attacks_span(Them) & s)) - score += QueenInfiltration; } } if (T) From 615d98da2447e79ceceae205e0cd4e878115acc3 Mon Sep 17 00:00:00 2001 From: Stefan Geschwentner Date: Wed, 5 Aug 2020 09:29:27 +0200 Subject: [PATCH 31/86] Do move legality check before pruning. This allows the code to be simplified, because the move counter no longer has to be decremented later if a move isn't legal. As a side effect, illegal pruned moves are no longer included in the move counter, so slightly less pruning and fewer reductions are done.
STC: LLR: 2.94 (-2.94,2.94) {-1.50,0.50} Total: 111016 W: 21106 L: 21077 D: 68833 Ptnml(0-2): 1830, 13083, 25736, 12946, 1913 https://tests.stockfishchess.org/tests/view/5f28816fa5abc164f05e4c26 LTC: LLR: 2.94 (-2.94,2.94) {-1.50,0.50} Total: 39264 W: 4909 L: 4843 D: 29512 Ptnml(0-2): 263, 3601, 11854, 3635, 279 https://tests.stockfishchess.org/tests/view/5f297902a5abc164f05e4c8e closes https://github.com/official-stockfish/Stockfish/pull/2906 Bench: 4390086 --- src/search.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index d1dc4489..2f83f4f4 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -986,6 +986,10 @@ moves_loop: // When in check, search starts from here thisThread->rootMoves.begin() + thisThread->pvLast, move)) continue; + // Check for legality + if (!rootNode && !pos.legal(move)) + continue; + ss->moveCount = ++moveCount; if (rootNode && thisThread == Threads.main() && Time.elapsed() > 3000) @@ -1137,13 +1141,6 @@ moves_loop: // When in check, search starts from here // Speculative prefetch as early as possible prefetch(TT.first_entry(pos.key_after(move))); - // Check for legality just before making the move - if (!rootNode && !pos.legal(move)) - { - ss->moveCount = --moveCount; - continue; - } - // Update the current move (this must be done after singular extension search) ss->currentMove = move; ss->continuationHistory = &thisThread->continuationHistory[ss->inCheck] From 857e045ced9e20f202e15d825e47b3ab8241dcef Mon Sep 17 00:00:00 2001 From: Sergio Vieri Date: Fri, 7 Aug 2020 15:15:04 +0800 Subject: [PATCH 32/86] Update default net to nn-9931db908a9b.nnue Net created at 20200806-1802 passed STC: https://tests.stockfishchess.org/tests/view/5f2d00b461e3b6af64881f21 LLR: 2.94 (-2.94,2.94) {-0.50,1.50} Total: 6672 W: 1052 L: 898 D: 4722 Ptnml(0-2): 63, 600, 1868, 730, 75 passed LTC: https://tests.stockfishchess.org/tests/view/5f2d052a61e3b6af64881f29 LLR: 2.96 (-2.94,2.94) {0.25,1.75} 
Total: 7576 W: 573 L: 463 D: 6540 Ptnml(0-2): 8, 392, 2889, 480, 19 closes https://github.com/official-stockfish/Stockfish/pull/2923 Bench: 4390086 --- AUTHORS | 1 + src/ucioption.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 2e080e61..07e07297 100644 --- a/AUTHORS +++ b/AUTHORS @@ -151,6 +151,7 @@ Sami Kiminki (skiminki) Sebastian Buchwald (UniQP) Sergei Antonov (saproj) Sergei Ivanov (svivanov72) +Sergio Vieri (sergiovieri) sf-x Shane Booth (shane31) Shawn Varghese (xXH4CKST3RXx) diff --git a/src/ucioption.cpp b/src/ucioption.cpp index 788aed17..faeb78ae 100644 --- a/src/ucioption.cpp +++ b/src/ucioption.cpp @@ -79,7 +79,7 @@ void init(OptionsMap& o) { o["Syzygy50MoveRule"] << Option(true); o["SyzygyProbeLimit"] << Option(7, 0, 7); o["Use NNUE"] << Option(false, on_use_NNUE); - o["EvalFile"] << Option("nn-97f742aaefcd.nnue", on_eval_file); + o["EvalFile"] << Option("nn-9931db908a9b.nnue", on_eval_file); } From dc5af66eadf3cbe3c3ef106657e561c1aa8ac97f Mon Sep 17 00:00:00 2001 From: Unai Corzo Date: Sat, 8 Aug 2020 08:24:20 +0200 Subject: [PATCH 33/86] Tweak futility pruning depth. STC https://tests.stockfishchess.org/tests/view/5f2d237161e3b6af64881f43 LLR: 2.96 (-2.94,2.94) {-0.50,1.50} Total: 12712 W: 1823 L: 1664 D: 9225 Ptnml(0-2): 122, 1166, 3627, 1313, 128 LTC https://tests.stockfishchess.org/tests/view/5f2d473061e3b6af64881f6f LLR: 2.96 (-2.94,2.94) {0.25,1.75} Total: 12104 W: 912 L: 788 D: 10404 Ptnml(0-2): 13, 665, 4582, 769, 23 closes https://github.com/official-stockfish/Stockfish/pull/2930 bench: 4271421 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 2f83f4f4..886ed52c 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -816,7 +816,7 @@ namespace { // Step 8. 
Futility pruning: child node (~50 Elo) if ( !PvNode - && depth < 6 + && depth < 8 && eval - futility_margin(depth, improving) >= beta && eval < VALUE_KNOWN_WIN) // Do not return unproven wins return eval; From 5ccff25df2e8fcbee3d4c1428bbc101afa88e700 Mon Sep 17 00:00:00 2001 From: Lolligerhans Date: Fri, 7 Aug 2020 11:24:37 +0200 Subject: [PATCH 34/86] Expand outposts to minors shielded by pawns Allow any pawn in front of a minor piece to replace the pawn protection requirement for outposts. +-------+ +-------+ | . . o | | o . . | o Their pawns | . o x | | o . . | x Our pawns | o N . | | x o B | N,B New (reachable) outpost | . . . | | . _ . | _ Reachable square behind a pawn +-------+ +-------+ N outpost B reaches outpost We want outposts to be secured by pawns against major pieces. If a minor is shielded by any pawn from above, it is rarely at the same time protected by our pawn attacks from below. However, the pawn shield in itself offers some degree of protection. A pawn shield will now suffice to replace the pawn protection for the outpost (and reachable outpost) bonus. This effect stacks with the existing "minor behind pawn" bonus. 
STC https://tests.stockfishchess.org/tests/view/5f2bcd14b3ebe5cbfee85b2c LLR: 2.94 (-2.94,2.94) {-0.50,1.50} Total: 27248 W: 5353 L: 5119 D: 16776 Ptnml(0-2): 462, 3174, 6185, 3274, 529 LTC https://tests.stockfishchess.org/tests/view/5f2bfef5b3ebe5cbfee85b5a LLR: 2.96 (-2.94,2.94) {0.25,1.75} Total: 99432 W: 12580 L: 12130 D: 74722 Ptnml(0-2): 696, 8903, 30049, 9391, 677 Closes #2935 Bench: 4143673 --- src/evaluate.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index d20c7b70..47b84ee6 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -345,7 +345,8 @@ namespace { { // Bonus if the piece is on an outpost square or can reach one // Reduced bonus for knights (BadOutpost) if few relevant targets - bb = OutpostRanks & attackedBy[Us][PAWN] & ~pe->pawn_attacks_span(Them); + bb = OutpostRanks & (attackedBy[Us][PAWN] | shift(pos.pieces(PAWN))) + & ~pe->pawn_attacks_span(Them); Bitboard targets = pos.pieces(Them) & ~pos.pieces(PAWN); if ( Pt == KNIGHT From f4c27cda1a6874550fcbf6cf991b0b9abe43ff39 Mon Sep 17 00:00:00 2001 From: SFisGOD Date: Sat, 8 Aug 2020 03:45:08 +0800 Subject: [PATCH 35/86] Reintroduce late irreversible move extension Reintroduce vondele's late irreversible move extension for fortress keeping. This was removed when we only had classical eval. Now that we have the NNUE net, it seems that this is useful again. 
STC: LLR: 2.93 (-2.94,2.94) {-0.50,1.50} Total: 5352 W: 787 L: 653 D: 3912 Ptnml(0-2): 34, 451, 1579, 571, 41 https://tests.stockfishchess.org/tests/view/5f2dc8ad61e3b6af64881ff0 LTC: LLR: 2.94 (-2.94,2.94) {0.25,1.75} Total: 14416 W: 1013 L: 891 D: 12512 Ptnml(0-2): 15, 722, 5623, 822, 26 https://tests.stockfishchess.org/tests/view/5f2e0e3661e3b6af6488201e closes https://github.com/official-stockfish/Stockfish/pull/2936 Bench: 4154696 --- src/search.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/search.cpp b/src/search.cpp index 886ed52c..8be96e29 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1134,6 +1134,12 @@ moves_loop: // When in check, search starts from here // Castling extension if (type_of(move) == CASTLING) extension = 1; + + // Late irreversible move extension + if ( move == ttMove + && pos.rule50_count() > 80 + && (captureOrPromotion || type_of(movedPiece) == PAWN)) + extension = 2; // Add extension to new depth newDepth += extension; From 910f779eb1f432c3f90fc19c7824840e02cac837 Mon Sep 17 00:00:00 2001 From: Vizvezdenec Date: Sat, 8 Aug 2020 05:51:26 +0300 Subject: [PATCH 36/86] Do more futility pruning for parent nodes. This patch increases LMRdepth threshold for futility pruning at parent nodes so it can apply more often. With the radical change to the evaluation approach it seems that search is really far from an optimal state, especially the parts that use static evaluation of the position.
passed STC https://tests.stockfishchess.org/tests/view/5f2da75661e3b6af64881fd0 LLR: 2.93 (-2.94,2.94) {-0.50,1.50} Total: 8744 W: 1305 L: 1156 D: 6283 Ptnml(0-2): 75, 789, 2500, 928, 80 passed LTC https://tests.stockfishchess.org/tests/view/5f2dcb2a61e3b6af64881ff3 LLR: 2.98 (-2.94,2.94) {0.25,1.75} Total: 17728 W: 1256 L: 1117 D: 15355 Ptnml(0-2): 22, 961, 6774, 1070, 37 Bench: 4067325 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 8be96e29..4a9bd7de 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1028,7 +1028,7 @@ moves_loop: // When in check, search starts from here continue; // Futility pruning: parent node (~5 Elo) - if ( lmrDepth < 6 + if ( lmrDepth < 8 && !ss->inCheck && ss->staticEval + 284 + 188 * lmrDepth <= alpha && (*contHist[0])[movedPiece][to_sq(move)] From 23ecf3d5c6ffbcfbe45acd2afcf503929474a4db Mon Sep 17 00:00:00 2001 From: "U-DESKTOP-3900\\Mark" Date: Fri, 7 Aug 2020 19:53:18 -0400 Subject: [PATCH 37/86] simplified and increased threshold to switch between NNUE and classical STC https://tests.stockfishchess.org/tests/view/5f2deb1661e3b6af6488200f LLR: 2.96 (-2.94,2.94) {-1.50,0.50} Total: 10376 W: 1481 L: 1359 D: 7536 Ptnml(0-2): 91, 953, 2981, 1069, 94 LTC: https://tests.stockfishchess.org/html/live_elo.html?5f2e0a0461e3b6af64882019 LLR: 2.99 (-2.94,2.94) {-1.50,0.50} Total: 5040 W: 375 L: 315 D: 4350 Ptnml(0-2): 7, 263, 1926, 311, 13 closes https://github.com/official-stockfish/Stockfish/pull/2934 Bench: 4067325 --- src/evaluate.cpp | 7 +++---- src/search.cpp | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 47b84ee6..1ae6cb3a 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -110,7 +110,7 @@ namespace { constexpr Value LazyThreshold1 = Value(1400); constexpr Value LazyThreshold2 = Value(1300); constexpr Value SpaceThreshold = Value(12222); - constexpr Value NNUEThreshold = Value(500); + constexpr 
Value NNUEThreshold = Value(520); // KingAttackWeights[PieceType] contains king attack weights by piece type constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 }; @@ -939,10 +939,9 @@ Value Eval::evaluate(const Position& pos) { if (Eval::useNNUE) { - Value balance = pos.non_pawn_material(WHITE) - pos.non_pawn_material(BLACK); - balance += 200 * (pos.count(WHITE) - pos.count(BLACK)); + Value v = eg_value(pos.psq_score()); // Take NNUE eval only on balanced positions - if (abs(balance) < NNUEThreshold) + if (abs(v) < NNUEThreshold) return NNUE::evaluate(pos) + Tempo; } return Evaluation(pos).value(); diff --git a/src/search.cpp b/src/search.cpp index 4a9bd7de..4a993b01 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1134,7 +1134,7 @@ moves_loop: // When in check, search starts from here // Castling extension if (type_of(move) == CASTLING) extension = 1; - + // Late irreversible move extension if ( move == ttMove && pos.rule50_count() > 80 From 450b60a303b0c59b0cc5dd22d95b9a983dfc4f96 Mon Sep 17 00:00:00 2001 From: mckx00 Date: Sat, 8 Aug 2020 03:07:07 -0700 Subject: [PATCH 38/86] Remove unnecessary legality check Possible after the recent reordering of the pos.legal(move) check https://github.com/official-stockfish/Stockfish/pull/2941 No functional change.
--- AUTHORS | 1 + src/search.cpp | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/AUTHORS b/AUTHORS index 07e07297..21ef3e50 100644 --- a/AUTHORS +++ b/AUTHORS @@ -79,6 +79,7 @@ Jean Gauthier (OuaisBla) Jean-Francois Romang (jromang) Jekaa Jerry Donald Watson (jerrydonaldwatson) +jjoshua2 Jonathan Calovski (Mysseno) Jonathan Dumale (SFisGOD) Joost VandeVondele (vondele) diff --git a/src/search.cpp b/src/search.cpp index 4a993b01..e5d18f77 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1079,8 +1079,7 @@ moves_loop: // When in check, search starts from here /* && ttValue != VALUE_NONE Already implicit in the next condition */ && abs(ttValue) < VALUE_KNOWN_WIN && (tte->bound() & BOUND_LOWER) - && tte->depth() >= depth - 3 - && pos.legal(move)) + && tte->depth() >= depth - 3) { Value singularBeta = ttValue - ((formerPv + 4) * depth) / 2; Depth singularDepth = (depth - 1 + 3 * formerPv) / 2; From 3368d0328591b2741ca32e57cfa0a35a7144fdd1 Mon Sep 17 00:00:00 2001 From: Moez Jellouli <37274752+MJZ1977@users.noreply.github.com> Date: Sat, 8 Aug 2020 12:35:34 +0200 Subject: [PATCH 39/86] update Null Move Pruning parameters STC: https://tests.stockfishchess.org/tests/view/5f2dc38561e3b6af64881fec LLR: 2.99 (-2.94,2.94) {-0.50,1.50} Total: 6120 W: 903 L: 758 D: 4459 Ptnml(0-2): 44, 535, 1775, 644, 62 LTC: https://tests.stockfishchess.org/tests/view/5f2dd55f61e3b6af64882003 LLR: 2.95 (-2.94,2.94) {0.25,1.75} Total: 7424 W: 577 L: 463 D: 6384 Ptnml(0-2): 16, 375, 2824, 473, 24 closes https://github.com/official-stockfish/Stockfish/pull/2942 bench 4107833 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index e5d18f77..9cdc7046 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -827,7 +827,7 @@ namespace { && (ss-1)->statScore < 23824 && eval >= beta && eval >= ss->staticEval - && ss->staticEval >= beta - 33 * depth - 33 * improving + 112 * ttPv + 311 + && ss->staticEval >= beta - 28 * 
depth - 28 * improving + 94 * ttPv + 200 && !excludedMove && pos.non_pawn_material(us) && (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor)) From e663bc533020183c0c52eaf877a91422c9c80742 Mon Sep 17 00:00:00 2001 From: Vizvezdenec Date: Sat, 8 Aug 2020 17:43:41 +0300 Subject: [PATCH 40/86] Do more aggressive futility pruning for captures This patch lines up with other patches which use better eval to produce more aggressive cutoffs based on static evaluation of position, it allows more aggressive futility pruning for captures - so now we will be producing them with bigger evaluation of position, so more often. passed STC https://tests.stockfishchess.org/tests/view/5f2da79e61e3b6af64881fd2 LLR: 3.87 (-2.94,2.94) {-0.50,1.50} Total: 27256 W: 3809 L: 3593 D: 19854 Ptnml(0-2): 221, 2578, 7830, 2762, 237 passed LTC https://tests.stockfishchess.org/tests/view/5f2df92061e3b6af64882012 LLR: 4.97 (-2.94,2.94) {0.25,1.75} Total: 43624 W: 3095 L: 2820 D: 37709 Ptnml(0-2): 66, 2410, 16608, 2639, 89 closes https://github.com/official-stockfish/Stockfish/pull/2946 Bench: 4272280 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 9cdc7046..201cd974 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1055,7 +1055,7 @@ moves_loop: // When in check, search starts from here && !(PvNode && abs(bestValue) < 2) && PieceValue[MG][type_of(movedPiece)] >= PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] && !ss->inCheck - && ss->staticEval + 267 + 391 * lmrDepth + && ss->staticEval + 178 + 261 * lmrDepth + PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] <= alpha) continue; From 6d6267c378aa0aa354e203e5025361d9a4e0d449 Mon Sep 17 00:00:00 2001 From: Guy Vreuls Date: Sat, 8 Aug 2020 12:45:10 +0200 Subject: [PATCH 41/86] Parallelize Link Time Optimization for GCC, CLANG and MINGW This patch tries to run multiple LTO threads in parallel, speeding up the build process of optimized builds if the -j make 
parameter is used. This mitigates the longer linking times of optimized builds since the integration of the NNUE code. Roughly 2x build speedup. I've tried a similar patch some two years ago but it ran into trouble with old compiler versions then. Since we're on the C++17 standard now these old compilers should be obsolete. closes https://github.com/official-stockfish/Stockfish/pull/2943 No functional change. --- src/Makefile | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/src/Makefile b/src/Makefile index 4741e722..cab7a7e5 100644 --- a/src/Makefile +++ b/src/Makefile @@ -282,6 +282,9 @@ ifeq ($(COMP),gcc) ifneq ($(KERNEL),Darwin) LDFLAGS += -Wl,--no-as-needed endif + + gccversion = $(shell $(CXX) --version) + gccisclang = $(findstring clang,$(gccversion)) endif ifeq ($(COMP),mingw) @@ -496,18 +499,28 @@ endif ### needs access to the optimization flags. ifeq ($(optimize),yes) ifeq ($(debug), no) - ifeq ($(comp),$(filter $(comp),gcc clang)) + ifeq ($(comp),clang) + CXXFLAGS += -flto=thin + LDFLAGS += $(CXXFLAGS) + +# GCC and CLANG use different methods for parallelizing LTO and CLANG pretends to be +# GCC on some systems. + else ifeq ($(comp),gcc) + ifeq ($(gccisclang),) CXXFLAGS += -flto + LDFLAGS += $(CXXFLAGS) -flto=jobserver + else + CXXFLAGS += -flto=thin LDFLAGS += $(CXXFLAGS) endif # To use LTO and static linking on windows, the tool chain requires a recent gcc: # gcc version 10.1 in msys2 or TDM-GCC version 9.2 are know to work, older might not. # So, only enable it for a cross from Linux by default. 
- ifeq ($(comp),mingw) + else ifeq ($(comp),mingw) ifeq ($(KERNEL),Linux) CXXFLAGS += -flto - LDFLAGS += $(CXXFLAGS) + LDFLAGS += $(CXXFLAGS) -flto=jobserver endif endif endif @@ -693,7 +706,7 @@ config-sanity: @test "$(comp)" = "gcc" || test "$(comp)" = "icc" || test "$(comp)" = "mingw" || test "$(comp)" = "clang" $(EXE): $(OBJS) - $(CXX) -o $@ $(OBJS) $(LDFLAGS) + +$(CXX) -o $@ $(OBJS) $(LDFLAGS) clang-profile-make: $(MAKE) ARCH=$(ARCH) COMP=$(COMP) \ From 1949eb8604853e2ad8f85400590e6a1e2ce7e451 Mon Sep 17 00:00:00 2001 From: Unai Corzo Date: Sat, 8 Aug 2020 22:03:37 +0200 Subject: [PATCH 42/86] Singular extension search tweak Tweak depth. STC https://tests.stockfishchess.org/tests/view/5f2d22ec61e3b6af64881f40 LLR: 2.94 (-2.94,2.94) {-0.50,1.50} Total: 17984 W: 2603 L: 2441 D: 12940 Ptnml(0-2): 133, 1751, 5094, 1849, 165 LTC https://tests.stockfishchess.org/tests/view/5f2d5a6a61e3b6af64881f7f LLR: 2.95 (-2.94,2.94) {0.25,1.75} Total: 85808 W: 5956 L: 5621 D: 74231 Ptnml(0-2): 149, 4748, 32785, 5063, 159 closes https://github.com/official-stockfish/Stockfish/pull/2950 fixes two README.md typos: fixes https://github.com/official-stockfish/Stockfish/issues/2932 bench: 4022669 --- README.md | 4 ++-- src/search.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f71a8b34..7b6ddf4c 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ Currently, Stockfish has the following UCI options: * #### Use NNUE Toggle between the NNUE and classical evaluation functions. If set to "true", - the network parameters must be availabe to load from file (see also EvalFile). + the network parameters must be available to load from file (see also EvalFile). * #### EvalFile The name of the file of the NNUE evaluation parameters. Depending on the GUI the @@ -138,7 +138,7 @@ Currently, Stockfish has the following UCI options: * #### Debug Log File Write all communication to and from the engine into a text file. 
-## classical and NNUE evaluation +## Classical and NNUE evaluation Both approaches assign a value to a position that is used in alpha-beta (PVS) search to find the best move. The classical evaluation computes this value as a function diff --git a/src/search.cpp b/src/search.cpp index 201cd974..37e3ff22 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1072,7 +1072,7 @@ moves_loop: // When in check, search starts from here // then that move is singular and should be extended. To verify this we do // a reduced search on all the other moves but the ttMove and if the // result is lower than ttValue minus a margin, then we will extend the ttMove. - if ( depth >= 6 + if ( depth >= 7 && move == ttMove && !rootNode && !excludedMove // Avoid recursive singular search From add890a10b8fe03e091520cd0af7383615c6c386 Mon Sep 17 00:00:00 2001 From: Unai Corzo Date: Sat, 8 Aug 2020 22:08:40 +0200 Subject: [PATCH 43/86] LMR search tweak All credit to Vizvezdenec, the original author of the idea. STC https://tests.stockfishchess.org/tests/view/5f2d606a61e3b6af64881f88 LLR: 2.95 (-2.94,2.94) {-0.50,1.50} Total: 8440 W: 1191 L: 1048 D: 6201 Ptnml(0-2): 59, 754, 2467, 865, 75 LTC https://tests.stockfishchess.org/tests/view/5f2d84ad61e3b6af64881fbd LLR: 2.95 (-2.94,2.94) {0.25,1.75} Total: 21896 W: 1557 L: 1406 D: 18933 Ptnml(0-2): 33, 1185, 8378, 1302, 50 closes https://github.com/official-stockfish/Stockfish/pull/2951 bench: 4084753 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 37e3ff22..0a2519b6 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1159,7 +1159,7 @@ moves_loop: // When in check, search starts from here // Step 16. Reduced depth search (LMR, ~200 Elo). If the move fails high it will be // re-searched at full depth. 
if ( depth >= 3 - && moveCount > 1 + 2 * rootNode + && moveCount > 1 + 2 * rootNode + 2 * (PvNode && abs(bestValue) < 2) && (!rootNode || thisThread->best_move_count(move) == 0) && ( !captureOrPromotion || moveCountPruning From d7a26899a973536ab9d3ce4771d8276d1a4dc55c Mon Sep 17 00:00:00 2001 From: Daniel Dugovic Date: Sat, 8 Aug 2020 15:39:29 -0500 Subject: [PATCH 44/86] Use fallback implementation for C++ aligned_alloc fixes https://github.com/official-stockfish/Stockfish/issues/2921 closes https://github.com/official-stockfish/Stockfish/pull/2927 No functional change --- src/Makefile | 4 ++-- src/misc.cpp | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Makefile b/src/Makefile index cab7a7e5..b7585a17 100644 --- a/src/Makefile +++ b/src/Makefile @@ -354,8 +354,8 @@ endif endif ifeq ($(KERNEL),Darwin) - CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.15 - LDFLAGS += -arch $(arch) -mmacosx-version-min=10.15 + CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.13 + LDFLAGS += -arch $(arch) -mmacosx-version-min=10.13 endif ### Travis CI script uses COMPILER to overwrite CXX diff --git a/src/misc.cpp b/src/misc.cpp index 3d7c75e5..05f79b45 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -321,9 +321,9 @@ void prefetch(void* addr) { /// void* std_aligned_alloc(size_t alignment, size_t size) { -#if defined(__APPLE__) +#if (defined(__APPLE__) && defined(_LIBCPP_HAS_C11_FEATURES)) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) ) return aligned_alloc(alignment, size); -#elif defined(_WIN32) +#elif (defined(_WIN32) || (defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES))) return _mm_malloc(size, alignment); #else return std::aligned_alloc(alignment, size); @@ -331,9 +331,9 @@ void* std_aligned_alloc(size_t alignment, size_t size) { } void std_aligned_free(void* ptr) { -#if defined(__APPLE__) +#if (defined(__APPLE__) && defined(_LIBCPP_HAS_C11_FEATURES)) || 
defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) ) free(ptr); -#elif defined(_WIN32) +#elif (defined(_WIN32) || (defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES))) _mm_free(ptr); #else free(ptr); From 320fa1b2f082a7db67363e468e7e241d7cedcc64 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Sun, 9 Aug 2020 11:05:07 +0200 Subject: [PATCH 45/86] Improve error message on missing net. small rewording, but also print the download url for the default net. closes https://github.com/official-stockfish/Stockfish/pull/2954 No functional change --- src/evaluate.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 1ae6cb3a..a642357e 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -50,9 +50,13 @@ namespace Eval { std::string eval_file = std::string(Options["EvalFile"]); if (useNNUE && eval_file_loaded != eval_file) { - std::cerr << "Use of NNUE evaluation, but the file " << eval_file << " was not loaded successfully. " - << "These network evaluation parameters must be available, compatible with this version of the code. " - << "The UCI option EvalFile might need to specify the full path, including the directory/folder name, to the file." << std::endl; + UCI::OptionsMap defaults; + UCI::init(defaults); + + std::cerr << "NNUE evaluation used, but the network file " << eval_file << " was not loaded successfully. " + << "These network evaluation parameters must be available, and compatible with this version of the code. " + << "The UCI option EvalFile might need to specify the full path, including the directory/folder name, to the file. 
" + << "The default net can be downloaded from: https://tests.stockfishchess.org/api/nn/"+std::string(defaults["EvalFile"]) << std::endl; std::exit(EXIT_FAILURE); } From cd1bb27dd452f336d434a45131bfbe43f8a8c5b3 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Sun, 9 Aug 2020 19:08:47 +0200 Subject: [PATCH 46/86] Fix aligned_alloc on MinGW introduced with d7a26899a973536ab9d3ce4771d8276d1a4dc55c closes https://github.com/official-stockfish/Stockfish/pull/2959 No functional change. --- src/misc.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/misc.cpp b/src/misc.cpp index 05f79b45..bdd7bccb 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -321,7 +321,7 @@ void prefetch(void* addr) { /// void* std_aligned_alloc(size_t alignment, size_t size) { -#if (defined(__APPLE__) && defined(_LIBCPP_HAS_C11_FEATURES)) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) ) +#if (defined(__APPLE__) && defined(_LIBCPP_HAS_C11_FEATURES)) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) return aligned_alloc(alignment, size); #elif (defined(_WIN32) || (defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES))) return _mm_malloc(size, alignment); @@ -331,7 +331,7 @@ void* std_aligned_alloc(size_t alignment, size_t size) { } void std_aligned_free(void* ptr) { -#if (defined(__APPLE__) && defined(_LIBCPP_HAS_C11_FEATURES)) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) ) +#if (defined(__APPLE__) && defined(_LIBCPP_HAS_C11_FEATURES)) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) free(ptr); #elif (defined(_WIN32) || (defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES))) _mm_free(ptr); From 2bfde5542919c2ed624b5b62883616e325ccb942 Mon Sep 17 00:00:00 2001 
From: Vizvezdenec Date: Sun, 9 Aug 2020 21:39:46 +0300 Subject: [PATCH 47/86] Adjust NNUE usage based on number of pawns in position The idea of this patch is that positions are usually more complex and hard to evaluate even if there are more pawns. This patch adjusts NNUE threshold usage depending on number of pawns in position, if pawn count is <3 we use the classical evaluation more often, for pawn count = 3 the patch is non-functional, with pawn count > 3 NNUE evaluation is used more often. passed STC https://tests.stockfishchess.org/tests/view/5f2f02d09081672066536b1f LLR: 2.96 (-2.94,2.94) {-0.50,1.50} Total: 36520 W: 5011 L: 4823 D: 26686 Ptnml(0-2): 299, 3482, 10548, 3594, 337 passed LTC https://tests.stockfishchess.org/tests/view/5f2f4c329081672066536b5c LLR: 2.98 (-2.94,2.94) {0.25,1.75} Total: 39272 W: 2630 L: 2433 D: 34209 Ptnml(0-2): 53, 2066, 15218, 2229, 70 closes https://github.com/official-stockfish/Stockfish/pull/2960 bench 4084753 --- src/evaluate.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index a642357e..ce35c630 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -114,7 +114,7 @@ namespace { constexpr Value LazyThreshold1 = Value(1400); constexpr Value LazyThreshold2 = Value(1300); constexpr Value SpaceThreshold = Value(12222); - constexpr Value NNUEThreshold = Value(520); + constexpr Value NNUEThreshold = Value(460); // KingAttackWeights[PieceType] contains king attack weights by piece type constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 }; @@ -945,7 +945,7 @@ Value Eval::evaluate(const Position& pos) { { Value v = eg_value(pos.psq_score()); // Take NNUE eval only on balanced positions - if (abs(v) < NNUEThreshold) + if (abs(v) < NNUEThreshold + 20 * pos.count()) return NNUE::evaluate(pos) + Tempo; } return Evaluation(pos).value(); From a6e89293df5af35931b61d86b6de3872a981c100 Mon Sep 17 00:00:00 2001 From: Dariusz Orzechowski Date: Sun, 9 Aug 2020 
14:32:24 -0700 Subject: [PATCH 48/86] Avoid special casing for MinGW after some testing, no version of MinGW/gcc has been found where this code is still necessary. Probably older code (pre-c++17?) closes https://github.com/official-stockfish/Stockfish/pull/2891 No functional change --- src/nnue/layers/affine_transform.h | 29 +++-------------- src/nnue/layers/clipped_relu.h | 49 ++++------------------------- src/nnue/nnue_feature_transformer.h | 34 ++------------------ 3 files changed, 14 insertions(+), 98 deletions(-) diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h index b585bc87..ecc3008a 100644 --- a/src/nnue/layers/affine_transform.h +++ b/src/nnue/layers/affine_transform.h @@ -104,13 +104,8 @@ namespace Eval::NNUE::Layers { __m512i sum = _mm512_setzero_si512(); const auto row = reinterpret_cast(&weights_[offset]); for (IndexType j = 0; j < kNumChunks; ++j) { - - #if defined(__MINGW32__) || defined(__MINGW64__) - __m512i product = _mm512_maddubs_epi16(_mm512_loadu_si512(&input_vector[j]), _mm512_load_si512(&row[j])); - #else - __m512i product = _mm512_maddubs_epi16(_mm512_load_si512(&input_vector[j]), _mm512_load_si512(&row[j])); - #endif - + __m512i product = _mm512_maddubs_epi16( + _mm512_load_si512(&input_vector[j]), _mm512_load_si512(&row[j])); product = _mm512_madd_epi16(product, kOnes); sum = _mm512_add_epi32(sum, product); } @@ -125,12 +120,8 @@ namespace Eval::NNUE::Layers { const auto row_256 = reinterpret_cast(&weights_[offset]); int j = kNumChunks * 2; - #if defined(__MINGW32__) || defined(__MINGW64__) // See HACK comment below in AVX2. 
- __m256i sum256 = _mm256_maddubs_epi16(_mm256_loadu_si256(&iv_256[j]), _mm256_load_si256(&row_256[j])); - #else - __m256i sum256 = _mm256_maddubs_epi16(_mm256_load_si256(&iv_256[j]), _mm256_load_si256(&row_256[j])); - #endif - + __m256i sum256 = _mm256_maddubs_epi16( + _mm256_load_si256(&iv_256[j]), _mm256_load_si256(&row_256[j])); sum256 = _mm256_madd_epi16(sum256, _mm256_set1_epi16(1)); sum256 = _mm256_hadd_epi32(sum256, sum256); sum256 = _mm256_hadd_epi32(sum256, sum256); @@ -144,17 +135,7 @@ namespace Eval::NNUE::Layers { const auto row = reinterpret_cast(&weights_[offset]); for (IndexType j = 0; j < kNumChunks; ++j) { __m256i product = _mm256_maddubs_epi16( - - #if defined(__MINGW32__) || defined(__MINGW64__) - // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary - // compiled with g++ in MSYS2 crashes here because the output memory is not aligned - // even though alignas is specified. - _mm256_loadu_si256 - #else - _mm256_load_si256 - #endif - - (&input_vector[j]), _mm256_load_si256(&row[j])); + _mm256_load_si256(&input_vector[j]), _mm256_load_si256(&row[j])); product = _mm256_madd_epi16(product, kOnes); sum = _mm256_add_epi32(sum, product); } diff --git a/src/nnue/layers/clipped_relu.h b/src/nnue/layers/clipped_relu.h index 7ade598f..7e5fcf4a 100644 --- a/src/nnue/layers/clipped_relu.h +++ b/src/nnue/layers/clipped_relu.h @@ -74,50 +74,13 @@ namespace Eval::NNUE::Layers { const auto out = reinterpret_cast<__m256i*>(output); for (IndexType i = 0; i < kNumChunks; ++i) { const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32( - - #if defined(__MINGW32__) || defined(__MINGW64__) - // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary - // compiled with g++ in MSYS2 crashes here because the output memory is not aligned - // even though alignas is specified. 
- _mm256_loadu_si256 - #else - _mm256_load_si256 - #endif - - (&in[i * 4 + 0]), - - #if defined(__MINGW32__) || defined(__MINGW64__) - _mm256_loadu_si256 - #else - _mm256_load_si256 - #endif - - (&in[i * 4 + 1])), kWeightScaleBits); + _mm256_load_si256(&in[i * 4 + 0]), + _mm256_load_si256(&in[i * 4 + 1])), kWeightScaleBits); const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32( - - #if defined(__MINGW32__) || defined(__MINGW64__) - _mm256_loadu_si256 - #else - _mm256_load_si256 - #endif - - (&in[i * 4 + 2]), - - #if defined(__MINGW32__) || defined(__MINGW64__) - _mm256_loadu_si256 - #else - _mm256_load_si256 - #endif - - (&in[i * 4 + 3])), kWeightScaleBits); - - #if defined(__MINGW32__) || defined(__MINGW64__) - _mm256_storeu_si256 - #else - _mm256_store_si256 - #endif - - (&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8( + _mm256_load_si256(&in[i * 4 + 2]), + _mm256_load_si256(&in[i * 4 + 3])), kWeightScaleBits); + _mm256_store_si256( + &out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8( _mm256_packs_epi16(words0, words1), kZero), kOffsets)); } constexpr IndexType kStart = kNumChunks * kSimdWidth; diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index 1cfebbe4..f899d761 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -110,36 +110,12 @@ namespace Eval::NNUE { auto out = reinterpret_cast<__m256i*>(&output[offset]); for (IndexType j = 0; j < kNumChunks; ++j) { __m256i sum0 = - - #if defined(__MINGW32__) || defined(__MINGW64__) - // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary - // compiled with g++ in MSYS2 crashes here because the output memory is not aligned - // even though alignas is specified. 
- _mm256_loadu_si256 - #else - _mm256_load_si256 - #endif - - (&reinterpret_cast( + _mm256_load_si256(&reinterpret_cast( accumulation[perspectives[p]][0])[j * 2 + 0]); __m256i sum1 = - - #if defined(__MINGW32__) || defined(__MINGW64__) - _mm256_loadu_si256 - #else - _mm256_load_si256 - #endif - - (&reinterpret_cast( + _mm256_load_si256(&reinterpret_cast( accumulation[perspectives[p]][0])[j * 2 + 1]); - - #if defined(__MINGW32__) || defined(__MINGW64__) - _mm256_storeu_si256 - #else - _mm256_store_si256 - #endif - - (&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8( + _mm256_store_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8( _mm256_packs_epi16(sum0, sum1), kZero), kControl)); } @@ -202,11 +178,7 @@ namespace Eval::NNUE { auto column = reinterpret_cast(&weights_[offset]); constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); for (IndexType j = 0; j < kNumChunks; ++j) { - #if defined(__MINGW32__) || defined(__MINGW64__) - _mm256_storeu_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadu_si256(&accumulation[j]), column[j])); - #else accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]); - #endif } #elif defined(USE_SSE2) From 27b593a94477a821f80a041320683f805114d4a3 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Sun, 9 Aug 2020 18:11:38 +0200 Subject: [PATCH 49/86] Fix a data race for NNUE the stateInfo at the rootPos is no longer read-only, as the NNUE accumulator is part of it. Threads can thus not share this object and need their own copy. 
tested for no regression https://tests.stockfishchess.org/tests/view/5f3022239081672066536bce LLR: 2.96 (-2.94,2.94) {-1.50,0.50} Total: 52800 W: 6843 L: 6802 D: 39155 Ptnml(0-2): 336, 4646, 16399, 4679, 340 closes https://github.com/official-stockfish/Stockfish/pull/2957 fixes https://github.com/official-stockfish/Stockfish/issues/2933 No functional change --- src/Makefile | 4 ++-- src/thread.cpp | 13 +++++-------- src/thread.h | 1 + 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/Makefile b/src/Makefile index b7585a17..571172b2 100644 --- a/src/Makefile +++ b/src/Makefile @@ -354,8 +354,8 @@ endif endif ifeq ($(KERNEL),Darwin) - CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.13 - LDFLAGS += -arch $(arch) -mmacosx-version-min=10.13 + CXXFLAGS += -arch $(arch) -mmacosx-version-min=10.14 + LDFLAGS += -arch $(arch) -mmacosx-version-min=10.14 endif ### Travis CI script uses COMPILER to overwrite CXX diff --git a/src/thread.cpp b/src/thread.cpp index 44aea14e..1aa66a81 100644 --- a/src/thread.cpp +++ b/src/thread.cpp @@ -204,21 +204,18 @@ void ThreadPool::start_thinking(Position& pos, StateListPtr& states, // We use Position::set() to set root position across threads. But there are // some StateInfo fields (previous, pliesFromNull, capturedPiece) that cannot - // be deduced from a fen string, so set() clears them and to not lose the info - // we need to backup and later restore setupStates->back(). Note that setupStates - // is shared by threads but is accessed in read-only mode. - StateInfo tmp = setupStates->back(); - + // be deduced from a fen string, so set() clears them and they are set from + // setupStates->back() later. The rootState is per thread, earlier states are shared + // since they are read-only. 
for (Thread* th : *this) { th->nodes = th->tbHits = th->nmpMinPly = th->bestMoveChanges = 0; th->rootDepth = th->completedDepth = 0; th->rootMoves = rootMoves; - th->rootPos.set(pos.fen(), pos.is_chess960(), &setupStates->back(), th); + th->rootPos.set(pos.fen(), pos.is_chess960(), &th->rootState, th); + th->rootState = setupStates->back(); } - setupStates->back() = tmp; - main()->start_searching(); } diff --git a/src/thread.h b/src/thread.h index 46da1e34..042bc2e9 100644 --- a/src/thread.h +++ b/src/thread.h @@ -65,6 +65,7 @@ public: std::atomic nodes, tbHits, bestMoveChanges; Position rootPos; + StateInfo rootState; Search::RootMoves rootMoves; Depth rootDepth, completedDepth; CounterMoveHistory counterMoves; From 651ec3b31ee68db50f38ccd8fcdedbd6673cd9ed Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Mon, 10 Aug 2020 07:18:15 +0200 Subject: [PATCH 50/86] Revert "Avoid special casing for MinGW" This reverts commit a6e89293df5af35931b61d86b6de3872a981c100. The offending setup has been found as gcc/mingw 7.3 (on Ubuntu 18.04). fixes https://github.com/official-stockfish/Stockfish/issues/2963 closes https://github.com/official-stockfish/Stockfish/issues/2968 No functional change. 
--- src/nnue/layers/affine_transform.h | 29 ++++++++++++++--- src/nnue/layers/clipped_relu.h | 49 +++++++++++++++++++++++++---- src/nnue/nnue_feature_transformer.h | 34 ++++++++++++++++++-- 3 files changed, 98 insertions(+), 14 deletions(-) diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h index ecc3008a..b585bc87 100644 --- a/src/nnue/layers/affine_transform.h +++ b/src/nnue/layers/affine_transform.h @@ -104,8 +104,13 @@ namespace Eval::NNUE::Layers { __m512i sum = _mm512_setzero_si512(); const auto row = reinterpret_cast(&weights_[offset]); for (IndexType j = 0; j < kNumChunks; ++j) { - __m512i product = _mm512_maddubs_epi16( - _mm512_load_si512(&input_vector[j]), _mm512_load_si512(&row[j])); + + #if defined(__MINGW32__) || defined(__MINGW64__) + __m512i product = _mm512_maddubs_epi16(_mm512_loadu_si512(&input_vector[j]), _mm512_load_si512(&row[j])); + #else + __m512i product = _mm512_maddubs_epi16(_mm512_load_si512(&input_vector[j]), _mm512_load_si512(&row[j])); + #endif + product = _mm512_madd_epi16(product, kOnes); sum = _mm512_add_epi32(sum, product); } @@ -120,8 +125,12 @@ namespace Eval::NNUE::Layers { const auto row_256 = reinterpret_cast(&weights_[offset]); int j = kNumChunks * 2; - __m256i sum256 = _mm256_maddubs_epi16( - _mm256_load_si256(&iv_256[j]), _mm256_load_si256(&row_256[j])); + #if defined(__MINGW32__) || defined(__MINGW64__) // See HACK comment below in AVX2. 
+ __m256i sum256 = _mm256_maddubs_epi16(_mm256_loadu_si256(&iv_256[j]), _mm256_load_si256(&row_256[j])); + #else + __m256i sum256 = _mm256_maddubs_epi16(_mm256_load_si256(&iv_256[j]), _mm256_load_si256(&row_256[j])); + #endif + sum256 = _mm256_madd_epi16(sum256, _mm256_set1_epi16(1)); sum256 = _mm256_hadd_epi32(sum256, sum256); sum256 = _mm256_hadd_epi32(sum256, sum256); @@ -135,7 +144,17 @@ namespace Eval::NNUE::Layers { const auto row = reinterpret_cast(&weights_[offset]); for (IndexType j = 0; j < kNumChunks; ++j) { __m256i product = _mm256_maddubs_epi16( - _mm256_load_si256(&input_vector[j]), _mm256_load_si256(&row[j])); + + #if defined(__MINGW32__) || defined(__MINGW64__) + // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary + // compiled with g++ in MSYS2 crashes here because the output memory is not aligned + // even though alignas is specified. + _mm256_loadu_si256 + #else + _mm256_load_si256 + #endif + + (&input_vector[j]), _mm256_load_si256(&row[j])); product = _mm256_madd_epi16(product, kOnes); sum = _mm256_add_epi32(sum, product); } diff --git a/src/nnue/layers/clipped_relu.h b/src/nnue/layers/clipped_relu.h index 7e5fcf4a..7ade598f 100644 --- a/src/nnue/layers/clipped_relu.h +++ b/src/nnue/layers/clipped_relu.h @@ -74,13 +74,50 @@ namespace Eval::NNUE::Layers { const auto out = reinterpret_cast<__m256i*>(output); for (IndexType i = 0; i < kNumChunks; ++i) { const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32( - _mm256_load_si256(&in[i * 4 + 0]), - _mm256_load_si256(&in[i * 4 + 1])), kWeightScaleBits); + + #if defined(__MINGW32__) || defined(__MINGW64__) + // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary + // compiled with g++ in MSYS2 crashes here because the output memory is not aligned + // even though alignas is specified. 
+ _mm256_loadu_si256 + #else + _mm256_load_si256 + #endif + + (&in[i * 4 + 0]), + + #if defined(__MINGW32__) || defined(__MINGW64__) + _mm256_loadu_si256 + #else + _mm256_load_si256 + #endif + + (&in[i * 4 + 1])), kWeightScaleBits); const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32( - _mm256_load_si256(&in[i * 4 + 2]), - _mm256_load_si256(&in[i * 4 + 3])), kWeightScaleBits); - _mm256_store_si256( - &out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8( + + #if defined(__MINGW32__) || defined(__MINGW64__) + _mm256_loadu_si256 + #else + _mm256_load_si256 + #endif + + (&in[i * 4 + 2]), + + #if defined(__MINGW32__) || defined(__MINGW64__) + _mm256_loadu_si256 + #else + _mm256_load_si256 + #endif + + (&in[i * 4 + 3])), kWeightScaleBits); + + #if defined(__MINGW32__) || defined(__MINGW64__) + _mm256_storeu_si256 + #else + _mm256_store_si256 + #endif + + (&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8( _mm256_packs_epi16(words0, words1), kZero), kOffsets)); } constexpr IndexType kStart = kNumChunks * kSimdWidth; diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index f899d761..1cfebbe4 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -110,12 +110,36 @@ namespace Eval::NNUE { auto out = reinterpret_cast<__m256i*>(&output[offset]); for (IndexType j = 0; j < kNumChunks; ++j) { __m256i sum0 = - _mm256_load_si256(&reinterpret_cast( + + #if defined(__MINGW32__) || defined(__MINGW64__) + // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary + // compiled with g++ in MSYS2 crashes here because the output memory is not aligned + // even though alignas is specified. 
+ _mm256_loadu_si256 + #else + _mm256_load_si256 + #endif + + (&reinterpret_cast( accumulation[perspectives[p]][0])[j * 2 + 0]); __m256i sum1 = - _mm256_load_si256(&reinterpret_cast( + + #if defined(__MINGW32__) || defined(__MINGW64__) + _mm256_loadu_si256 + #else + _mm256_load_si256 + #endif + + (&reinterpret_cast( accumulation[perspectives[p]][0])[j * 2 + 1]); - _mm256_store_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8( + + #if defined(__MINGW32__) || defined(__MINGW64__) + _mm256_storeu_si256 + #else + _mm256_store_si256 + #endif + + (&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8( _mm256_packs_epi16(sum0, sum1), kZero), kControl)); } @@ -178,7 +202,11 @@ namespace Eval::NNUE { auto column = reinterpret_cast(&weights_[offset]); constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); for (IndexType j = 0; j < kNumChunks; ++j) { + #if defined(__MINGW32__) || defined(__MINGW64__) + _mm256_storeu_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadu_si256(&accumulation[j]), column[j])); + #else accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]); + #endif } #elif defined(USE_SSE2) From bcdf41dadc8a5f8a23116236a0f449a08b46dc6b Mon Sep 17 00:00:00 2001 From: Sergio Vieri Date: Mon, 10 Aug 2020 08:47:52 +0800 Subject: [PATCH 51/86] Update default net to nn-112bb1c8cdb5.nnue First trained net using search eval instead of pv leaf static eval. 
Net created at: 20200810-0744 passed STC: https://tests.stockfishchess.org/tests/view/5f30995d90816720665373f8 LLR: 2.93 (-2.94,2.94) {-0.50,1.50} Total: 15416 W: 2071 L: 1920 D: 11425 Ptnml(0-2): 123, 1376, 4563, 1519, 127 passed LTC: https://tests.stockfishchess.org/tests/view/5f30a104908167206653742b LLR: 2.93 (-2.94,2.94) {0.25,1.75} Total: 29792 W: 2003 L: 1834 D: 25955 Ptnml(0-2): 50, 1541, 11550, 1700, 55 closes https://github.com/official-stockfish/Stockfish/pull/2966 Bench: 4084753 --- src/ucioption.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ucioption.cpp b/src/ucioption.cpp index faeb78ae..b0689d6d 100644 --- a/src/ucioption.cpp +++ b/src/ucioption.cpp @@ -79,7 +79,7 @@ void init(OptionsMap& o) { o["Syzygy50MoveRule"] << Option(true); o["SyzygyProbeLimit"] << Option(7, 0, 7); o["Use NNUE"] << Option(false, on_use_NNUE); - o["EvalFile"] << Option("nn-9931db908a9b.nnue", on_eval_file); + o["EvalFile"] << Option("nn-112bb1c8cdb5.nnue", on_eval_file); } From a54f9011c3bf3581fe7daffef6be2d586e6662c1 Mon Sep 17 00:00:00 2001 From: jjoshua2 Date: Sun, 9 Aug 2020 16:16:04 -0400 Subject: [PATCH 52/86] simplifying hybrid condition STC https://tests.stockfishchess.org/tests/view/5f3059d1908167206653736b: LLR: 2.94 (-2.94,2.94) {-1.50,0.50} Total: 12520 W: 766 L: 727 D: 11027 Ptnml(0-2): 13, 624, 4949, 659, 15 LTC: https://tests.stockfishchess.org/tests/view/5f30863a90816720665373d1 LLR: 2.94 (-2.94,2.94) {-1.50,0.50} Total: 12520 W: 766 L: 727 D: 11027 Ptnml(0-2): 13, 624, 4949, 659, 15 closes: https://github.com/official-stockfish/Stockfish/pull/2965 Bench: 4084753 --- src/evaluate.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index ce35c630..caab2979 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -114,7 +114,7 @@ namespace { constexpr Value LazyThreshold1 = Value(1400); constexpr Value LazyThreshold2 = Value(1300); constexpr Value SpaceThreshold = Value(12222); - 
constexpr Value NNUEThreshold = Value(460); + constexpr Value NNUEThreshold = Value(575); // KingAttackWeights[PieceType] contains king attack weights by piece type constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 }; @@ -945,7 +945,7 @@ Value Eval::evaluate(const Position& pos) { { Value v = eg_value(pos.psq_score()); // Take NNUE eval only on balanced positions - if (abs(v) < NNUEThreshold + 20 * pos.count()) + if (abs(v) < NNUEThreshold) return NNUE::evaluate(pos) + Tempo; } return Evaluation(pos).value(); From 875183b310a8249922c2155e82cb4cecfae2097e Mon Sep 17 00:00:00 2001 From: mstembera Date: Sun, 9 Aug 2020 23:50:59 -0700 Subject: [PATCH 53/86] Workaround using unaligned loads for gcc < 9 despite usage of alignas, the generated (avx2/avx512) code with older compilers needs to use unaligned loads with older gcc (e.g. confirmed crash with gcc 7.3/mingw on abrok). Better performance thus requires gcc >= 9 on hardware supporting avx2/avx512 closes https://github.com/official-stockfish/Stockfish/pull/2969 No functional change --- src/nnue/layers/affine_transform.h | 32 +++---------------- src/nnue/layers/clipped_relu.h | 48 +++-------------------------- src/nnue/nnue_common.h | 21 +++++++++++++ src/nnue/nnue_feature_transformer.h | 42 ++++--------------------- 4 files changed, 36 insertions(+), 107 deletions(-) diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h index b585bc87..20ec2f12 100644 --- a/src/nnue/layers/affine_transform.h +++ b/src/nnue/layers/affine_transform.h @@ -104,13 +104,7 @@ namespace Eval::NNUE::Layers { __m512i sum = _mm512_setzero_si512(); const auto row = reinterpret_cast(&weights_[offset]); for (IndexType j = 0; j < kNumChunks; ++j) { - - #if defined(__MINGW32__) || defined(__MINGW64__) - __m512i product = _mm512_maddubs_epi16(_mm512_loadu_si512(&input_vector[j]), _mm512_load_si512(&row[j])); - #else - __m512i product = _mm512_maddubs_epi16(_mm512_load_si512(&input_vector[j]), 
_mm512_load_si512(&row[j])); - #endif - + __m512i product = _mm512_maddubs_epi16(_mm512_loadA_si512(&input_vector[j]), _mm512_load_si512(&row[j])); product = _mm512_madd_epi16(product, kOnes); sum = _mm512_add_epi32(sum, product); } @@ -124,13 +118,7 @@ namespace Eval::NNUE::Layers { const auto iv_256 = reinterpret_cast(input); const auto row_256 = reinterpret_cast(&weights_[offset]); int j = kNumChunks * 2; - - #if defined(__MINGW32__) || defined(__MINGW64__) // See HACK comment below in AVX2. - __m256i sum256 = _mm256_maddubs_epi16(_mm256_loadu_si256(&iv_256[j]), _mm256_load_si256(&row_256[j])); - #else - __m256i sum256 = _mm256_maddubs_epi16(_mm256_load_si256(&iv_256[j]), _mm256_load_si256(&row_256[j])); - #endif - + __m256i sum256 = _mm256_maddubs_epi16(_mm256_loadA_si256(&iv_256[j]), _mm256_load_si256(&row_256[j])); sum256 = _mm256_madd_epi16(sum256, _mm256_set1_epi16(1)); sum256 = _mm256_hadd_epi32(sum256, sum256); sum256 = _mm256_hadd_epi32(sum256, sum256); @@ -143,18 +131,7 @@ namespace Eval::NNUE::Layers { __m256i sum = _mm256_setzero_si256(); const auto row = reinterpret_cast(&weights_[offset]); for (IndexType j = 0; j < kNumChunks; ++j) { - __m256i product = _mm256_maddubs_epi16( - - #if defined(__MINGW32__) || defined(__MINGW64__) - // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary - // compiled with g++ in MSYS2 crashes here because the output memory is not aligned - // even though alignas is specified. 
- _mm256_loadu_si256 - #else - _mm256_load_si256 - #endif - - (&input_vector[j]), _mm256_load_si256(&row[j])); + __m256i product = _mm256_maddubs_epi16(_mm256_loadA_si256(&input_vector[j]), _mm256_load_si256(&row[j])); product = _mm256_madd_epi16(product, kOnes); sum = _mm256_add_epi32(sum, product); } @@ -168,8 +145,7 @@ namespace Eval::NNUE::Layers { __m128i sum = _mm_cvtsi32_si128(biases_[i]); const auto row = reinterpret_cast(&weights_[offset]); for (IndexType j = 0; j < kNumChunks; ++j) { - __m128i product = _mm_maddubs_epi16( - _mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j])); + __m128i product = _mm_maddubs_epi16(_mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j])); product = _mm_madd_epi16(product, kOnes); sum = _mm_add_epi32(sum, product); } diff --git a/src/nnue/layers/clipped_relu.h b/src/nnue/layers/clipped_relu.h index 7ade598f..13196ec2 100644 --- a/src/nnue/layers/clipped_relu.h +++ b/src/nnue/layers/clipped_relu.h @@ -74,50 +74,12 @@ namespace Eval::NNUE::Layers { const auto out = reinterpret_cast<__m256i*>(output); for (IndexType i = 0; i < kNumChunks; ++i) { const __m256i words0 = _mm256_srai_epi16(_mm256_packs_epi32( - - #if defined(__MINGW32__) || defined(__MINGW64__) - // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary - // compiled with g++ in MSYS2 crashes here because the output memory is not aligned - // even though alignas is specified. 
- _mm256_loadu_si256 - #else - _mm256_load_si256 - #endif - - (&in[i * 4 + 0]), - - #if defined(__MINGW32__) || defined(__MINGW64__) - _mm256_loadu_si256 - #else - _mm256_load_si256 - #endif - - (&in[i * 4 + 1])), kWeightScaleBits); + _mm256_loadA_si256(&in[i * 4 + 0]), + _mm256_loadA_si256(&in[i * 4 + 1])), kWeightScaleBits); const __m256i words1 = _mm256_srai_epi16(_mm256_packs_epi32( - - #if defined(__MINGW32__) || defined(__MINGW64__) - _mm256_loadu_si256 - #else - _mm256_load_si256 - #endif - - (&in[i * 4 + 2]), - - #if defined(__MINGW32__) || defined(__MINGW64__) - _mm256_loadu_si256 - #else - _mm256_load_si256 - #endif - - (&in[i * 4 + 3])), kWeightScaleBits); - - #if defined(__MINGW32__) || defined(__MINGW64__) - _mm256_storeu_si256 - #else - _mm256_store_si256 - #endif - - (&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8( + _mm256_loadA_si256(&in[i * 4 + 2]), + _mm256_loadA_si256(&in[i * 4 + 3])), kWeightScaleBits); + _mm256_storeA_si256(&out[i], _mm256_permutevar8x32_epi32(_mm256_max_epi8( _mm256_packs_epi16(words0, words1), kZero), kOffsets)); } constexpr IndexType kStart = kNumChunks * kSimdWidth; diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h index 972ef3e5..e7ce84f7 100644 --- a/src/nnue/nnue_common.h +++ b/src/nnue/nnue_common.h @@ -37,6 +37,27 @@ #include #endif +// HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Otherwise a binary +// compiled with older g++ crashes because the output memory is not aligned +// even though alignas is specified. 
+#if defined(USE_AVX2) +#if defined(__GNUC__ ) && (__GNUC__ < 9) +#define _mm256_loadA_si256 _mm256_loadu_si256 +#define _mm256_storeA_si256 _mm256_storeu_si256 +#else +#define _mm256_loadA_si256 _mm256_load_si256 +#define _mm256_storeA_si256 _mm256_store_si256 +#endif +#endif + +#if defined(USE_AVX512) +#if defined(__GNUC__ ) && (__GNUC__ < 9) +#define _mm512_loadA_si512 _mm512_loadu_si512 +#else +#define _mm512_loadA_si512 _mm512_load_si512 +#endif +#endif + namespace Eval::NNUE { // Version of the evaluation file diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index 1cfebbe4..cbcc26f3 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -109,37 +109,11 @@ namespace Eval::NNUE { #if defined(USE_AVX2) auto out = reinterpret_cast<__m256i*>(&output[offset]); for (IndexType j = 0; j < kNumChunks; ++j) { - __m256i sum0 = - - #if defined(__MINGW32__) || defined(__MINGW64__) - // HACK: Use _mm256_loadu_si256() instead of _mm256_load_si256. Because the binary - // compiled with g++ in MSYS2 crashes here because the output memory is not aligned - // even though alignas is specified. 
- _mm256_loadu_si256 - #else - _mm256_load_si256 - #endif - - (&reinterpret_cast( - accumulation[perspectives[p]][0])[j * 2 + 0]); - __m256i sum1 = - - #if defined(__MINGW32__) || defined(__MINGW64__) - _mm256_loadu_si256 - #else - _mm256_load_si256 - #endif - - (&reinterpret_cast( - accumulation[perspectives[p]][0])[j * 2 + 1]); - - #if defined(__MINGW32__) || defined(__MINGW64__) - _mm256_storeu_si256 - #else - _mm256_store_si256 - #endif - - (&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8( + __m256i sum0 = _mm256_loadA_si256( + &reinterpret_cast(accumulation[perspectives[p]][0])[j * 2 + 0]); + __m256i sum1 = _mm256_loadA_si256( + &reinterpret_cast(accumulation[perspectives[p]][0])[j * 2 + 1]); + _mm256_storeA_si256(&out[j], _mm256_permute4x64_epi64(_mm256_max_epi8( _mm256_packs_epi16(sum0, sum1), kZero), kControl)); } @@ -202,11 +176,7 @@ namespace Eval::NNUE { auto column = reinterpret_cast(&weights_[offset]); constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); for (IndexType j = 0; j < kNumChunks; ++j) { - #if defined(__MINGW32__) || defined(__MINGW64__) - _mm256_storeu_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadu_si256(&accumulation[j]), column[j])); - #else - accumulation[j] = _mm256_add_epi16(accumulation[j], column[j]); - #endif + _mm256_storeA_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadA_si256(&accumulation[j]), column[j])); } #elif defined(USE_SSE2) From ad2ad4c65706c18a5383506d361f1f23fc6a26ab Mon Sep 17 00:00:00 2001 From: SFisGOD Date: Mon, 10 Aug 2020 15:39:22 +0800 Subject: [PATCH 54/86] Modify castling extension Extend castling only if there are few friendly pieces on the castling side. 
Inspired by silversolver1's (Rahul Dsilva) test https://tests.stockfishchess.org/tests/view/5f0fef560640035f9d2978cf STC: LLR: 2.94 (-2.94,2.94) {-0.50,1.50} Total: 7096 W: 947 L: 818 D: 5331 Ptnml(0-2): 32, 604, 2181, 665, 66 https://tests.stockfishchess.org/tests/view/5f309f729081672066537426 LTC: LLR: 2.96 (-2.94,2.94) {0.25,1.75} Total: 4712 W: 300 L: 215 D: 4197 Ptnml(0-2): 2, 190, 1895, 259, 10 https://tests.stockfishchess.org/tests/view/5f30a2039081672066537430 closes https://github.com/official-stockfish/Stockfish/pull/2970 Bench: 4094850 --- src/search.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index 0a2519b6..3d2bb422 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1131,7 +1131,8 @@ moves_loop: // When in check, search starts from here extension = 1; // Castling extension - if (type_of(move) == CASTLING) + if ( type_of(move) == CASTLING + && popcount(pos.pieces(us) & ~pos.pieces(PAWN) & (to_sq(move) & KingSide ? KingSide : QueenSide)) <= 3) extension = 1; // Late irreversible move extension From cb0504028e8830dbc71be53cbd701d78c3d562a1 Mon Sep 17 00:00:00 2001 From: sf-x Date: Sun, 9 Aug 2020 18:01:18 +0300 Subject: [PATCH 55/86] Makefile rework/cleanup Makefile targets x86-64-sse42, x86-64-sse3 are removed; x86-64-sse41 is renamed to x86-64-sse41-popcnt (it did enable popcnt). Makefile variables sse3, sse42, their associated compilation flags and code in misc.cpp are removed.
closes https://github.com/official-stockfish/Stockfish/pull/2922 No functional change --- src/Makefile | 58 +++------------------------------------------------- src/misc.cpp | 6 ------ 2 files changed, 3 insertions(+), 61 deletions(-) diff --git a/src/Makefile b/src/Makefile index 571172b2..a48e7dcb 100644 --- a/src/Makefile +++ b/src/Makefile @@ -68,10 +68,8 @@ endif # prefetch = yes/no --- -DUSE_PREFETCH --- Use prefetch asm-instruction # popcnt = yes/no --- -DUSE_POPCNT --- Use popcnt asm-instruction # sse = yes/no --- -msse --- Use Intel Streaming SIMD Extensions -# sse3 = yes/no --- -msse3 --- Use Intel Streaming SIMD Extensions 3 # ssse3 = yes/no --- -mssse3 --- Use Intel Supplemental Streaming SIMD Extensions 3 # sse41 = yes/no --- -msse4.1 --- Use Intel Streaming SIMD Extensions 4.1 -# sse42 = yes/no --- -msse4.2 --- Use Intel Streaming SIMD Extensions 4.2 # avx2 = yes/no --- -mavx2 --- Use Intel Advanced Vector Extensions 2 # pext = yes/no --- -DUSE_PEXT --- Use pext x86_64 asm-instruction # avx512 = yes/no --- -mavx512bw --- Use Intel Advanced Vector Extensions 512 @@ -89,10 +87,8 @@ bits = 64 prefetch = no popcnt = no sse = no -sse3 = no ssse3 = no sse41 = no -sse42 = no avx2 = no pext = no avx512 = no @@ -127,18 +123,10 @@ ifeq ($(ARCH),x86-64) sse = yes endif -ifeq ($(ARCH),x86-64-sse3) - arch = x86_64 - prefetch = yes - sse = yes - sse3 = yes -endif - ifeq ($(ARCH),x86-64-sse3-popcnt) arch = x86_64 prefetch = yes sse = yes - sse3 = yes popcnt = yes endif @@ -146,39 +134,25 @@ ifeq ($(ARCH),x86-64-ssse3) arch = x86_64 prefetch = yes sse = yes - sse3 = yes ssse3 = yes endif -ifeq ($(ARCH),x86-64-sse41) - arch = x86_64 - prefetch = yes - popcnt = yes - sse = yes - sse3 = yes - ssse3 = yes - sse41 = yes -endif - ifeq ($(ARCH),x86-64-modern) arch = x86_64 prefetch = yes popcnt = yes sse = yes - sse3 = yes ssse3 = yes sse41 = yes endif -ifeq ($(ARCH),x86-64-sse42) +ifeq ($(ARCH),x86-64-sse41-popcnt) arch = x86_64 prefetch = yes popcnt = yes sse = yes - sse3 
= yes ssse3 = yes sse41 = yes - sse42 = yes endif ifeq ($(ARCH),x86-64-avx2) @@ -186,10 +160,8 @@ ifeq ($(ARCH),x86-64-avx2) prefetch = yes popcnt = yes sse = yes - sse3 = yes ssse3 = yes sse41 = yes - sse42 = yes avx2 = yes endif @@ -198,10 +170,8 @@ ifeq ($(ARCH),x86-64-bmi2) prefetch = yes popcnt = yes sse = yes - sse3 = yes ssse3 = yes sse41 = yes - sse42 = yes avx2 = yes pext = yes endif @@ -211,10 +181,8 @@ ifeq ($(ARCH),x86-64-avx512) prefetch = yes popcnt = yes sse = yes - sse3 = yes ssse3 = yes sse41 = yes - sse42 = yes avx2 = yes pext = yes avx512 = yes @@ -450,13 +418,6 @@ ifeq ($(avx512),yes) endif endif -ifeq ($(sse42),yes) - CXXFLAGS += -DUSE_SSE42 - ifeq ($(comp),$(filter $(comp),gcc clang mingw)) - CXXFLAGS += -msse4.2 - endif -endif - ifeq ($(sse41),yes) CXXFLAGS += -DUSE_SSE41 ifeq ($(comp),$(filter $(comp),gcc clang mingw)) @@ -471,13 +432,6 @@ ifeq ($(ssse3),yes) endif endif -ifeq ($(sse3),yes) - CXXFLAGS += -DUSE_SSE3 - ifeq ($(comp),$(filter $(comp),gcc clang mingw)) - CXXFLAGS += -msse3 - endif -endif - ifeq ($(neon),yes) CXXFLAGS += -DUSE_NEON endif @@ -557,12 +511,10 @@ help: @echo "x86-64-avx512 > x86 64-bit with avx512 support" @echo "x86-64-bmi2 > x86 64-bit with bmi2 support" @echo "x86-64-avx2 > x86 64-bit with avx2 support" - @echo "x86-64-sse42 > x86 64-bit with sse42 support" - @echo "x86-64-modern > x86 64-bit with sse41 support (x86-64-sse41)" - @echo "x86-64-sse41 > x86 64-bit with sse41 support" + @echo "x86-64-sse41-popcnt > x86 64-bit with sse41 and popcnt support" + @echo "x86-64-modern > the same as previous (x86-64-sse41-popcnt)" @echo "x86-64-ssse3 > x86 64-bit with ssse3 support" @echo "x86-64-sse3-popcnt > x86 64-bit with sse3 and popcnt support" - @echo "x86-64-sse3 > x86 64-bit with sse3 support" @echo "x86-64 > x86 64-bit generic" @echo "x86-32 > x86 32-bit (also enables SSE)" @echo "x86-32-old > x86 32-bit fall back for old hardware" @@ -669,10 +621,8 @@ config-sanity: @echo "prefetch: '$(prefetch)'" @echo "popcnt: 
'$(popcnt)'" @echo "sse: '$(sse)'" - @echo "sse3: '$(sse3)'" @echo "ssse3: '$(ssse3)'" @echo "sse41: '$(sse41)'" - @echo "sse42: '$(sse42)'" @echo "avx2: '$(avx2)'" @echo "pext: '$(pext)'" @echo "avx512: '$(avx512)'" @@ -695,10 +645,8 @@ config-sanity: @test "$(prefetch)" = "yes" || test "$(prefetch)" = "no" @test "$(popcnt)" = "yes" || test "$(popcnt)" = "no" @test "$(sse)" = "yes" || test "$(sse)" = "no" - @test "$(sse3)" = "yes" || test "$(sse3)" = "no" @test "$(ssse3)" = "yes" || test "$(ssse3)" = "no" @test "$(sse41)" = "yes" || test "$(sse41)" = "no" - @test "$(sse42)" = "yes" || test "$(sse42)" = "no" @test "$(avx2)" = "yes" || test "$(avx2)" = "no" @test "$(pext)" = "yes" || test "$(pext)" = "no" @test "$(avx512)" = "yes" || test "$(avx512)" = "no" diff --git a/src/misc.cpp b/src/misc.cpp index bdd7bccb..5061ae13 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -220,17 +220,11 @@ const std::string compiler_info() { #if defined(USE_AVX2) compiler += " AVX2"; #endif - #if defined(USE_SSE42) - compiler += " SSE42"; - #endif #if defined(USE_SSE41) compiler += " SSE41"; #endif #if defined(USE_SSSE3) compiler += " SSSE3"; - #endif - #if defined(USE_SSE3) - compiler += " SSE3"; #endif compiler += (HasPext ? " BMI2" : ""); compiler += (HasPopCnt ? 
" POPCNT" : ""); From f948cd008d3a289ebbadc463271f84888e8069ba Mon Sep 17 00:00:00 2001 From: mstembera Date: Sun, 9 Aug 2020 16:23:33 -0700 Subject: [PATCH 56/86] Cleanup and optimize SSE/AVX code AVX512 +4% faster AVX2 +1% faster SSSE3 +5% faster passed non-regression STC: STC https://tests.stockfishchess.org/tests/view/5f31249f90816720665374f6 LLR: 2.96 (-2.94,2.94) {-1.50,0.50} Total: 17576 W: 2344 L: 2245 D: 12987 Ptnml(0-2): 127, 1570, 5292, 1675, 124 closes https://github.com/official-stockfish/Stockfish/pull/2962 No functional change --- src/nnue/layers/affine_transform.h | 46 +++++++++++++++-------------- src/nnue/nnue_accumulator.h | 2 +- src/nnue/nnue_common.h | 6 ++-- src/nnue/nnue_feature_transformer.h | 21 +++++++------ 4 files changed, 41 insertions(+), 34 deletions(-) diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h index 20ec2f12..89cfaad7 100644 --- a/src/nnue/layers/affine_transform.h +++ b/src/nnue/layers/affine_transform.h @@ -108,24 +108,19 @@ namespace Eval::NNUE::Layers { product = _mm512_madd_epi16(product, kOnes); sum = _mm512_add_epi32(sum, product); } - output[i] = _mm512_reduce_add_epi32(sum) + biases_[i]; // Note: Changing kMaxSimdWidth from 32 to 64 breaks loading existing networks. // As a result kPaddedInputDimensions may not be an even multiple of 64(512bit) // and we have to do one more 256bit chunk. 
if (kPaddedInputDimensions != kNumChunks * kSimdWidth * 2) { - const auto iv_256 = reinterpret_cast(input); - const auto row_256 = reinterpret_cast(&weights_[offset]); - int j = kNumChunks * 2; - __m256i sum256 = _mm256_maddubs_epi16(_mm256_loadA_si256(&iv_256[j]), _mm256_load_si256(&row_256[j])); - sum256 = _mm256_madd_epi16(sum256, _mm256_set1_epi16(1)); - sum256 = _mm256_hadd_epi32(sum256, sum256); - sum256 = _mm256_hadd_epi32(sum256, sum256); - const __m128i lo = _mm256_extracti128_si256(sum256, 0); - const __m128i hi = _mm256_extracti128_si256(sum256, 1); - output[i] += _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi); + const auto iv256 = reinterpret_cast(&input_vector[kNumChunks]); + const auto row256 = reinterpret_cast(&row[kNumChunks]); + __m256i product256 = _mm256_maddubs_epi16(_mm256_loadA_si256(&iv256[0]), _mm256_load_si256(&row256[0])); + product256 = _mm256_madd_epi16(product256, _mm256_set1_epi16(1)); + sum = _mm512_add_epi32(sum, _mm512_zextsi256_si512(product256)); } + output[i] = _mm512_reduce_add_epi32(sum) + biases_[i]; #elif defined(USE_AVX2) __m256i sum = _mm256_setzero_si256(); @@ -135,23 +130,30 @@ namespace Eval::NNUE::Layers { product = _mm256_madd_epi16(product, kOnes); sum = _mm256_add_epi32(sum, product); } - sum = _mm256_hadd_epi32(sum, sum); - sum = _mm256_hadd_epi32(sum, sum); - const __m128i lo = _mm256_extracti128_si256(sum, 0); - const __m128i hi = _mm256_extracti128_si256(sum, 1); - output[i] = _mm_cvtsi128_si32(lo) + _mm_cvtsi128_si32(hi) + biases_[i]; + __m128i sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum), _mm256_extracti128_si256(sum, 1)); + sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_BADC)); + sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_PERM_CDAB)); + output[i] = _mm_cvtsi128_si32(sum128) + biases_[i]; #elif defined(USE_SSSE3) - __m128i sum = _mm_cvtsi32_si128(biases_[i]); + __m128i sum = _mm_setzero_si128(); const auto row = reinterpret_cast(&weights_[offset]); - for (IndexType j = 
0; j < kNumChunks; ++j) { - __m128i product = _mm_maddubs_epi16(_mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j])); + for (int j = 0; j < (int)kNumChunks - 1; j += 2) { + __m128i product0 = _mm_maddubs_epi16(_mm_load_si128(&input_vector[j]), _mm_load_si128(&row[j])); + product0 = _mm_madd_epi16(product0, kOnes); + sum = _mm_add_epi32(sum, product0); + __m128i product1 = _mm_maddubs_epi16(_mm_load_si128(&input_vector[j+1]), _mm_load_si128(&row[j+1])); + product1 = _mm_madd_epi16(product1, kOnes); + sum = _mm_add_epi32(sum, product1); + } + if (kNumChunks & 0x1) { + __m128i product = _mm_maddubs_epi16(_mm_load_si128(&input_vector[kNumChunks-1]), _mm_load_si128(&row[kNumChunks-1])); product = _mm_madd_epi16(product, kOnes); sum = _mm_add_epi32(sum, product); } - sum = _mm_hadd_epi32(sum, sum); - sum = _mm_hadd_epi32(sum, sum); - output[i] = _mm_cvtsi128_si32(sum); + sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0x4E)); //_MM_PERM_BADC + sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB + output[i] = _mm_cvtsi128_si32(sum) + biases_[i]; #elif defined(USE_NEON) int32x4_t sum = {biases_[i]}; diff --git a/src/nnue/nnue_accumulator.h b/src/nnue/nnue_accumulator.h index 2a354a3c..69dfaad2 100644 --- a/src/nnue/nnue_accumulator.h +++ b/src/nnue/nnue_accumulator.h @@ -26,7 +26,7 @@ namespace Eval::NNUE { // Class that holds the result of affine transformation of input features - struct alignas(32) Accumulator { + struct alignas(kCacheLineSize) Accumulator { std::int16_t accumulation[2][kRefreshTriggers.size()][kTransformedFeatureDimensions]; Value score; diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h index e7ce84f7..ff33cc79 100644 --- a/src/nnue/nnue_common.h +++ b/src/nnue/nnue_common.h @@ -52,9 +52,11 @@ #if defined(USE_AVX512) #if defined(__GNUC__ ) && (__GNUC__ < 9) -#define _mm512_loadA_si512 _mm512_loadu_si512 +#define _mm512_loadA_si512 _mm512_loadu_si512 +#define _mm512_storeA_si512 _mm512_storeu_si512 #else -#define 
_mm512_loadA_si512 _mm512_load_si512 +#define _mm512_loadA_si512 _mm512_load_si512 +#define _mm512_storeA_si512 _mm512_store_si512 #endif #endif diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index cbcc26f3..3818e444 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -169,38 +169,41 @@ namespace Eval::NNUE { kHalfDimensions * sizeof(BiasType)); for (const auto index : active_indices[perspective]) { const IndexType offset = kHalfDimensions * index; + #if defined(USE_AVX512) + auto accumulation = reinterpret_cast<__m512i*>( + &accumulator.accumulation[perspective][i][0]); + auto column = reinterpret_cast(&weights_[offset]); + constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; + for (IndexType j = 0; j < kNumChunks; ++j) + _mm512_storeA_si512(&accumulation[j], _mm512_add_epi16(_mm512_loadA_si512(&accumulation[j]), column[j])); - #if defined(USE_AVX2) + #elif defined(USE_AVX2) auto accumulation = reinterpret_cast<__m256i*>( &accumulator.accumulation[perspective][i][0]); auto column = reinterpret_cast(&weights_[offset]); constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) { + for (IndexType j = 0; j < kNumChunks; ++j) _mm256_storeA_si256(&accumulation[j], _mm256_add_epi16(_mm256_loadA_si256(&accumulation[j]), column[j])); - } #elif defined(USE_SSE2) auto accumulation = reinterpret_cast<__m128i*>( &accumulator.accumulation[perspective][i][0]); auto column = reinterpret_cast(&weights_[offset]); constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) { + for (IndexType j = 0; j < kNumChunks; ++j) accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); - } #elif defined(USE_NEON) auto accumulation = reinterpret_cast( &accumulator.accumulation[perspective][i][0]); auto column = reinterpret_cast(&weights_[offset]); constexpr IndexType kNumChunks = 
kHalfDimensions / (kSimdWidth / 2); - for (IndexType j = 0; j < kNumChunks; ++j) { + for (IndexType j = 0; j < kNumChunks; ++j) accumulation[j] = vaddq_s16(accumulation[j], column[j]); - } #else - for (IndexType j = 0; j < kHalfDimensions; ++j) { + for (IndexType j = 0; j < kHalfDimensions; ++j) accumulator.accumulation[perspective][i][j] += weights_[offset + j]; - } #endif } From 21df37d7fd4dcc9b4a9c319382cc43685c0259c8 Mon Sep 17 00:00:00 2001 From: Fanael Linithien Date: Sun, 9 Aug 2020 16:20:45 +0200 Subject: [PATCH 57/86] Provide vectorized NNUE code for SSE2 and MMX targets This patch allows old x86 CPUs, from AMD K8 (which the x86-64 baseline targets) all the way down to the Pentium MMX, to benefit from NNUE with comparable performance hit versus hand-written eval as on more modern processors. NPS of the bench with NNUE enabled on a Pentium III 1.13 GHz (using the MMX code): master: 38951 this patch: 80586 NPS of the bench with NNUE enabled using baseline x86-64 arch, which is how linux distros are likely to package stockfish, on a modern CPU (using the SSE2 code): master: 882584 this patch: 1203945 closes https://github.com/official-stockfish/Stockfish/pull/2956 No functional change. 
--- AUTHORS | 1 + src/Makefile | 13 ++++++- src/misc.cpp | 3 ++ src/nnue/layers/affine_transform.h | 59 ++++++++++++++++++++++++++++- src/nnue/layers/clipped_relu.h | 20 +++++++++- src/nnue/nnue_common.h | 6 +++ src/nnue/nnue_feature_transformer.h | 54 +++++++++++++++++++++++++- 7 files changed, 150 insertions(+), 6 deletions(-) diff --git a/AUTHORS b/AUTHORS index 21ef3e50..41b89705 100644 --- a/AUTHORS +++ b/AUTHORS @@ -53,6 +53,7 @@ Ernesto Gatti Linmiao Xu (linrock) Fabian Beuke (madnight) Fabian Fichter (ianfab) +Fanael Linithien (Fanael) fanon Fauzi Akram Dabat (FauziAkram) Felix Wittmann diff --git a/src/Makefile b/src/Makefile index a48e7dcb..3d84f482 100644 --- a/src/Makefile +++ b/src/Makefile @@ -86,6 +86,7 @@ sanitize = no bits = 64 prefetch = no popcnt = no +mmx = no sse = no ssse3 = no sse41 = no @@ -110,6 +111,7 @@ ifeq ($(ARCH),x86-32) arch = i386 bits = 32 prefetch = yes + mmx = yes sse = yes endif @@ -250,7 +252,7 @@ ifeq ($(COMP),gcc) ifneq ($(KERNEL),Darwin) LDFLAGS += -Wl,--no-as-needed endif - + gccversion = $(shell $(CXX) --version) gccisclang = $(findstring clang,$(gccversion)) endif @@ -432,6 +434,13 @@ ifeq ($(ssse3),yes) endif endif +ifeq ($(mmx),yes) + CXXFLAGS += -DUSE_MMX + ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + CXXFLAGS += -mmmx + endif +endif + ifeq ($(neon),yes) CXXFLAGS += -DUSE_NEON endif @@ -516,7 +525,7 @@ help: @echo "x86-64-ssse3 > x86 64-bit with ssse3 support" @echo "x86-64-sse3-popcnt > x86 64-bit with sse3 and popcnt support" @echo "x86-64 > x86 64-bit generic" - @echo "x86-32 > x86 32-bit (also enables SSE)" + @echo "x86-32 > x86 32-bit (also enables MMX and SSE)" @echo "x86-32-old > x86 32-bit fall back for old hardware" @echo "ppc-64 > PPC 64-bit" @echo "ppc-32 > PPC 32-bit" diff --git a/src/misc.cpp b/src/misc.cpp index 5061ae13..401a6505 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -228,6 +228,9 @@ const std::string compiler_info() { #endif compiler += (HasPext ? " BMI2" : ""); compiler += (HasPopCnt ? 
" POPCNT" : ""); + #if defined(USE_MMX) + compiler += " MMX"; + #endif #if !defined(NDEBUG) compiler += " DEBUG"; #endif diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h index 89cfaad7..985ee71a 100644 --- a/src/nnue/layers/affine_transform.h +++ b/src/nnue/layers/affine_transform.h @@ -87,11 +87,20 @@ namespace Eval::NNUE::Layers { const __m256i kOnes = _mm256_set1_epi16(1); const auto input_vector = reinterpret_cast(input); - #elif defined(USE_SSSE3) + #elif defined(USE_SSE2) constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; + #ifndef USE_SSSE3 + const __m128i kZeros = _mm_setzero_si128(); + #else const __m128i kOnes = _mm_set1_epi16(1); + #endif const auto input_vector = reinterpret_cast(input); + #elif defined(USE_MMX) + constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; + const __m64 kZeros = _mm_setzero_si64(); + const auto input_vector = reinterpret_cast(input); + #elif defined(USE_NEON) constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; const auto input_vector = reinterpret_cast(input); @@ -155,6 +164,51 @@ namespace Eval::NNUE::Layers { sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); //_MM_PERM_CDAB output[i] = _mm_cvtsi128_si32(sum) + biases_[i]; + #elif defined(USE_SSE2) + __m128i sum_lo = _mm_cvtsi32_si128(biases_[i]); + __m128i sum_hi = kZeros; + const auto row = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + __m128i row_j = _mm_load_si128(&row[j]); + __m128i input_j = _mm_load_si128(&input_vector[j]); + __m128i row_signs = _mm_cmpgt_epi8(kZeros, row_j); + __m128i extended_row_lo = _mm_unpacklo_epi8(row_j, row_signs); + __m128i extended_row_hi = _mm_unpackhi_epi8(row_j, row_signs); + __m128i extended_input_lo = _mm_unpacklo_epi8(input_j, kZeros); + __m128i extended_input_hi = _mm_unpackhi_epi8(input_j, kZeros); + __m128i product_lo = _mm_madd_epi16(extended_row_lo, extended_input_lo); + __m128i product_hi = 
_mm_madd_epi16(extended_row_hi, extended_input_hi); + sum_lo = _mm_add_epi32(sum_lo, product_lo); + sum_hi = _mm_add_epi32(sum_hi, product_hi); + } + __m128i sum = _mm_add_epi32(sum_lo, sum_hi); + __m128i sum_high_64 = _mm_shuffle_epi32(sum, _MM_SHUFFLE(1, 0, 3, 2)); + sum = _mm_add_epi32(sum, sum_high_64); + __m128i sum_second_32 = _mm_shufflelo_epi16(sum, _MM_SHUFFLE(1, 0, 3, 2)); + sum = _mm_add_epi32(sum, sum_second_32); + output[i] = _mm_cvtsi128_si32(sum); + + #elif defined(USE_MMX) + __m64 sum_lo = _mm_cvtsi32_si64(biases_[i]); + __m64 sum_hi = kZeros; + const auto row = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + __m64 row_j = row[j]; + __m64 input_j = input_vector[j]; + __m64 row_signs = _mm_cmpgt_pi8(kZeros, row_j); + __m64 extended_row_lo = _mm_unpacklo_pi8(row_j, row_signs); + __m64 extended_row_hi = _mm_unpackhi_pi8(row_j, row_signs); + __m64 extended_input_lo = _mm_unpacklo_pi8(input_j, kZeros); + __m64 extended_input_hi = _mm_unpackhi_pi8(input_j, kZeros); + __m64 product_lo = _mm_madd_pi16(extended_row_lo, extended_input_lo); + __m64 product_hi = _mm_madd_pi16(extended_row_hi, extended_input_hi); + sum_lo = _mm_add_pi32(sum_lo, product_lo); + sum_hi = _mm_add_pi32(sum_hi, product_hi); + } + __m64 sum = _mm_add_pi32(sum_lo, sum_hi); + sum = _mm_add_pi32(sum, _mm_unpackhi_pi32(sum, sum)); + output[i] = _mm_cvtsi64_si32(sum); + #elif defined(USE_NEON) int32x4_t sum = {biases_[i]}; const auto row = reinterpret_cast(&weights_[offset]); @@ -174,6 +228,9 @@ namespace Eval::NNUE::Layers { #endif } + #if defined(USE_MMX) + _mm_empty(); + #endif return output; } diff --git a/src/nnue/layers/clipped_relu.h b/src/nnue/layers/clipped_relu.h index 13196ec2..44d8a7de 100644 --- a/src/nnue/layers/clipped_relu.h +++ b/src/nnue/layers/clipped_relu.h @@ -84,7 +84,7 @@ namespace Eval::NNUE::Layers { } constexpr IndexType kStart = kNumChunks * kSimdWidth; - #elif defined(USE_SSSE3) + #elif defined(USE_SSE2) constexpr IndexType 
kNumChunks = kInputDimensions / kSimdWidth; #ifdef USE_SSE41 @@ -115,6 +115,24 @@ namespace Eval::NNUE::Layers { } constexpr IndexType kStart = kNumChunks * kSimdWidth; + #elif defined(USE_MMX) + constexpr IndexType kNumChunks = kInputDimensions / kSimdWidth; + const __m64 k0x80s = _mm_set1_pi8(-128); + const auto in = reinterpret_cast(input); + const auto out = reinterpret_cast<__m64*>(output); + for (IndexType i = 0; i < kNumChunks; ++i) { + const __m64 words0 = _mm_srai_pi16( + _mm_packs_pi32(in[i * 4 + 0], in[i * 4 + 1]), + kWeightScaleBits); + const __m64 words1 = _mm_srai_pi16( + _mm_packs_pi32(in[i * 4 + 2], in[i * 4 + 3]), + kWeightScaleBits); + const __m64 packedbytes = _mm_packs_pi16(words0, words1); + out[i] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s); + } + _mm_empty(); + constexpr IndexType kStart = kNumChunks * kSimdWidth; + #elif defined(USE_NEON) constexpr IndexType kNumChunks = kInputDimensions / (kSimdWidth / 2); const int8x8_t kZero = {0}; diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h index ff33cc79..cb1251c5 100644 --- a/src/nnue/nnue_common.h +++ b/src/nnue/nnue_common.h @@ -33,6 +33,9 @@ #elif defined(USE_SSE2) #include +#elif defined(USE_MMX) +#include + #elif defined(USE_NEON) #include #endif @@ -79,6 +82,9 @@ namespace Eval::NNUE { #elif defined(USE_SSE2) constexpr std::size_t kSimdWidth = 16; + #elif defined(USE_MMX) + constexpr std::size_t kSimdWidth = 8; + #elif defined(USE_NEON) constexpr std::size_t kSimdWidth = 16; #endif diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index 3818e444..40f2603d 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -88,7 +88,7 @@ namespace Eval::NNUE { constexpr int kControl = 0b11011000; const __m256i kZero = _mm256_setzero_si256(); - #elif defined(USE_SSSE3) + #elif defined(USE_SSE2) constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; #ifdef USE_SSE41 @@ -97,6 +97,10 @@ namespace 
Eval::NNUE { const __m128i k0x80s = _mm_set1_epi8(-128); #endif + #elif defined(USE_MMX) + constexpr IndexType kNumChunks = kHalfDimensions / kSimdWidth; + const __m64 k0x80s = _mm_set1_pi8(-128); + #elif defined(USE_NEON) constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); const int8x8_t kZero = {0}; @@ -117,7 +121,7 @@ namespace Eval::NNUE { _mm256_packs_epi16(sum0, sum1), kZero), kControl)); } - #elif defined(USE_SSSE3) + #elif defined(USE_SSE2) auto out = reinterpret_cast<__m128i*>(&output[offset]); for (IndexType j = 0; j < kNumChunks; ++j) { __m128i sum0 = _mm_load_si128(&reinterpret_cast( @@ -137,6 +141,17 @@ namespace Eval::NNUE { ); } + #elif defined(USE_MMX) + auto out = reinterpret_cast<__m64*>(&output[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + __m64 sum0 = *(&reinterpret_cast( + accumulation[perspectives[p]][0])[j * 2 + 0]); + __m64 sum1 = *(&reinterpret_cast( + accumulation[perspectives[p]][0])[j * 2 + 1]); + const __m64 packedbytes = _mm_packs_pi16(sum0, sum1); + out[j] = _mm_subs_pi8(_mm_adds_pi8(packedbytes, k0x80s), k0x80s); + } + #elif defined(USE_NEON) const auto out = reinterpret_cast(&output[offset]); for (IndexType j = 0; j < kNumChunks; ++j) { @@ -154,6 +169,9 @@ namespace Eval::NNUE { #endif } + #if defined(USE_MMX) + _mm_empty(); + #endif } private: @@ -193,6 +211,15 @@ namespace Eval::NNUE { for (IndexType j = 0; j < kNumChunks; ++j) accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); + #elif defined(USE_MMX) + auto accumulation = reinterpret_cast<__m64*>( + &accumulator.accumulation[perspective][i][0]); + auto column = reinterpret_cast(&weights_[offset]); + constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = _mm_add_pi16(accumulation[j], column[j]); + } + #elif defined(USE_NEON) auto accumulation = reinterpret_cast( &accumulator.accumulation[perspective][i][0]); @@ -208,6 +235,9 @@ namespace Eval::NNUE { } } + #if 
defined(USE_MMX) + _mm_empty(); + #endif accumulator.computed_accumulation = true; accumulator.computed_score = false; @@ -234,6 +264,11 @@ namespace Eval::NNUE { auto accumulation = reinterpret_cast<__m128i*>( &accumulator.accumulation[perspective][i][0]); + #elif defined(USE_MMX) + constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); + auto accumulation = reinterpret_cast<__m64*>( + &accumulator.accumulation[perspective][i][0]); + #elif defined(USE_NEON) constexpr IndexType kNumChunks = kHalfDimensions / (kSimdWidth / 2); auto accumulation = reinterpret_cast( @@ -263,6 +298,12 @@ namespace Eval::NNUE { accumulation[j] = _mm_sub_epi16(accumulation[j], column[j]); } + #elif defined(USE_MMX) + auto column = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = _mm_sub_pi16(accumulation[j], column[j]); + } + #elif defined(USE_NEON) auto column = reinterpret_cast(&weights_[offset]); for (IndexType j = 0; j < kNumChunks; ++j) { @@ -294,6 +335,12 @@ namespace Eval::NNUE { accumulation[j] = _mm_add_epi16(accumulation[j], column[j]); } + #elif defined(USE_MMX) + auto column = reinterpret_cast(&weights_[offset]); + for (IndexType j = 0; j < kNumChunks; ++j) { + accumulation[j] = _mm_add_pi16(accumulation[j], column[j]); + } + #elif defined(USE_NEON) auto column = reinterpret_cast(&weights_[offset]); for (IndexType j = 0; j < kNumChunks; ++j) { @@ -310,6 +357,9 @@ namespace Eval::NNUE { } } } + #if defined(USE_MMX) + _mm_empty(); + #endif accumulator.computed_accumulation = true; accumulator.computed_score = false; From 220ef1d27d9cd006a30b07ab726999c8181d10f0 Mon Sep 17 00:00:00 2001 From: Unai Corzo Date: Mon, 10 Aug 2020 15:38:44 +0200 Subject: [PATCH 58/86] Assorted search parameter tune STC https://tests.stockfishchess.org/tests/view/5f31219090816720665374ec LLR: 2.96 (-2.94,2.94) {-0.50,1.50} Total: 3376 W: 487 L: 359 D: 2530 Ptnml(0-2): 17, 253, 1042, 337, 39 LTC 
https://tests.stockfishchess.org/tests/view/5f3127f79081672066537502 LLR: 2.93 (-2.94,2.94) {0.25,1.75} Total: 8360 W: 581 L: 475 D: 7304 Ptnml(0-2): 11, 407, 3238, 513, 11 closes https://github.com/official-stockfish/Stockfish/pull/2971 bench: 4733874 --- src/search.cpp | 60 +++++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 3d2bb422..676427f7 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -63,9 +63,9 @@ namespace { constexpr uint64_t TtHitAverageResolution = 1024; // Razor and futility margins - constexpr int RazorMargin = 527; + constexpr int RazorMargin = 510; Value futility_margin(Depth d, bool improving) { - return Value(227 * (d - improving)); + return Value(223 * (d - improving)); } // Reductions lookup table, initialized at startup @@ -73,7 +73,7 @@ namespace { Depth reduction(bool i, Depth d, int mn) { int r = Reductions[d] * Reductions[mn]; - return (r + 570) / 1024 + (!i && r > 1018); + return (r + 509) / 1024 + (!i && r > 894); } constexpr int futility_move_count(bool improving, Depth depth) { @@ -82,7 +82,7 @@ namespace { // History and stats update bonus, based on depth int stat_bonus(Depth d) { - return d > 15 ? 27 : 17 * d * d + 133 * d - 134; + return d > 13 ? 
29 : 17 * d * d + 134 * d - 134; } // Add a small random component to draw evaluations to avoid 3fold-blindness @@ -192,7 +192,7 @@ namespace { void Search::init() { for (int i = 1; i < MAX_MOVES; ++i) - Reductions[i] = int((24.8 + std::log(Threads.size())) * std::log(i)); + Reductions[i] = int((22.0 + std::log(Threads.size())) * std::log(i)); } @@ -403,12 +403,12 @@ void Thread::search() { if (rootDepth >= 4) { Value prev = rootMoves[pvIdx].previousScore; - delta = Value(19); + delta = Value(17); alpha = std::max(prev - delta,-VALUE_INFINITE); beta = std::min(prev + delta, VALUE_INFINITE); // Adjust contempt based on root move's previousScore (dynamic contempt) - int dct = ct + (110 - ct / 2) * prev / (abs(prev) + 140); + int dct = ct + (105 - ct / 2) * prev / (abs(prev) + 149); contempt = (us == WHITE ? make_score(dct, dct / 2) : -make_score(dct, dct / 2)); @@ -506,13 +506,13 @@ void Thread::search() { && !Threads.stop && !mainThread->stopOnPonderhit) { - double fallingEval = (296 + 6 * (mainThread->bestPreviousScore - bestValue) - + 6 * (mainThread->iterValue[iterIdx] - bestValue)) / 725.0; + double fallingEval = (318 + 6 * (mainThread->bestPreviousScore - bestValue) + + 6 * (mainThread->iterValue[iterIdx] - bestValue)) / 825.0; fallingEval = Utility::clamp(fallingEval, 0.5, 1.5); // If the bestMove is stable over several iterations, reduce time accordingly - timeReduction = lastBestMoveDepth + 10 < completedDepth ? 1.92 : 0.95; - double reduction = (1.47 + mainThread->previousTimeReduction) / (2.22 * timeReduction); + timeReduction = lastBestMoveDepth + 9 < completedDepth ? 
1.92 : 0.95; + double reduction = (1.47 + mainThread->previousTimeReduction) / (2.32 * timeReduction); // Use part of the gained time from a previous stable move for the current move for (Thread* th : Threads) @@ -537,7 +537,7 @@ void Thread::search() { } else if ( Threads.increaseDepth && !mainThread->ponder - && Time.elapsed() > totalTime * 0.56) + && Time.elapsed() > totalTime * 0.58) Threads.increaseDepth = false; else Threads.increaseDepth = true; @@ -824,10 +824,10 @@ namespace { // Step 9. Null move search with verification search (~40 Elo) if ( !PvNode && (ss-1)->currentMove != MOVE_NULL - && (ss-1)->statScore < 23824 + && (ss-1)->statScore < 22977 && eval >= beta && eval >= ss->staticEval - && ss->staticEval >= beta - 28 * depth - 28 * improving + 94 * ttPv + 200 + && ss->staticEval >= beta - 30 * depth - 28 * improving + 84 * ttPv + 182 && !excludedMove && pos.non_pawn_material(us) && (ss->ply >= thisThread->nmpMinPly || us != thisThread->nmpColor)) @@ -835,7 +835,7 @@ namespace { assert(eval - beta >= 0); // Null move dynamic reduction based on depth and value - Depth R = (737 + 77 * depth) / 246 + std::min(int(eval - beta) / 192, 3); + Depth R = (817 + 71 * depth) / 213 + std::min(int(eval - beta) / 192, 3); ss->currentMove = MOVE_NULL; ss->continuationHistory = &thisThread->continuationHistory[0][0][NO_PIECE][0]; @@ -1028,17 +1028,17 @@ moves_loop: // When in check, search starts from here continue; // Futility pruning: parent node (~5 Elo) - if ( lmrDepth < 8 + if ( lmrDepth < 7 && !ss->inCheck - && ss->staticEval + 284 + 188 * lmrDepth <= alpha + && ss->staticEval + 283 + 170 * lmrDepth <= alpha && (*contHist[0])[movedPiece][to_sq(move)] + (*contHist[1])[movedPiece][to_sq(move)] + (*contHist[3])[movedPiece][to_sq(move)] - + (*contHist[5])[movedPiece][to_sq(move)] / 2 < 28388) + + (*contHist[5])[movedPiece][to_sq(move)] / 2 < 27376) continue; // Prune moves with negative SEE (~20 Elo) - if (!pos.see_ge(move, Value(-(29 - std::min(lmrDepth, 17)) * 
lmrDepth * lmrDepth))) + if (!pos.see_ge(move, Value(-(29 - std::min(lmrDepth, 18)) * lmrDepth * lmrDepth))) continue; } else @@ -1055,12 +1055,12 @@ moves_loop: // When in check, search starts from here && !(PvNode && abs(bestValue) < 2) && PieceValue[MG][type_of(movedPiece)] >= PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] && !ss->inCheck - && ss->staticEval + 178 + 261 * lmrDepth + && ss->staticEval + 169 + 244 * lmrDepth + PieceValue[MG][type_of(pos.piece_on(to_sq(move)))] <= alpha) continue; // See based pruning - if (!pos.see_ge(move, Value(-202) * depth)) // (~25 Elo) + if (!pos.see_ge(move, Value(-221) * depth)) // (~25 Elo) continue; } } @@ -1166,7 +1166,7 @@ moves_loop: // When in check, search starts from here || moveCountPruning || ss->staticEval + PieceValue[EG][pos.captured_piece()] <= alpha || cutNode - || thisThread->ttHitAverage < 415 * TtHitAverageResolution * TtHitAverageWindow / 1024)) + || thisThread->ttHitAverage < 427 * TtHitAverageResolution * TtHitAverageWindow / 1024)) { Depth r = reduction(improving, depth, moveCount); @@ -1178,7 +1178,7 @@ moves_loop: // When in check, search starts from here r--; // Decrease reduction if the ttHit running average is large - if (thisThread->ttHitAverage > 473 * TtHitAverageResolution * TtHitAverageWindow / 1024) + if (thisThread->ttHitAverage > 509 * TtHitAverageResolution * TtHitAverageWindow / 1024) r--; // Reduction if other threads are searching this position @@ -1221,17 +1221,17 @@ moves_loop: // When in check, search starts from here + (*contHist[0])[movedPiece][to_sq(move)] + (*contHist[1])[movedPiece][to_sq(move)] + (*contHist[3])[movedPiece][to_sq(move)] - - 4826; + - 5287; // Decrease/increase reduction by comparing opponent's stat score (~10 Elo) - if (ss->statScore >= -100 && (ss-1)->statScore < -112) + if (ss->statScore >= -106 && (ss-1)->statScore < -104) r--; - else if ((ss-1)->statScore >= -125 && ss->statScore < -138) + else if ((ss-1)->statScore >= -119 && ss->statScore < -140) 
r++; // Decrease/increase reduction for moves with a good/bad history (~30 Elo) - r -= ss->statScore / 14615; + r -= ss->statScore / 14884; } else { @@ -1241,7 +1241,7 @@ moves_loop: // When in check, search starts from here // Unless giving check, this capture is likely bad if ( !givesCheck - && ss->staticEval + PieceValue[EG][pos.captured_piece()] + 211 * depth <= alpha) + && ss->staticEval + PieceValue[EG][pos.captured_piece()] + 213 * depth <= alpha) r++; } @@ -1503,7 +1503,7 @@ moves_loop: // When in check, search starts from here if (PvNode && bestValue > alpha) alpha = bestValue; - futilityBase = bestValue + 141; + futilityBase = bestValue + 145; } const PieceToHistory* contHist[] = { (ss-1)->continuationHistory, (ss-2)->continuationHistory, @@ -1754,7 +1754,7 @@ moves_loop: // When in check, search starts from here } if (depth > 11 && ss->ply < MAX_LPH) - thisThread->lowPlyHistory[ss->ply][from_to(move)] << stat_bonus(depth - 6); + thisThread->lowPlyHistory[ss->ply][from_to(move)] << stat_bonus(depth - 7); } // When playing with strength handicap, choose best move among a set of RootMoves From a72cec1ff854a77a92452c2afe2001e05f06e6d4 Mon Sep 17 00:00:00 2001 From: Vizvezdenec Date: Sat, 18 Jul 2020 16:30:00 +0300 Subject: [PATCH 59/86] Add comments to probCut code and rename a variable closes https://github.com/official-stockfish/Stockfish/pull/2819 No functional change --- src/search.cpp | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 676427f7..ef47fd22 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -596,7 +596,7 @@ namespace { Key posKey; Move ttMove, move, excludedMove, bestMove; Depth extension, newDepth; - Value bestValue, value, ttValue, eval, maxValue, probcutBeta; + Value bestValue, value, ttValue, eval, maxValue, probCutBeta; bool ttHit, ttPv, formerPv, givesCheck, improving, didLMR, priorCapture; bool captureOrPromotion, doFullDepthSearch, 
moveCountPruning, ttCapture, singularQuietLMR; @@ -871,7 +871,7 @@ namespace { } } - probcutBeta = beta + 176 - 49 * improving; + probCutBeta = beta + 176 - 49 * improving; // Step 10. ProbCut (~10 Elo) // If we have a good enough capture and a reduced search returns a value @@ -879,21 +879,27 @@ namespace { if ( !PvNode && depth > 4 && abs(beta) < VALUE_TB_WIN_IN_MAX_PLY - && !( ttHit - && tte->depth() >= depth - 3 + // if value from transposition table is lower than probCutBeta, don't attempt probCut + // there and in further interactions with transposition table cutoff depth is set to depth - 3 + // because probCut search has depth set to depth - 4 but we also do a move before it + // so effective depth is equal to depth - 3 + && !( ttHit + && tte->depth() >= depth - 3 && ttValue != VALUE_NONE - && ttValue < probcutBeta)) + && ttValue < probCutBeta)) { + // if ttMove is a capture and value from transposition table is good enough produce probCut + // cutoff without digging into actual probCut search if ( ttHit && tte->depth() >= depth - 3 && ttValue != VALUE_NONE - && ttValue >= probcutBeta + && ttValue >= probCutBeta && ttMove && pos.capture_or_promotion(ttMove)) - return probcutBeta; + return probCutBeta; - assert(probcutBeta < VALUE_INFINITE); - MovePicker mp(pos, ttMove, probcutBeta - ss->staticEval, &captureHistory); + assert(probCutBeta < VALUE_INFINITE); + MovePicker mp(pos, ttMove, probCutBeta - ss->staticEval, &captureHistory); int probCutCount = 0; while ( (move = mp.next_move()) != MOVE_NONE @@ -915,16 +921,17 @@ namespace { pos.do_move(move, st); // Perform a preliminary qsearch to verify that the move holds - value = -qsearch(pos, ss+1, -probcutBeta, -probcutBeta+1); + value = -qsearch(pos, ss+1, -probCutBeta, -probCutBeta+1); // If the qsearch held, perform the regular search - if (value >= probcutBeta) - value = -search(pos, ss+1, -probcutBeta, -probcutBeta+1, depth - 4, !cutNode); + if (value >= probCutBeta) + value = -search(pos, ss+1, 
-probCutBeta, -probCutBeta+1, depth - 4, !cutNode); pos.undo_move(move); - if (value >= probcutBeta) + if (value >= probCutBeta) { + // if transposition table doesn't have equal or more deep info write probCut data into it if ( !(ttHit && tte->depth() >= depth - 3 && ttValue != VALUE_NONE)) From 4ab8b0b738fe4ae58588efb421fd7b1643b2ef66 Mon Sep 17 00:00:00 2001 From: Guy Vreuls Date: Tue, 11 Aug 2020 04:38:38 +0200 Subject: [PATCH 60/86] Fix parallel LTO issues on Windows This adds -save-temps to the linker flags when parallel LTO is used on MinGW/MSYS. fixes #2977 closes https://github.com/official-stockfish/Stockfish/pull/2978 No functional change. --- src/Makefile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index 3d84f482..fd2618a4 100644 --- a/src/Makefile +++ b/src/Makefile @@ -472,6 +472,11 @@ ifeq ($(debug), no) ifeq ($(gccisclang),) CXXFLAGS += -flto LDFLAGS += $(CXXFLAGS) -flto=jobserver + ifneq ($(findstring MINGW,$(KERNEL)),) + LDFLAGS += -save-temps + else ifneq ($(findstring MSYS,$(KERNEL)),) + LDFLAGS += -save-temps + endif else CXXFLAGS += -flto=thin LDFLAGS += $(CXXFLAGS) @@ -605,7 +610,7 @@ objclean: # clean auxiliary profiling files profileclean: @rm -rf profdir - @rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./nnue/*.gcda ./nnue/features/*.gcda + @rm -f bench.txt *.gcda *.gcno ./syzygy/*.gcda ./nnue/*.gcda ./nnue/features/*.gcda *.s @rm -f stockfish.profdata *.profraw default: From 399cddf444666cf1671c5281f7a8e78887b4f400 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Mon, 10 Aug 2020 16:14:17 +0200 Subject: [PATCH 61/86] More aligned_alloc changes to support Android Move to posix_memalign for those platforms, in particular android, that do not fully support c++17 std::aligned_alloc() (and are not windows) see https://github.com/official-stockfish/Stockfish/issues/2860 closes https://github.com/official-stockfish/Stockfish/pull/2973 No functional change --- src/misc.cpp | 14 
+++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/misc.cpp b/src/misc.cpp index 401a6505..fc3746cf 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -51,6 +51,11 @@ typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY); #include #endif +#if (defined(__APPLE__) && defined(_LIBCPP_HAS_C11_FEATURES)) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) +#define POSIXALIGNEDALLOC +#include +#endif + #include "misc.h" #include "thread.h" @@ -318,8 +323,11 @@ void prefetch(void* addr) { /// void* std_aligned_alloc(size_t alignment, size_t size) { -#if (defined(__APPLE__) && defined(_LIBCPP_HAS_C11_FEATURES)) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) - return aligned_alloc(alignment, size); +#if defined(POSIXALIGNEDALLOC) + void *pointer; + if(posix_memalign(&pointer, alignment, size) == 0) + return pointer; + return nullptr; #elif (defined(_WIN32) || (defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES))) return _mm_malloc(size, alignment); #else @@ -328,7 +336,7 @@ void* std_aligned_alloc(size_t alignment, size_t size) { } void std_aligned_free(void* ptr) { -#if (defined(__APPLE__) && defined(_LIBCPP_HAS_C11_FEATURES)) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) +#if defined(POSIXALIGNEDALLOC) free(ptr); #elif (defined(_WIN32) || (defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES))) _mm_free(ptr); From f46c73040c16a078b884825c203feee6b0a8850b Mon Sep 17 00:00:00 2001 From: mstembera Date: Mon, 10 Aug 2020 12:52:46 -0700 Subject: [PATCH 62/86] Fix AVX512 build with older compilers avoids an intrinsic that is missing in gcc < 10. For this target, might trigger another gcc bug on windows that requires up-to-date gcc 8, 9, or 10, or usage of clang. 
Fixes https://github.com/official-stockfish/Stockfish/issues/2975 closes https://github.com/official-stockfish/Stockfish/pull/2976 No functional change --- src/Makefile | 2 +- src/nnue/layers/affine_transform.h | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Makefile b/src/Makefile index fd2618a4..e34fbf61 100644 --- a/src/Makefile +++ b/src/Makefile @@ -416,7 +416,7 @@ endif ifeq ($(avx512),yes) CXXFLAGS += -DUSE_AVX512 ifeq ($(comp),$(filter $(comp),gcc clang mingw)) - CXXFLAGS += -mavx512bw + CXXFLAGS += -mavx512f -mavx512bw endif endif diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h index 985ee71a..8d2acd18 100644 --- a/src/nnue/layers/affine_transform.h +++ b/src/nnue/layers/affine_transform.h @@ -126,8 +126,7 @@ namespace Eval::NNUE::Layers { const auto iv256 = reinterpret_cast(&input_vector[kNumChunks]); const auto row256 = reinterpret_cast(&row[kNumChunks]); __m256i product256 = _mm256_maddubs_epi16(_mm256_loadA_si256(&iv256[0]), _mm256_load_si256(&row256[0])); - product256 = _mm256_madd_epi16(product256, _mm256_set1_epi16(1)); - sum = _mm512_add_epi32(sum, _mm512_zextsi256_si512(product256)); + sum = _mm512_add_epi32(sum, _mm512_cvtepi16_epi32(product256)); } output[i] = _mm512_reduce_add_epi32(sum) + biases_[i]; From ea6220f3813e5b76b444a02905eaf2c556bdb368 Mon Sep 17 00:00:00 2001 From: Guy Vreuls Date: Fri, 7 Aug 2020 17:07:46 +0200 Subject: [PATCH 63/86] This commit enables a mixed bench, to improve CI and allow for PGO (profile-build) of the NNUE part of the code. Joint work gvreuls / vondele * Download the default NNUE net in AppVeyor * Download net in travis CI `make net` * Adjust tests to cover more archs, speedup instrumented testing * Introduce 'mixed' bench as default, with further options: classical, NNUE, mixed. 
mixed (default) and NNUE require the default net to be present, which can be obtained with ``` make net ``` Further examples (first is equivalent to `./stockfish bench`): ``` ./stockfish bench 16 1 13 default depth mixed ./stockfish bench 16 1 13 default depth classical ./stockfish bench 16 1 13 default depth NNUE ``` The net is now downloaded automatically if needed for `profile-build` (usual `build` works fine without net present) PGO gives a nice speedup on fishtest: passed STC: LLR: 2.93 (-2.94,2.94) {-0.50,1.50} Total: 3360 W: 469 L: 343 D: 2548 Ptnml(0-2): 20, 246, 1030, 356, 28 https://tests.stockfishchess.org/tests/view/5f31b5499081672066537569 passed LTC: LLR: 2.97 (-2.94,2.94) {0.25,1.75} Total: 8824 W: 609 L: 502 D: 7713 Ptnml(0-2): 8, 430, 3438, 519, 17 https://tests.stockfishchess.org/tests/view/5f31c87b908167206653757c closes https://github.com/official-stockfish/Stockfish/pull/2931 fixes https://github.com/official-stockfish/Stockfish/issues/2907 requires fishtest updates before commit Bench: 4290577 --- .travis.yml | 27 +++++++++++++++++++++------ appveyor.yml | 14 ++++++++++++++ src/Makefile | 2 +- src/benchmark.cpp | 13 +++++++++++-- tests/instrumented.sh | 8 ++++---- 5 files changed, 51 insertions(+), 13 deletions(-) diff --git a/.travis.yml b/.travis.yml index d563a1e1..0dd38047 100644 --- a/.travis.yml +++ b/.travis.yml @@ -43,6 +43,9 @@ before_script: - cd src script: + # Download net + - make net + # Obtain bench reference from git log - git log HEAD | grep "\b[Bb]ench[ :]\+[0-9]\{7\}" | head -n 1 | sed "s/[^0-9]*\([0-9]*\).*/\1/g" > git_sig - export benchref=$(cat git_sig) @@ -55,14 +58,26 @@ script: # # Verify bench number against various builds - export CXXFLAGS="-Werror -D_GLIBCXX_DEBUG" - - make clean && make -j2 ARCH=x86-64 optimize=no debug=yes build && ../tests/signature.sh $benchref + - make clean && make -j2 ARCH=x86-64-modern optimize=no debug=yes build && ../tests/signature.sh $benchref + - export CXXFLAGS="-Werror" + - make clean 
&& make -j2 ARCH=x86-64-modern build && ../tests/signature.sh $benchref + - make clean && make -j2 ARCH=x86-64-ssse3 build && ../tests/signature.sh $benchref + - make clean && make -j2 ARCH=x86-64-sse3-popcnt build && ../tests/signature.sh $benchref + - make clean && make -j2 ARCH=x86-64 build && ../tests/signature.sh $benchref - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 optimize=no debug=yes build && ../tests/signature.sh $benchref; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 build && ../tests/signature.sh $benchref; fi + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32-old build && ../tests/signature.sh $benchref; fi + - if [[ "$TRAVIS_OS_NAME" == "linux" && "$COMP" == "gcc" ]]; then make clean && make -j2 ARCH=x86-64-modern profile-build && ../tests/signature.sh $benchref; fi + + # compile only for some more advanced architectures (might not run in travis) + - make clean && make -j2 ARCH=x86-64-avx2 build + - make clean && make -j2 ARCH=x86-64-bmi2 build + # needs gcc 10 to compile + - if [[ "$COMPILER" != "g++-8" ]]; then make clean && make -j2 ARCH=x86-64-avx512 build; fi # # Check perft and reproducible search - - export CXXFLAGS="-Werror" - - make clean && make -j2 ARCH=x86-64 build + - make clean && make -j2 ARCH=x86-64-modern build - ../tests/perft.sh - ../tests/reprosearch.sh @@ -70,11 +85,11 @@ script: # Valgrind # - export CXXFLAGS="-O1 -fno-inline" - - if [ -x "$(command -v valgrind )" ]; then make clean && make -j2 ARCH=x86-64 debug=yes optimize=no build > /dev/null && ../tests/instrumented.sh --valgrind; fi + - if [ -x "$(command -v valgrind )" ]; then make clean && make -j2 ARCH=x86-64-modern debug=yes optimize=no build > /dev/null && ../tests/instrumented.sh --valgrind; fi - if [ -x "$(command -v valgrind )" ]; then ../tests/instrumented.sh --valgrind-thread; fi # # Sanitizer # - - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && 
make -j2 ARCH=x86-64 sanitize=undefined optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-undefined; fi - - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64 sanitize=thread optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-thread; fi + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64-modern sanitize=undefined optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-undefined; fi + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-64-modern sanitize=thread optimize=no debug=yes build > /dev/null && ../tests/instrumented.sh --sanitizer-thread; fi diff --git a/appveyor.yml b/appveyor.yml index d356ba2f..a3732a23 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -61,6 +61,20 @@ before_build: build_script: - cmake --build . --config %CONFIGURATION% -- /verbosity:minimal + - ps: | + # Download default NNUE net from fishtest + $nnuenet = Get-Content -Path src\ucioption.cpp | Select-String -CaseSensitive -Pattern "Option" | Select-String -CaseSensitive -Pattern "nn-[a-z0-9]{12}.nnue" + $dummy = $nnuenet -match "(?nn-[a-z0-9]{12}.nnue)" + $nnuenet = $Matches.nnuenet + Write-Host "Default net:" $nnuenet + $nnuedownloadurl = "https://tests.stockfishchess.org/api/nn/$nnuenet" + $nnuefilepath = "src\${env:CONFIGURATION}\$nnuenet" + if (Test-Path -Path $nnuefilepath) { + Write-Host "Already available." + } else { + Write-Host "Downloading $nnuedownloadurl to $nnuefilepath" + Invoke-WebRequest -Uri $nnuedownloadurl -OutFile $nnuefilepath + } before_test: - cd src/%CONFIGURATION% diff --git a/src/Makefile b/src/Makefile index e34fbf61..c00b60b5 100644 --- a/src/Makefile +++ b/src/Makefile @@ -569,7 +569,7 @@ help: build: config-sanity $(MAKE) ARCH=$(ARCH) COMP=$(COMP) all -profile-build: config-sanity objclean profileclean +profile-build: config-sanity objclean profileclean net @echo "" @echo "Step 1/4. 
Building instrumented executable ..." $(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make) diff --git a/src/benchmark.cpp b/src/benchmark.cpp index 6041d642..806e9840 100644 --- a/src/benchmark.cpp +++ b/src/benchmark.cpp @@ -95,8 +95,9 @@ const vector Defaults = { /// setup_bench() builds a list of UCI commands to be run by bench. There /// are five parameters: TT size in MB, number of search threads that /// should be used, the limit value spent for each position, a file name -/// where to look for positions in FEN format and the type of the limit: -/// depth, perft, nodes and movetime (in millisecs). +/// where to look for positions in FEN format, the type of the limit: +/// depth, perft, nodes and movetime (in millisecs), and evaluation type +/// mixed (default), classical, NNUE. /// /// bench -> search default positions up to depth 13 /// bench 64 1 15 -> search default positions up to depth 15 (TT = 64MB) @@ -115,6 +116,7 @@ vector setup_bench(const Position& current, istream& is) { string limit = (is >> token) ? token : "13"; string fenFile = (is >> token) ? token : "default"; string limitType = (is >> token) ? token : "depth"; + string evalType = (is >> token) ? token : "mixed"; go = limitType == "eval" ? 
"eval" : "go " + limitType + " " + limit; @@ -146,13 +148,20 @@ vector setup_bench(const Position& current, istream& is) { list.emplace_back("setoption name Hash value " + ttSize); list.emplace_back("ucinewgame"); + size_t posCounter = 0; + for (const string& fen : fens) if (fen.find("setoption") != string::npos) list.emplace_back(fen); else { + if (evalType == "classical" || (evalType == "mixed" && posCounter % 2 == 0)) + list.emplace_back("setoption name Use NNUE value false"); + else if (evalType == "NNUE" || (evalType == "mixed" && posCounter % 2 != 0)) + list.emplace_back("setoption name Use NNUE value true"); list.emplace_back("position fen " + fen); list.emplace_back(go); + ++posCounter; } return list; diff --git a/tests/instrumented.sh b/tests/instrumented.sh index ae6d5c4b..03ded74a 100755 --- a/tests/instrumented.sh +++ b/tests/instrumented.sh @@ -70,7 +70,7 @@ for args in "eval" \ "go depth 10" \ "go movetime 1000" \ "go wtime 8000 btime 8000 winc 500 binc 500" \ - "bench 128 $threads 10 default depth" + "bench 128 $threads 8 default depth" do echo "$prefix $exeprefix ./stockfish $args $postfix" @@ -80,7 +80,7 @@ done # more general testing, following an uci protocol exchange cat << EOF > game.exp - set timeout 10 + set timeout 240 spawn $exeprefix ./stockfish send "uci\n" @@ -98,7 +98,7 @@ cat << EOF > game.exp expect "bestmove" send "position fen 5rk1/1K4p1/8/8/3B4/8/8/8 b - - 0 1\n" - send "go depth 30\n" + send "go depth 20\n" expect "bestmove" send "quit\n" @@ -121,7 +121,7 @@ cat << EOF > syzygy.exp send "uci\n" send "setoption name SyzygyPath value ../tests/syzygy/\n" expect "info string Found 35 tablebases" {} timeout {exit 1} - send "bench 128 1 10 default depth\n" + send "bench 128 1 8 default depth\n" send "quit\n" expect eof From ee060464129f8d3af184efa013177a4ef387a394 Mon Sep 17 00:00:00 2001 From: SFisGOD Date: Mon, 10 Aug 2020 21:13:56 +0800 Subject: [PATCH 64/86] Tweak castling extension Change condition from three friendly pieces to 
two. This now means that we only extend castling on the king side if there are no other friendly pieces aside from king and rook. For the queen side, we only extend if there is only a rook and another friendly piece or if there is only a single rook and no other friendly piece but this is very rare. STC: LLR: 3.20 (-2.94,2.94) {-0.50,1.50} Total: 31144 W: 4086 L: 3903 D: 23155 Ptnml(0-2): 227, 2843, 9278, 2968, 256 https://tests.stockfishchess.org/tests/view/5f31487f9081672066537516 LTC: LLR: 2.93 (-2.94,2.94) {0.25,1.75} Total: 57816 W: 3786 L: 3538 D: 50492 Ptnml(0-2): 92, 2991, 22488, 3251, 86 https://tests.stockfishchess.org/tests/view/5f3167c3908167206653753d closes https://github.com/official-stockfish/Stockfish/pull/2980 Bench: 4244812 --- src/search.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search.cpp b/src/search.cpp index ef47fd22..c5b4332f 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1139,7 +1139,7 @@ moves_loop: // When in check, search starts from here // Castling extension if ( type_of(move) == CASTLING - && popcount(pos.pieces(us) & ~pos.pieces(PAWN) & (to_sq(move) & KingSide ? KingSide : QueenSide)) <= 3) + && popcount(pos.pieces(us) & ~pos.pieces(PAWN) & (to_sq(move) & KingSide ? KingSide : QueenSide)) <= 2) extension = 1; // Late irreversible move extension From 992f549ae7f4f73b025429c44bdbbc65de917f6c Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Tue, 11 Aug 2020 21:11:17 +0200 Subject: [PATCH 65/86] Restrict avx2 hack to windows target this workaround is possibly rather a windows & gcc specific problem. See e.g. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412#c25 on Linux with gcc 8 this patch brings roughly a 8% speedup. However, probably needs some testing in the wild. 
includes a workaround for an old msys make (3.81) installation (fixes #2984) No functional change --- src/Makefile | 2 +- src/nnue/nnue_common.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Makefile b/src/Makefile index c00b60b5..e82b066b 100644 --- a/src/Makefile +++ b/src/Makefile @@ -569,7 +569,7 @@ help: build: config-sanity $(MAKE) ARCH=$(ARCH) COMP=$(COMP) all -profile-build: config-sanity objclean profileclean net +profile-build: net config-sanity objclean profileclean @echo "" @echo "Step 1/4. Building instrumented executable ..." $(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make) diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h index cb1251c5..eab7d258 100644 --- a/src/nnue/nnue_common.h +++ b/src/nnue/nnue_common.h @@ -44,7 +44,7 @@ // compiled with older g++ crashes because the output memory is not aligned // even though alignas is specified. #if defined(USE_AVX2) -#if defined(__GNUC__ ) && (__GNUC__ < 9) +#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32) #define _mm256_loadA_si256 _mm256_loadu_si256 #define _mm256_storeA_si256 _mm256_storeu_si256 #else @@ -54,7 +54,7 @@ #endif #if defined(USE_AVX512) -#if defined(__GNUC__ ) && (__GNUC__ < 9) +#if defined(__GNUC__ ) && (__GNUC__ < 9) && defined(_WIN32) #define _mm512_loadA_si512 _mm512_loadu_si512 #define _mm512_storeA_si512 _mm512_storeu_si512 #else From 6bc0256292cf51d390fee0cb78963da884dc2677 Mon Sep 17 00:00:00 2001 From: Daylen Yang Date: Tue, 11 Aug 2020 12:02:48 -0700 Subject: [PATCH 66/86] Use posix_memalign for Apple Silicon instead of _mm_malloc fails to build on that target, because of missing Intel Intrinsics. macOS has posix_memalign() since ~2014 so we can simplify the code and just use that for all Apple platforms. closes https://github.com/official-stockfish/Stockfish/pull/2985 No functional change. 
--- src/misc.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/misc.cpp b/src/misc.cpp index fc3746cf..aeb3c912 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -51,7 +51,7 @@ typedef bool(*fun3_t)(HANDLE, CONST GROUP_AFFINITY*, PGROUP_AFFINITY); #include #endif -#if (defined(__APPLE__) && defined(_LIBCPP_HAS_C11_FEATURES)) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) +#if defined(__APPLE__) || defined(__ANDROID__) || defined(__OpenBSD__) || (defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC) && !defined(_WIN32)) #define POSIXALIGNEDALLOC #include #endif @@ -328,7 +328,7 @@ void* std_aligned_alloc(size_t alignment, size_t size) { if(posix_memalign(&pointer, alignment, size) == 0) return pointer; return nullptr; -#elif (defined(_WIN32) || (defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES))) +#elif defined(_WIN32) return _mm_malloc(size, alignment); #else return std::aligned_alloc(alignment, size); @@ -338,7 +338,7 @@ void* std_aligned_alloc(size_t alignment, size_t size) { void std_aligned_free(void* ptr) { #if defined(POSIXALIGNEDALLOC) free(ptr); -#elif (defined(_WIN32) || (defined(__APPLE__) && !defined(_LIBCPP_HAS_C11_FEATURES))) +#elif defined(_WIN32) _mm_free(ptr); #else free(ptr); From dd63b98fb06e050aa961fbad6fd1f9316f2b17df Mon Sep 17 00:00:00 2001 From: mstembera Date: Tue, 11 Aug 2020 12:59:39 -0700 Subject: [PATCH 67/86] Add support for VNNI Adds support for Vector Neural Network Instructions (avx512), as available on Intel Cascade Lake The _mm512_dpbusd_epi32() intrinsic (vpdpbusd instruction) is tailor-made for NNUE.
on a cascade lake CPU (AWS C5.24x.large, gcc 10) NNUE eval is at roughly 78% nps of classical (single core test) bench 1024 1 24 default depth: target classical NNUE ratio vnni 2207232 1725987 78.20 avx512 2216789 1671734 75.41 avx2 2194006 1611263 73.44 modern 2185001 1352469 61.90 closes https://github.com/official-stockfish/Stockfish/pull/2987 No functional change --- src/Makefile | 25 +++++++++++++++++++++++++ src/misc.cpp | 3 +++ src/nnue/layers/affine_transform.h | 14 +++++++++++++- 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index e82b066b..0804cdd5 100644 --- a/src/Makefile +++ b/src/Makefile @@ -73,6 +73,7 @@ endif # avx2 = yes/no --- -mavx2 --- Use Intel Advanced Vector Extensions 2 # pext = yes/no --- -DUSE_PEXT --- Use pext x86_64 asm-instruction # avx512 = yes/no --- -mavx512bw --- Use Intel Advanced Vector Extensions 512 +# vnni = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 512 # neon = yes/no --- -DUSE_NEON --- Use ARM SIMD architecture # # Note that Makefile is space sensitive, so when adding new architectures @@ -93,6 +94,7 @@ sse41 = no avx2 = no pext = no avx512 = no +vnni = no neon = no ARCH = x86-64-modern @@ -190,6 +192,19 @@ ifeq ($(ARCH),x86-64-avx512) avx512 = yes endif +ifeq ($(ARCH),x86-64-vnni) + arch = x86_64 + prefetch = yes + popcnt = yes + sse = yes + ssse3 = yes + sse41 = yes + avx2 = yes + pext = yes + avx512 = yes + vnni = yes +endif + ifeq ($(ARCH),armv7) arch = armv7 prefetch = yes @@ -420,6 +435,13 @@ ifeq ($(avx512),yes) endif endif +ifeq ($(vnni),yes) + CXXFLAGS += -DUSE_VNNI + ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + CXXFLAGS += -mavx512vnni -mavx512dq -mavx512vl + endif +endif + ifeq ($(sse41),yes) CXXFLAGS += -DUSE_SSE41 ifeq ($(comp),$(filter $(comp),gcc clang mingw)) @@ -522,6 +544,7 @@ help: @echo "" @echo "Supported archs:" @echo "" + @echo "x86-64-vnni > x86 64-bit with vnni support" @echo "x86-64-avx512 > x86 64-bit with avx512 
support" @echo "x86-64-bmi2 > x86 64-bit with bmi2 support" @echo "x86-64-avx2 > x86 64-bit with avx2 support" @@ -640,6 +663,7 @@ config-sanity: @echo "avx2: '$(avx2)'" @echo "pext: '$(pext)'" @echo "avx512: '$(avx512)'" + @echo "vnni: '$(vnni)'" @echo "neon: '$(neon)'" @echo "" @echo "Flags:" @@ -664,6 +688,7 @@ config-sanity: @test "$(avx2)" = "yes" || test "$(avx2)" = "no" @test "$(pext)" = "yes" || test "$(pext)" = "no" @test "$(avx512)" = "yes" || test "$(avx512)" = "no" + @test "$(vnni)" = "yes" || test "$(vnni)" = "no" @test "$(neon)" = "yes" || test "$(neon)" = "no" @test "$(comp)" = "gcc" || test "$(comp)" = "icc" || test "$(comp)" = "mingw" || test "$(comp)" = "clang" diff --git a/src/misc.cpp b/src/misc.cpp index aeb3c912..ab52d30b 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -219,6 +219,9 @@ const std::string compiler_info() { compiler += "\nCompilation settings include: "; compiler += (Is64Bit ? " 64bit" : " 32bit"); + #if defined(USE_VNNI) + compiler += " VNNI"; + #endif #if defined(USE_AVX512) compiler += " AVX512"; #endif diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h index 8d2acd18..322e3240 100644 --- a/src/nnue/layers/affine_transform.h +++ b/src/nnue/layers/affine_transform.h @@ -79,8 +79,10 @@ namespace Eval::NNUE::Layers { #if defined(USE_AVX512) constexpr IndexType kNumChunks = kPaddedInputDimensions / (kSimdWidth * 2); - const __m512i kOnes = _mm512_set1_epi16(1); const auto input_vector = reinterpret_cast(input); + #if !defined(USE_VNNI) + const __m512i kOnes = _mm512_set1_epi16(1); + #endif #elif defined(USE_AVX2) constexpr IndexType kNumChunks = kPaddedInputDimensions / kSimdWidth; @@ -113,9 +115,13 @@ namespace Eval::NNUE::Layers { __m512i sum = _mm512_setzero_si512(); const auto row = reinterpret_cast(&weights_[offset]); for (IndexType j = 0; j < kNumChunks; ++j) { + #if defined(USE_VNNI) + sum = _mm512_dpbusd_epi32(sum, _mm512_loadA_si512(&input_vector[j]), _mm512_load_si512(&row[j])); + #else 
__m512i product = _mm512_maddubs_epi16(_mm512_loadA_si512(&input_vector[j]), _mm512_load_si512(&row[j])); product = _mm512_madd_epi16(product, kOnes); sum = _mm512_add_epi32(sum, product); + #endif } // Note: Changing kMaxSimdWidth from 32 to 64 breaks loading existing networks. @@ -125,8 +131,14 @@ namespace Eval::NNUE::Layers { { const auto iv256 = reinterpret_cast(&input_vector[kNumChunks]); const auto row256 = reinterpret_cast(&row[kNumChunks]); + #if defined(USE_VNNI) + __m256i product256 = _mm256_dpbusd_epi32( + _mm512_castsi512_si256(sum), _mm256_loadA_si256(&iv256[0]), _mm256_load_si256(&row256[0])); + sum = _mm512_inserti32x8(sum, product256, 0); + #else __m256i product256 = _mm256_maddubs_epi16(_mm256_loadA_si256(&iv256[0]), _mm256_load_si256(&row256[0])); sum = _mm512_add_epi32(sum, _mm512_cvtepi16_epi32(product256)); + #endif } output[i] = _mm512_reduce_add_epi32(sum) + biases_[i]; From 69cfe28f315b559cb1a07c0806266aa2850b5d4b Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Wed, 12 Aug 2020 17:21:12 +0200 Subject: [PATCH 68/86] Output the SSE2 flag in compiler_info was missing in the list of outputs, slightly reorder flags. explicitly add -msse2 if USE_SSE2 (is implicit already, -msse -m64). closes https://github.com/official-stockfish/Stockfish/pull/2990 No functional change. --- src/Makefile | 2 +- src/misc.cpp | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/Makefile b/src/Makefile index 0804cdd5..027cc3e3 100644 --- a/src/Makefile +++ b/src/Makefile @@ -468,7 +468,7 @@ ifeq ($(neon),yes) endif ifeq ($(arch),x86_64) - CXXFLAGS += -DUSE_SSE2 + CXXFLAGS += -msse2 -DUSE_SSE2 endif ### 3.7 pext diff --git a/src/misc.cpp b/src/misc.cpp index ab52d30b..1cee4726 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -225,6 +225,7 @@ const std::string compiler_info() { #if defined(USE_AVX512) compiler += " AVX512"; #endif + compiler += (HasPext ? 
" BMI2" : ""); #if defined(USE_AVX2) compiler += " AVX2"; #endif @@ -234,11 +235,14 @@ const std::string compiler_info() { #if defined(USE_SSSE3) compiler += " SSSE3"; #endif - compiler += (HasPext ? " BMI2" : ""); - compiler += (HasPopCnt ? " POPCNT" : ""); + #if defined(USE_SSE2) + compiler += " SSE2"; + #endif + compiler += (HasPopCnt ? " POPCNT" : ""); #if defined(USE_MMX) compiler += " MMX"; #endif + #if !defined(NDEBUG) compiler += " DEBUG"; #endif From 67e48418afd58dd69708dcd67dea6161f61ef76f Mon Sep 17 00:00:00 2001 From: Sergio Vieri Date: Wed, 12 Aug 2020 23:21:21 +0800 Subject: [PATCH 69/86] Update default net to nn-82215d0fd0df.nnue Net created at: 20200812-2257 passed STC: https://tests.stockfishchess.org/tests/view/5f340ca99e5f2effc089da17 LLR: 2.96 (-2.94,2.94) {-0.50,1.50} Total: 5744 W: 756 L: 627 D: 4361 Ptnml(0-2): 28, 485, 1731, 586, 42 passed LTC: https://tests.stockfishchess.org/tests/view/5f341eba9e5f2effc089da23 LLR: 2.94 (-2.94,2.94) {0.25,1.75} Total: 17136 W: 1041 L: 917 D: 15178 Ptnml(0-2): 13, 813, 6807, 907, 28 closes https://github.com/official-stockfish/Stockfish/pull/2992 Bench: 3935117 --- src/ucioption.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ucioption.cpp b/src/ucioption.cpp index b0689d6d..0a35d01b 100644 --- a/src/ucioption.cpp +++ b/src/ucioption.cpp @@ -79,7 +79,7 @@ void init(OptionsMap& o) { o["Syzygy50MoveRule"] << Option(true); o["SyzygyProbeLimit"] << Option(7, 0, 7); o["Use NNUE"] << Option(false, on_use_NNUE); - o["EvalFile"] << Option("nn-112bb1c8cdb5.nnue", on_eval_file); + o["EvalFile"] << Option("nn-82215d0fd0df.nnue", on_eval_file); } From e8ea215a13e009b78a148fda831392eb3224107e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ste=CC=81phane=20Nicolet?= Date: Thu, 13 Aug 2020 13:40:06 +0200 Subject: [PATCH 70/86] Clean-up Makefile help Do not show the details of the default architecture for a simple "make help" invocation, as the details are most likely to confuse beginners. 
Instead we make it clear which architecture is the default and put an example at the end of the Makefile as an incentive to use "make help ARCH=blah" to discover the flags used by the different architectures. ``` make help make help ARCH=x86-64-ssse3 ``` Also clean-up and modernize a bit the Makefile examples while at it. closes https://github.com/official-stockfish/Stockfish/pull/2996 No functional change --- src/Makefile | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/src/Makefile b/src/Makefile index 027cc3e3..a9fb7b81 100644 --- a/src/Makefile +++ b/src/Makefile @@ -81,6 +81,11 @@ endif # at the end of the line for flag values. ### 2.1. General and architecture defaults + +ifeq ($(ARCH),) + empty_arch = yes +endif + optimize = yes debug = no sanitize = no @@ -99,6 +104,7 @@ neon = no ARCH = x86-64-modern ### 2.2 Architecture specific + ifeq ($(ARCH),general-32) arch = any bits = 32 @@ -141,16 +147,7 @@ ifeq ($(ARCH),x86-64-ssse3) ssse3 = yes endif -ifeq ($(ARCH),x86-64-modern) - arch = x86_64 - prefetch = yes - popcnt = yes - sse = yes - ssse3 = yes - sse41 = yes -endif - -ifeq ($(ARCH),x86-64-sse41-popcnt) +ifeq ($(ARCH),$(filter $(ARCH),x86-64-sse41-popcnt x86-64-modern)) arch = x86_64 prefetch = yes popcnt = yes @@ -535,12 +532,13 @@ help: @echo "" @echo "Supported targets:" @echo "" + @echo "help > Display architecture details" @echo "build > Standard build" - @echo "profile-build > Standard build with PGO" + @echo "net > Download the default nnue net" + @echo "profile-build > Faster build (with profile-guided optimization)" @echo "strip > Strip executable" @echo "install > Install executable" @echo "clean > Clean up" - @echo "net > Download the default nnue net" @echo "" @echo "Supported archs:" @echo "" @@ -549,7 +547,7 @@ help: @echo "x86-64-bmi2 > x86 64-bit with bmi2 support" @echo "x86-64-avx2 > x86 64-bit with avx2 support" @echo "x86-64-sse41-popcnt > x86 64-bit with sse41 and popcnt 
support" - @echo "x86-64-modern > the same as previous (x86-64-sse41-popcnt)" + @echo "x86-64-modern > common modern CPU, currently x86-64-sse41-popcnt" @echo "x86-64-ssse3 > x86 64-bit with ssse3 support" @echo "x86-64-sse3-popcnt > x86 64-bit with sse3 and popcnt support" @echo "x86-64 > x86 64-bit generic" @@ -572,17 +570,20 @@ help: @echo "" @echo "Simple examples. If you don't know what to do, you likely want to run: " @echo "" - @echo "make -j build ARCH=x86-64 (This is for 64-bit systems)" - @echo "make -j build ARCH=x86-32 (This is for 32-bit systems)" + @echo "make -j build ARCH=x86-64 (A portable, slow compile for 64-bit systems)" + @echo "make -j build ARCH=x86-32 (A portable, slow compile for 32-bit systems)" @echo "" - @echo "Advanced examples, for experienced users: " + @echo "Advanced examples, for experienced users looking for performance: " @echo "" - @echo "make -j build ARCH=x86-64-modern COMP=clang" - @echo "make -j profile-build ARCH=x86-64-bmi2 COMP=gcc COMPCXX=g++-4.8" - @echo "" - @echo "The selected architecture $(ARCH) enables the following configuration: " + @echo "make help ARCH=x86-64-bmi2" + @echo "make -j profile-build ARCH=x86-64-bmi2 COMP=gcc COMPCXX=g++-9.0" + @echo "make -j build ARCH=x86-64-ssse3 COMP=clang" @echo "" +ifneq ($(empty_arch), yes) + @echo "-------------------------------\n" + @echo "The selected architecture $(ARCH) will enable the following configuration: " @$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity +endif .PHONY: help build profile-build strip install clean net objclean profileclean \ From ce009ea1aaecc577bbdf208cef8e61dd1827a18e Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Thu, 13 Aug 2020 22:54:13 +0200 Subject: [PATCH 71/86] Verify SHA of downloaded net file check SHA of the available and downloaded file. Document the format requirement on the default net. Also allow curl to make possibly insecure connections, as needed for old curl. 
fixes https://github.com/official-stockfish/Stockfish/issues/2998 closes https://github.com/official-stockfish/Stockfish/pull/3000 No functional change. --- src/Makefile | 4 +++- src/ucioption.cpp | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index a9fb7b81..38f607cb 100644 --- a/src/Makefile +++ b/src/Makefile @@ -624,8 +624,10 @@ net: $(eval nnuenet := $(shell grep EvalFile ucioption.cpp | grep Option | sed 's/.*\(nn-[a-z0-9]\{12\}.nnue\).*/\1/')) @echo "Default net: $(nnuenet)" $(eval nnuedownloadurl := https://tests.stockfishchess.org/api/nn/$(nnuenet)) - $(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -sL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi)) + $(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -skL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi)) @if test -f "$(nnuenet)"; then echo "Already available."; else echo "Downloading $(nnuedownloadurl)"; $(curl_or_wget) $(nnuedownloadurl) > $(nnuenet); fi + $(eval shasum_command := $(shell if hash shasum 2>/dev/null; then echo "shasum -a 256 "; elif hash sha256sum 2>/dev/null; then echo "sha256sum "; fi)) + @if [ "$(nnuenet)" != "nn-"`$(shasum_command) $(nnuenet) | cut -c1-12`".nnue" ]; then echo "Failed download or $(nnuenet) corrupted, please delete!"; exit 1; fi # clean binaries and objects objclean: diff --git a/src/ucioption.cpp b/src/ucioption.cpp index 0a35d01b..2b66a475 100644 --- a/src/ucioption.cpp +++ b/src/ucioption.cpp @@ -79,6 +79,8 @@ void init(OptionsMap& o) { o["Syzygy50MoveRule"] << Option(true); o["SyzygyProbeLimit"] << Option(7, 0, 7); o["Use NNUE"] << Option(false, on_use_NNUE); + // The default must follow the format nn-[SHA256 first 12 digits].nnue + // for the build process (profile-build and fishtest) to work. 
o["EvalFile"] << Option("nn-82215d0fd0df.nnue", on_eval_file); } From e5f450cf0bfe5a34dd4ea51a5592a71be4514601 Mon Sep 17 00:00:00 2001 From: Miguel Lahoz Date: Mon, 10 Aug 2020 22:57:11 +0800 Subject: [PATCH 72/86] Also dampen NNUE eval with 50 move rule Move the existing dampening function last so that NNUE evaluations are also handled as we approach the 50 move rule. STC: LLR: 2.95 (-2.94,2.94) {-0.50,1.50} Total: 4792 W: 695 L: 561 D: 3536 Ptnml(0-2): 19, 420, 1422, 478, 57 https://tests.stockfishchess.org/tests/view/5f3164179081672066537534 LTC: LLR: 8.62 (-2.94,2.94) {0.25,1.75} Total: 286744 W: 18494 L: 17430 D: 250820 Ptnml(0-2): 418, 14886, 111745, 15860, 463 https://tests.stockfishchess.org/tests/view/5f316b039081672066537541 closes https://github.com/official-stockfish/Stockfish/pull/3004 Bench: 4001800 --- src/evaluate.cpp | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index caab2979..00fd2005 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -927,9 +927,6 @@ make_v: // Side to move point of view v = (pos.side_to_move() == WHITE ? v : -v) + Tempo; - // Damp down the evaluation linearly when shuffling - v = v * (100 - pos.rule50_count()) / 100; - return v; } @@ -941,14 +938,15 @@ make_v: Value Eval::evaluate(const Position& pos) { - if (Eval::useNNUE) - { - Value v = eg_value(pos.psq_score()); - // Take NNUE eval only on balanced positions - if (abs(v) < NNUEThreshold) - return NNUE::evaluate(pos) + Tempo; - } - return Evaluation(pos).value(); + bool classical = !Eval::useNNUE + || abs(eg_value(pos.psq_score())) >= NNUEThreshold; + Value v = classical ? 
Evaluation(pos).value() : NNUE::evaluate(pos) + Tempo; + + // Damp down the evaluation linearly when shuffling + v = v * (100 - pos.rule50_count()) / 100; + + return v; } /// trace() is like evaluate(), but instead of returning a value, it returns From 6eb186c97e9d808970d0b1369bcd7aca60612e26 Mon Sep 17 00:00:00 2001 From: mstembera Date: Fri, 14 Aug 2020 04:49:33 -0700 Subject: [PATCH 73/86] Try to match relative magnitude of NNUE eval to classical The idea is that since we are mixing NNUE and classical evals, matching their magnitudes closer allows for better comparisons. STC https://tests.stockfishchess.org/tests/view/5f35a65411a9b1a1dbf18e2b LLR: 2.94 (-2.94,2.94) {-0.50,1.50} Total: 9840 W: 1150 L: 1027 D: 7663 Ptnml(0-2): 49, 772, 3175, 855, 69 LTC https://tests.stockfishchess.org/tests/view/5f35bcbe11a9b1a1dbf18e47 LLR: 2.93 (-2.94,2.94) {0.25,1.75} Total: 44424 W: 2492 L: 2294 D: 39638 Ptnml(0-2): 42, 2015, 17915, 2183, 57 also corrects the location to clamp the evaluation (non-functional on bench). closes https://github.com/official-stockfish/Stockfish/pull/3003 bench: 3905447 --- src/evaluate.cpp | 5 ++++- src/nnue/evaluate_nnue.cpp | 5 +---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 00fd2005..a453fa0f 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -941,11 +941,14 @@ Value Eval::evaluate(const Position& pos) { bool classical = !Eval::useNNUE || abs(eg_value(pos.psq_score())) >= NNUEThreshold; Value v = classical ? 
Evaluation(pos).value() - : NNUE::evaluate(pos) + Tempo; + : NNUE::evaluate(pos) * 5 / 4 + Tempo; // Damp down the evaluation linearly when shuffling v = v * (100 - pos.rule50_count()) / 100; + // Guarantee evalution outside of TB range + v = Utility::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); + return v; } diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp index af0894b2..a6ece8e2 100644 --- a/src/nnue/evaluate_nnue.cpp +++ b/src/nnue/evaluate_nnue.cpp @@ -159,10 +159,7 @@ namespace Eval::NNUE { // Evaluation function. Perform differential calculation. Value evaluate(const Position& pos) { - Value v = ComputeScore(pos, false); - v = Utility::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1); - - return v; + return ComputeScore(pos, false); } // Evaluation function. Perform full calculation. From cd0b8b4cf28208fffef931322749205a0ddc6066 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Fri, 14 Aug 2020 22:18:12 +0200 Subject: [PATCH 74/86] Use NNUE more for fortresses Increases the use of NNUE evaluation in positions without captures/pawn moves, by increasing the NNUEThreshold threshold with rule50_count. This patch will force Stockfish to use NNUE eval more and more in materially unbalanced positions, when it seems that the classical eval is struggling to win and only manages to shuffle. This will ask the (slower) NNUE eval to double-check the potential fortress branches of the search tree, but only when necessary. 
passed STC: https://tests.stockfishchess.org/tests/view/5f36f1bf11a9b1a1dbf192d8 LLR: 2.93 (-2.94,2.94) {-0.50,1.50} Total: 51824 W: 5836 L: 5653 D: 40335 Ptnml(0-2): 264, 4356, 16512, 4493, 287 passed LTC: https://tests.stockfishchess.org/tests/view/5f37836111a9b1a1dbf1936d LLR: 2.93 (-2.94,2.94) {0.25,1.75} Total: 29768 W: 1747 L: 1590 D: 26431 Ptnml(0-2): 33, 1347, 11977, 1484, 43 closes https://github.com/official-stockfish/Stockfish/pull/3011 Bench: 4173967 --- src/evaluate.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index a453fa0f..3a620a78 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -939,7 +939,7 @@ make_v: Value Eval::evaluate(const Position& pos) { bool classical = !Eval::useNNUE - || abs(eg_value(pos.psq_score())) >= NNUEThreshold; + || abs(eg_value(pos.psq_score())) >= NNUEThreshold * (16 + pos.rule50_count()) / 16; Value v = classical ? Evaluation(pos).value() : NNUE::evaluate(pos) * 5 / 4 + Tempo; From 8cf43c6317665295eece747ed1589ee33a435d2c Mon Sep 17 00:00:00 2001 From: Daylen Yang Date: Fri, 14 Aug 2020 19:53:46 -0700 Subject: [PATCH 75/86] Display NEON in compiler string if NEON intrinsics are being used and USE_NEON is defined. 
closes https://github.com/official-stockfish/Stockfish/pull/3008 No functional change --- src/misc.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/misc.cpp b/src/misc.cpp index 1cee4726..459ea100 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -242,6 +242,9 @@ const std::string compiler_info() { #if defined(USE_MMX) compiler += " MMX"; #endif + #if defined(USE_NEON) + compiler += " NEON"; + #endif #if !defined(NDEBUG) compiler += " DEBUG"; From 72dc7a5c54554a8c7c4bf68aa7de2d4de05f3294 Mon Sep 17 00:00:00 2001 From: syzygy1 <3028851+syzygy1@users.noreply.github.com> Date: Sat, 15 Aug 2020 16:50:39 +0200 Subject: [PATCH 76/86] Assume network file is in little-endian byte order This patch fixes the byte order when reading 16- and 32-bit values from the network file on a big-endian machine. Bytes are ordered in read_le() using unsigned arithmetic, which doesn't need tricks to determine the endianness of the machine. Unfortunately the compiler doesn't seem to be able to optimise the ordering operation, but reading in the weights is not a time-critical operation and the extra time it takes should not be noticeable. Big endian systems are still untested with NNUE. fixes #3007 closes https://github.com/official-stockfish/Stockfish/pull/3009 No functional change. 
--- src/nnue/evaluate_nnue.cpp | 8 ++++---- src/nnue/layers/affine_transform.h | 9 ++++----- src/nnue/nnue_common.h | 19 +++++++++++++++++++ src/nnue/nnue_feature_transformer.h | 8 ++++---- 4 files changed, 31 insertions(+), 13 deletions(-) diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp index a6ece8e2..3aa85943 100644 --- a/src/nnue/evaluate_nnue.cpp +++ b/src/nnue/evaluate_nnue.cpp @@ -77,7 +77,7 @@ namespace Eval::NNUE { bool ReadParameters(std::istream& stream, const AlignedPtr& pointer) { std::uint32_t header; - stream.read(reinterpret_cast(&header), sizeof(header)); + header = read_le(stream); if (!stream || header != T::GetHashValue()) return false; return pointer->ReadParameters(stream); } @@ -96,9 +96,9 @@ namespace Eval::NNUE { std::uint32_t* hash_value, std::string* architecture) { std::uint32_t version, size; - stream.read(reinterpret_cast(&version), sizeof(version)); - stream.read(reinterpret_cast(hash_value), sizeof(*hash_value)); - stream.read(reinterpret_cast(&size), sizeof(size)); + version = read_le(stream); + *hash_value = read_le(stream); + size = read_le(stream); if (!stream || version != kVersion) return false; architecture->resize(size); stream.read(&(*architecture)[0], size); diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h index 322e3240..bac258e8 100644 --- a/src/nnue/layers/affine_transform.h +++ b/src/nnue/layers/affine_transform.h @@ -62,11 +62,10 @@ namespace Eval::NNUE::Layers { // Read network parameters bool ReadParameters(std::istream& stream) { if (!previous_layer_.ReadParameters(stream)) return false; - stream.read(reinterpret_cast(biases_), - kOutputDimensions * sizeof(BiasType)); - stream.read(reinterpret_cast(weights_), - kOutputDimensions * kPaddedInputDimensions * - sizeof(WeightType)); + for (std::size_t i = 0; i < kOutputDimensions; ++i) + biases_[i] = read_le(stream); + for (std::size_t i = 0; i < kOutputDimensions * kPaddedInputDimensions; ++i) + weights_[i] = 
read_le(stream); return !stream.fail(); } diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h index eab7d258..61f18aee 100644 --- a/src/nnue/nnue_common.h +++ b/src/nnue/nnue_common.h @@ -21,6 +21,9 @@ #ifndef NNUE_COMMON_H_INCLUDED #define NNUE_COMMON_H_INCLUDED +#include +#include + #if defined(USE_AVX2) #include @@ -101,6 +104,22 @@ namespace Eval::NNUE { return (n + base - 1) / base * base; } + // Read a signed or unsigned integer from a stream in little-endian order + template + inline IntType read_le(std::istream& stream) { + // Read the relevant bytes from the stream in little-endian order + std::uint8_t u[sizeof(IntType)]; + stream.read(reinterpret_cast(u), sizeof(IntType)); + // Use unsigned arithmetic to convert to machine order + typename std::make_unsigned::type v = 0; + for (std::size_t i = 0; i < sizeof(IntType); ++i) + v = (v << 8) | u[sizeof(IntType) - i - 1]; + // Copy the machine-ordered bytes into a potentially signed value + IntType w; + std::memcpy(&w, &v, sizeof(IntType)); + return w; + } + } // namespace Eval::NNUE #endif // #ifndef NNUE_COMMON_H_INCLUDED diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index 40f2603d..4db9be9f 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -55,10 +55,10 @@ namespace Eval::NNUE { // Read network parameters bool ReadParameters(std::istream& stream) { - stream.read(reinterpret_cast(biases_), - kHalfDimensions * sizeof(BiasType)); - stream.read(reinterpret_cast(weights_), - kHalfDimensions * kInputDimensions * sizeof(WeightType)); + for (std::size_t i = 0; i < kHalfDimensions; ++i) + biases_[i] = read_le(stream); + for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i) + weights_[i] = read_le(stream); return !stream.fail(); } From 65572de4a79ab017c19d85eacee865afe7bfc7c1 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Sun, 16 Aug 2020 13:21:07 +0200 Subject: [PATCH 77/86] Add further targets to 
travis testing general-32, general-64 and help closes https://github.com/official-stockfish/Stockfish/pull/3014 No functional change --- .travis.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 0dd38047..45f1bd3d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -51,11 +51,12 @@ script: - export benchref=$(cat git_sig) - echo "Reference bench:" $benchref - # # Compiler version string - $COMPILER -v - # + # test help target + - make help + # Verify bench number against various builds - export CXXFLAGS="-Werror -D_GLIBCXX_DEBUG" - make clean && make -j2 ARCH=x86-64-modern optimize=no debug=yes build && ../tests/signature.sh $benchref @@ -64,8 +65,10 @@ script: - make clean && make -j2 ARCH=x86-64-ssse3 build && ../tests/signature.sh $benchref - make clean && make -j2 ARCH=x86-64-sse3-popcnt build && ../tests/signature.sh $benchref - make clean && make -j2 ARCH=x86-64 build && ../tests/signature.sh $benchref + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=general-64 build && ../tests/signature.sh $benchref; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 optimize=no debug=yes build && ../tests/signature.sh $benchref; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 build && ../tests/signature.sh $benchref; fi + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=general-32 build && ../tests/signature.sh $benchref; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32-old build && ../tests/signature.sh $benchref; fi - if [[ "$TRAVIS_OS_NAME" == "linux" && "$COMP" == "gcc" ]]; then make clean && make -j2 ARCH=x86-64-modern profile-build && ../tests/signature.sh $benchref; fi From 81d716f5ccff3f0898ae985b9ef69f79d014bdc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ste=CC=81phane=20Nicolet?= Date: Sun, 16 Aug 2020 21:46:54 +0200 Subject: [PATCH 78/86] Reformat code in 
little-endian patch Reformat code and rename the function to "read_little_endian()" in the recent commit by Ronald de Man for support of big endian systems. closes https://github.com/official-stockfish/Stockfish/pull/3016 No functional change ----- Recommended net: https://tests.stockfishchess.org/api/nn/nn-82215d0fd0df.nnue --- src/nnue/evaluate_nnue.cpp | 14 +++++++------- src/nnue/layers/affine_transform.h | 4 ++-- src/nnue/nnue_common.h | 30 +++++++++++++++-------------- src/nnue/nnue_feature_transformer.h | 4 ++-- 4 files changed, 27 insertions(+), 25 deletions(-) diff --git a/src/nnue/evaluate_nnue.cpp b/src/nnue/evaluate_nnue.cpp index 3aa85943..dfbb1ac2 100644 --- a/src/nnue/evaluate_nnue.cpp +++ b/src/nnue/evaluate_nnue.cpp @@ -77,7 +77,7 @@ namespace Eval::NNUE { bool ReadParameters(std::istream& stream, const AlignedPtr& pointer) { std::uint32_t header; - header = read_le(stream); + header = read_little_endian(stream); if (!stream || header != T::GetHashValue()) return false; return pointer->ReadParameters(stream); } @@ -92,13 +92,13 @@ namespace Eval::NNUE { } // Read network header - bool ReadHeader(std::istream& stream, - std::uint32_t* hash_value, std::string* architecture) { - + bool ReadHeader(std::istream& stream, std::uint32_t* hash_value, std::string* architecture) + { std::uint32_t version, size; - version = read_le(stream); - *hash_value = read_le(stream); - size = read_le(stream); + + version = read_little_endian(stream); + *hash_value = read_little_endian(stream); + size = read_little_endian(stream); if (!stream || version != kVersion) return false; architecture->resize(size); stream.read(&(*architecture)[0], size); diff --git a/src/nnue/layers/affine_transform.h b/src/nnue/layers/affine_transform.h index bac258e8..7ac5a1c0 100644 --- a/src/nnue/layers/affine_transform.h +++ b/src/nnue/layers/affine_transform.h @@ -63,9 +63,9 @@ namespace Eval::NNUE::Layers { bool ReadParameters(std::istream& stream) { if 
(!previous_layer_.ReadParameters(stream)) return false; for (std::size_t i = 0; i < kOutputDimensions; ++i) - biases_[i] = read_le(stream); + biases_[i] = read_little_endian(stream); for (std::size_t i = 0; i < kOutputDimensions * kPaddedInputDimensions; ++i) - weights_[i] = read_le(stream); + weights_[i] = read_little_endian(stream); return !stream.fail(); } diff --git a/src/nnue/nnue_common.h b/src/nnue/nnue_common.h index 61f18aee..4c93e3d1 100644 --- a/src/nnue/nnue_common.h +++ b/src/nnue/nnue_common.h @@ -101,23 +101,25 @@ namespace Eval::NNUE { // Round n up to be a multiple of base template constexpr IntType CeilToMultiple(IntType n, IntType base) { - return (n + base - 1) / base * base; + return (n + base - 1) / base * base; } - // Read a signed or unsigned integer from a stream in little-endian order + // read_little_endian() is our utility to read an integer (signed or unsigned, any size) + // from a stream in little-endian order. We swap the byte order after the read if + // necessary to return a result with the byte ordering of the compiling machine. 
template - inline IntType read_le(std::istream& stream) { - // Read the relevant bytes from the stream in little-endian order - std::uint8_t u[sizeof(IntType)]; - stream.read(reinterpret_cast(u), sizeof(IntType)); - // Use unsigned arithmetic to convert to machine order - typename std::make_unsigned::type v = 0; - for (std::size_t i = 0; i < sizeof(IntType); ++i) - v = (v << 8) | u[sizeof(IntType) - i - 1]; - // Copy the machine-ordered bytes into a potentially signed value - IntType w; - std::memcpy(&w, &v, sizeof(IntType)); - return w; + inline IntType read_little_endian(std::istream& stream) { + + IntType result; + std::uint8_t u[sizeof(IntType)]; + typename std::make_unsigned::type v = 0; + + stream.read(reinterpret_cast(u), sizeof(IntType)); + for (std::size_t i = 0; i < sizeof(IntType); ++i) + v = (v << 8) | u[sizeof(IntType) - i - 1]; + + std::memcpy(&result, &v, sizeof(IntType)); + return result; } } // namespace Eval::NNUE diff --git a/src/nnue/nnue_feature_transformer.h b/src/nnue/nnue_feature_transformer.h index 4db9be9f..43707610 100644 --- a/src/nnue/nnue_feature_transformer.h +++ b/src/nnue/nnue_feature_transformer.h @@ -56,9 +56,9 @@ namespace Eval::NNUE { // Read network parameters bool ReadParameters(std::istream& stream) { for (std::size_t i = 0; i < kHalfDimensions; ++i) - biases_[i] = read_le(stream); + biases_[i] = read_little_endian(stream); for (std::size_t i = 0; i < kHalfDimensions * kInputDimensions; ++i) - weights_[i] = read_le(stream); + weights_[i] = read_little_endian(stream); return !stream.fail(); } From 0e17a89e4dee73bd46e496cf6bed467432f116e6 Mon Sep 17 00:00:00 2001 From: Unai Corzo Date: Mon, 17 Aug 2020 09:22:15 +0200 Subject: [PATCH 79/86] Simplify away the passed pawn extension STC https://tests.stockfishchess.org/tests/view/5f3955f0e98b6c64b3df41d7 LLR: 2.96 (-2.94,2.94) {-1.50,0.50} Total: 31992 W: 3611 L: 3548 D: 24833 Ptnml(0-2): 174, 2658, 10273, 2713, 178 LTC 
https://tests.stockfishchess.org/tests/view/5f399e41e98b6c64b3df4210 LLR: 3.01 (-2.94,2.94) {-1.50,0.50} Total: 29568 W: 1488 L: 1480 D: 26600 Ptnml(0-2): 40, 1272, 12142, 1300, 30 closes https://github.com/official-stockfish/Stockfish/pull/3017 bench: 3844671 ----- Recommended net: https://tests.stockfishchess.org/api/nn/nn-82215d0fd0df.nnue --- src/search.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index c5b4332f..83fb722f 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1126,12 +1126,6 @@ moves_loop: // When in check, search starts from here && (pos.is_discovery_check_on_king(~us, move) || pos.see_ge(move))) extension = 1; - // Passed pawn extension - else if ( move == ss->killers[0] - && pos.advanced_pawn_push(move) - && pos.pawn_passed(us, to_sq(move))) - extension = 1; - // Last captures extension else if ( PieceValue[EG][pos.captured_piece()] > PawnValueEg && pos.non_pawn_material() <= 2 * RookValueMg) From 65b976439f8867e81682c0b66da6796ad3176177 Mon Sep 17 00:00:00 2001 From: notruck <56622488+notruck@users.noreply.github.com> Date: Sun, 16 Aug 2020 08:59:13 -0700 Subject: [PATCH 80/86] Support building for Android using NDK The easiest way to use the NDK in conjunction with this Makefile (tested on linux-x86_64): 1. Download the latest NDK (r21d) from Google from https://developer.android.com/ndk/downloads 2. Place and unzip the NDK in $HOME/ndk folder 3. Export the path variable e.g., `export PATH=$PATH:$HOME/ndk/android-ndk-r21d/toolchains/llvm/prebuilt/linux-x86_64/bin` 4. cd to your Stockfish/src dir 5. Issue `make -j ARCH=armv8 COMP=ndk build` (use `ARCH=armv7` or `ARCH=armv7-neon` for older CPUs) 6. Optionally `make -j ARCH=armv8 COMP=ndk strip` 7. That's all. Enjoy! Improves support from Raspberry Pi (incomplete?) 
and compiling on arm in general closes https://github.com/official-stockfish/Stockfish/pull/3015 fixes https://github.com/official-stockfish/Stockfish/issues/2860 fixes https://github.com/official-stockfish/Stockfish/issues/2641 Support is still fragile as we're missing CI on these targets. Nevertheless tested with: ```bash # build crosses from ubuntu 20.04 on x86 to various arch/OS combos # tested with suitable packages installed # (build-essentials, mingw-w64, g++-arm-linux-gnueabihf, NDK (r21d) from google) # cross to Android export PATH=$HOME/ndk/android-ndk-r21d/toolchains/llvm/prebuilt/linux-x86_64/bin:$PATH make clean && make -j build ARCH=armv7 COMP=ndk && make -j build ARCH=armv7 COMP=ndk strip make clean && make -j build ARCH=armv7-neon COMP=ndk && make -j build ARCH=armv7-neon COMP=ndk strip make clean && make -j build ARCH=armv8 COMP=ndk && make -j build ARCH=armv8 COMP=ndk strip # cross to Raspberry Pi make clean && make -j build ARCH=armv7 COMP=gcc COMPILER=arm-linux-gnueabihf-g++ make clean && make -j build ARCH=armv7-neon COMP=gcc COMPILER=arm-linux-gnueabihf-g++ # cross to Windows make clean && make -j build ARCH=x86-64-modern COMP=mingw ``` No functional change --- AUTHORS | 1 + src/Makefile | 65 ++++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 56 insertions(+), 10 deletions(-) diff --git a/AUTHORS b/AUTHORS index 41b89705..d8f4d30e 100644 --- a/AUTHORS +++ b/AUTHORS @@ -127,6 +127,7 @@ Niklas Fiekas (niklasf) Nikolay Kostov (NikolayIT) Nguyen Pham (nguyenpham) Norman Schmidt (FireFather) +notruck Ondrej Mosnáček (WOnder93) Oskar Werkelin Ahlin Pablo Vazquez diff --git a/src/Makefile b/src/Makefile index 38f607cb..0f458aa1 100644 --- a/src/Makefile +++ b/src/Makefile @@ -102,6 +102,7 @@ avx512 = no vnni = no neon = no ARCH = x86-64-modern +STRIP = strip ### 2.2 Architecture specific @@ -208,6 +209,14 @@ ifeq ($(ARCH),armv7) bits = 32 endif +ifeq ($(ARCH),armv7-neon) + arch = armv7 + prefetch = yes + popcnt = yes + neon = yes + 
bits = 32 +endif + ifeq ($(ARCH),armv8) arch = armv8-a prefetch = yes @@ -251,7 +260,7 @@ ifeq ($(COMP),gcc) CXX=g++ CXXFLAGS += -pedantic -Wextra -Wshadow - ifeq ($(ARCH),$(filter $(ARCH),armv7 armv8)) + ifeq ($(arch),$(filter $(arch),armv7 armv8-a)) ifeq ($(OS),Android) CXXFLAGS += -m$(bits) LDFLAGS += -m$(bits) @@ -261,6 +270,10 @@ ifeq ($(COMP),gcc) LDFLAGS += -m$(bits) endif + ifeq ($(arch),$(filter $(arch),armv7)) + LDFLAGS += -latomic + endif + ifneq ($(KERNEL),Darwin) LDFLAGS += -Wl,--no-as-needed endif @@ -311,7 +324,7 @@ ifeq ($(COMP),clang) endif endif - ifeq ($(ARCH),$(filter $(ARCH),armv7 armv8)) + ifeq ($(arch),$(filter $(arch),armv7 armv8)) ifeq ($(OS),Android) CXXFLAGS += -m$(bits) LDFLAGS += -m$(bits) @@ -340,6 +353,25 @@ ifeq ($(KERNEL),Darwin) LDFLAGS += -arch $(arch) -mmacosx-version-min=10.14 endif +# To cross-compile for Android, NDK version r21 or later is recommended. +# In earlier NDK versions, you'll need to pass -fno-addrsig if using GNU binutils. +# Currently we don't know how to make PGO builds with the NDK yet. 
+ifeq ($(COMP),ndk) + CXXFLAGS += -stdlib=libc++ -fPIE + ifeq ($(arch),armv7) + comp=armv7a-linux-androideabi16-clang + CXX=armv7a-linux-androideabi16-clang++ + CXXFLAGS += -mthumb -march=armv7-a -mfloat-abi=softfp -mfpu=neon + STRIP=arm-linux-androideabi-strip + endif + ifeq ($(arch),armv8-a) + comp=aarch64-linux-android21-clang + CXX=aarch64-linux-android21-clang++ + STRIP=aarch64-linux-android-strip + endif + LDFLAGS += -static-libstdc++ -pie -lm -latomic +endif + ### Travis CI script uses COMPILER to overwrite CXX ifdef COMPILER COMPCXX=$(COMPILER) @@ -356,7 +388,9 @@ ifneq ($(comp),mingw) ifneq ($(OS),Android) # Haiku has pthreads in its libroot, so only link it in on other platforms ifneq ($(KERNEL),Haiku) - LDFLAGS += -lpthread + ifneq ($(COMP),ndk) + LDFLAGS += -lpthread + endif endif endif endif @@ -401,7 +435,6 @@ endif ifeq ($(prefetch),yes) ifeq ($(sse),yes) CXXFLAGS += -msse - DEPENDFLAGS += -msse endif else CXXFLAGS += -DNO_PREFETCH @@ -409,7 +442,7 @@ endif ### 3.6 popcnt ifeq ($(popcnt),yes) - ifeq ($(arch),$(filter $(arch),ppc64 armv8-a arm64)) + ifeq ($(arch),$(filter $(arch),ppc64 armv7 armv8-a arm64)) CXXFLAGS += -DUSE_POPCNT else ifeq ($(comp),icc) CXXFLAGS += -msse3 -DUSE_POPCNT @@ -418,6 +451,7 @@ ifeq ($(popcnt),yes) endif endif + ifeq ($(avx2),yes) CXXFLAGS += -DUSE_AVX2 ifeq ($(comp),$(filter $(comp),gcc clang mingw)) @@ -462,6 +496,11 @@ endif ifeq ($(neon),yes) CXXFLAGS += -DUSE_NEON + ifeq ($(KERNEL),Linux) + ifneq ($(COMP),ndk) + CXXFLAGS += -mfpu=neon + endif + endif endif ifeq ($(arch),x86_64) @@ -481,7 +520,10 @@ endif ### needs access to the optimization flags. 
ifeq ($(optimize),yes) ifeq ($(debug), no) - ifeq ($(comp),clang) + ifeq ($(COMP),ndk) + CXXFLAGS += -flto=thin + LDFLAGS += $(CXXFLAGS) + else ifeq ($(comp),clang) CXXFLAGS += -flto=thin LDFLAGS += $(CXXFLAGS) @@ -502,7 +544,7 @@ ifeq ($(debug), no) endif # To use LTO and static linking on windows, the tool chain requires a recent gcc: -# gcc version 10.1 in msys2 or TDM-GCC version 9.2 are know to work, older might not. +# gcc version 10.1 in msys2 or TDM-GCC version 9.2 are known to work, older might not. # So, only enable it for a cross from Linux by default. else ifeq ($(comp),mingw) ifeq ($(KERNEL),Linux) @@ -556,7 +598,8 @@ help: @echo "ppc-64 > PPC 64-bit" @echo "ppc-32 > PPC 32-bit" @echo "armv7 > ARMv7 32-bit" - @echo "armv8 > ARMv8 64-bit" + @echo "armv7-neon" > ARMv7 32-bit with popcnt and neon" + @echo "armv8 > ARMv8 64-bit with popcnt and neon" @echo "apple-silicon > Apple silicon ARM64" @echo "general-64 > unspecified 64-bit" @echo "general-32 > unspecified 32-bit" @@ -567,6 +610,7 @@ help: @echo "mingw > Gnu compiler with MinGW under Windows" @echo "clang > LLVM Clang compiler" @echo "icc > Intel compiler" + @echo "ndk > Google NDK to cross-compile for Android" @echo "" @echo "Simple examples. 
If you don't know what to do, you likely want to run: " @echo "" @@ -609,7 +653,7 @@ profile-build: net config-sanity objclean profileclean $(MAKE) ARCH=$(ARCH) COMP=$(COMP) profileclean strip: - strip $(EXE) + $(STRIP) $(EXE) install: -mkdir -p -m 755 $(BINDIR) @@ -693,7 +737,8 @@ config-sanity: @test "$(avx512)" = "yes" || test "$(avx512)" = "no" @test "$(vnni)" = "yes" || test "$(vnni)" = "no" @test "$(neon)" = "yes" || test "$(neon)" = "no" - @test "$(comp)" = "gcc" || test "$(comp)" = "icc" || test "$(comp)" = "mingw" || test "$(comp)" = "clang" + @test "$(comp)" = "gcc" || test "$(comp)" = "icc" || test "$(comp)" = "mingw" || test "$(comp)" = "clang" \ + || test "$(comp)" = "armv7a-linux-androideabi16-clang" || test "$(comp)" = "aarch64-linux-android21-clang" $(EXE): $(OBJS) +$(CXX) -o $@ $(OBJS) $(LDFLAGS) From 1c0b7bdf4f77b8160cebe8af96b28230e870a136 Mon Sep 17 00:00:00 2001 From: VoyagerOne Date: Mon, 17 Aug 2020 08:58:03 -0400 Subject: [PATCH 81/86] Remove history bonus from Eval STC: LLR: 2.92 (-2.94,2.94) {-1.50,0.50} Total: 26776 W: 2787 L: 2725 D: 21264 https://tests.stockfishchess.org/tests/view/5f39d6beb38d442594aabd9b LTC: LLR: 2.93 (-2.94,2.94) {-1.50,0.50} Total: 12968 W: 635 L: 608 D: 11725 https://tests.stockfishchess.org/tests/view/5f39decfb38d442594aabda7 closes https://github.com/official-stockfish/Stockfish/pull/3019 Bench: 4335100 --- src/search.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 83fb722f..7c839dfc 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -794,11 +794,7 @@ namespace { else { if ((ss-1)->currentMove != MOVE_NULL) - { - int bonus = -(ss-1)->statScore / 512; - - ss->staticEval = eval = evaluate(pos) + bonus; - } + ss->staticEval = eval = evaluate(pos); else ss->staticEval = eval = -(ss-1)->staticEval + 2 * Tempo; From 581b92e4a70b99fa5a22f7a1a38f2c8d2099769f Mon Sep 17 00:00:00 2001 From: Unai Corzo Date: Mon, 17 Aug 2020 18:22:32 +0200 Subject: [PATCH 
82/86] Remove last captures extension STC https://tests.stockfishchess.org/tests/view/5f395657e98b6c64b3df41dd LLR: 2.95 (-2.94,2.94) {-1.50,0.50} Total: 144664 W: 15426 L: 15537 D: 113701 Ptnml(0-2): 612, 11341, 48537, 11230, 612 LTC https://tests.stockfishchess.org/tests/view/5f3a2ec7b38d442594aabdd7 LLR: 2.96 (-2.94,2.94) {-1.50,0.50} Total: 22728 W: 1161 L: 1146 D: 20421 Ptnml(0-2): 21, 960, 9388, 973, 22 closes https://github.com/official-stockfish/Stockfish/pull/3020 bench: 3832662 --- src/search.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/search.cpp b/src/search.cpp index 7c839dfc..1d5bc5f7 100644 --- a/src/search.cpp +++ b/src/search.cpp @@ -1122,11 +1122,6 @@ moves_loop: // When in check, search starts from here && (pos.is_discovery_check_on_king(~us, move) || pos.see_ge(move))) extension = 1; - // Last captures extension - else if ( PieceValue[EG][pos.captured_piece()] > PawnValueEg - && pos.non_pawn_material() <= 2 * RookValueMg) - extension = 1; - // Castling extension if ( type_of(move) == CASTLING && popcount(pos.pieces(us) & ~pos.pieces(PAWN) & (to_sq(move) & KingSide ? KingSide : QueenSide)) <= 2) From 1bcc981a5a70e3065b4ff588644f270136fd7e3c Mon Sep 17 00:00:00 2001 From: mstembera Date: Sun, 16 Aug 2020 15:23:50 -0700 Subject: [PATCH 83/86] Fallback to NNUE If the classical eval ends up much smaller than estimated fall back to NNUE. Also use multiply instead of divide for the threshold comparison for smoother transitions without rounding. 
STC https://tests.stockfishchess.org/tests/view/5f3a5011b38d442594aabdfe LLR: 2.96 (-2.94,2.94) {-0.50,1.50} Total: 57352 W: 6325 L: 6135 D: 44892 Ptnml(0-2): 277, 4748, 18482, 4846, 323 LTC https://tests.stockfishchess.org/tests/view/5f3aee9db38d442594aabe82 LLR: 2.95 (-2.94,2.94) {0.25,1.75} Total: 16232 W: 897 L: 781 D: 14554 Ptnml(0-2): 19, 679, 6616, 771, 31 closes https://github.com/official-stockfish/Stockfish/pull/3023 bench: 4026216 ----- Recommended net: https://tests.stockfishchess.org/api/nn/nn-82215d0fd0df.nnue --- src/evaluate.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/evaluate.cpp b/src/evaluate.cpp index 3a620a78..1bd89353 100644 --- a/src/evaluate.cpp +++ b/src/evaluate.cpp @@ -114,7 +114,8 @@ namespace { constexpr Value LazyThreshold1 = Value(1400); constexpr Value LazyThreshold2 = Value(1300); constexpr Value SpaceThreshold = Value(12222); - constexpr Value NNUEThreshold = Value(575); + constexpr Value NNUEThreshold1 = Value(550); + constexpr Value NNUEThreshold2 = Value(150); // KingAttackWeights[PieceType] contains king attack weights by piece type constexpr int KingAttackWeights[PIECE_TYPE_NB] = { 0, 0, 81, 52, 44, 10 }; @@ -939,10 +940,13 @@ make_v: Value Eval::evaluate(const Position& pos) { bool classical = !Eval::useNNUE - || abs(eg_value(pos.psq_score())) >= NNUEThreshold * (16 + pos.rule50_count()) / 16; + || abs(eg_value(pos.psq_score())) * 16 > NNUEThreshold1 * (16 + pos.rule50_count()); Value v = classical ? 
Evaluation(pos).value() : NNUE::evaluate(pos) * 5 / 4 + Tempo; + if (classical && Eval::useNNUE && abs(v) * 16 < NNUEThreshold2 * (16 + pos.rule50_count())) + v = NNUE::evaluate(pos) * 5 / 4 + Tempo; + // Damp down the evaluation linearly when shuffling v = v * (100 - pos.rule50_count()) / 100; From fbae5614eb1e82bccd37fbcfb0d2ca388b7a9a7d Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Tue, 18 Aug 2020 08:49:06 +0200 Subject: [PATCH 84/86] Fix Makefile typo remove stray quote, shown with `make help` No functional change --- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index 0f458aa1..1f8ba455 100644 --- a/src/Makefile +++ b/src/Makefile @@ -598,7 +598,7 @@ help: @echo "ppc-64 > PPC 64-bit" @echo "ppc-32 > PPC 32-bit" @echo "armv7 > ARMv7 32-bit" - @echo "armv7-neon" > ARMv7 32-bit with popcnt and neon" + @echo "armv7-neon > ARMv7 32-bit with popcnt and neon" @echo "armv8 > ARMv8 64-bit with popcnt and neon" @echo "apple-silicon > Apple silicon ARM64" @echo "general-64 > unspecified 64-bit" From 384d6844841e9f2da8f5a913c7620440f9e05ab5 Mon Sep 17 00:00:00 2001 From: Joost VandeVondele Date: Tue, 18 Aug 2020 18:06:28 +0200 Subject: [PATCH 85/86] Better error message on missing curl/wget provide clean error/warning message for missing curl/wget, sha256sum/shasum fixes https://github.com/official-stockfish/Stockfish/issues/3025 closes https://github.com/official-stockfish/Stockfish/pull/3026 No functional change --- src/Makefile | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/Makefile b/src/Makefile index 1f8ba455..a3feb68e 100644 --- a/src/Makefile +++ b/src/Makefile @@ -669,9 +669,24 @@ net: @echo "Default net: $(nnuenet)" $(eval nnuedownloadurl := https://tests.stockfishchess.org/api/nn/$(nnuenet)) $(eval curl_or_wget := $(shell if hash curl 2>/dev/null; then echo "curl -skL"; elif hash wget 2>/dev/null; then echo "wget -qO-"; fi)) - @if test -f 
"$(nnuenet)"; then echo "Already available."; else echo "Downloading $(nnuedownloadurl)"; $(curl_or_wget) $(nnuedownloadurl) > $(nnuenet); fi + @if test -f "$(nnuenet)"; then \ + echo "Already available."; \ + else \ + if [ "x$(curl_or_wget)" = "x" ]; then \ + echo "Automatic download failed: neither curl nor wget is installed. Install one of these tools or download the net manually"; exit 1; \ + else \ + echo "Downloading $(nnuedownloadurl)"; $(curl_or_wget) $(nnuedownloadurl) > $(nnuenet);\ + fi; \ + fi; $(eval shasum_command := $(shell if hash shasum 2>/dev/null; then echo "shasum -a 256 "; elif hash sha256sum 2>/dev/null; then echo "sha256sum "; fi)) - @if [ "$(nnuenet)" != "nn-"`$(shasum_command) $(nnuenet) | cut -c1-12`".nnue" ]; then echo "Failed download or $(nnuenet) corrupted, please delete!"; exit 1; fi + @if [ "x$(shasum_command)" != "x" ]; then \ + if [ "$(nnuenet)" != "nn-"`$(shasum_command) $(nnuenet) | cut -c1-12`".nnue" ]; then \ + echo "Failed download or $(nnuenet) corrupted, please delete!"; exit 1; \ + fi \ + else \ + echo "shasum / sha256sum not found, skipping net validation"; \ + fi + # clean binaries and objects objclean: From 42e8789f0b3935b7ea389b3aa929e05e0a016872 Mon Sep 17 00:00:00 2001 From: syzygy1 <3028851+syzygy1@users.noreply.github.com> Date: Tue, 18 Aug 2020 01:56:12 +0200 Subject: [PATCH 86/86] Expanded support for x86-32 architectures. add new ARCH targets x86-32-sse41-popcnt > x86 32-bit with sse41 and popcnt support x86-32-sse2 > x86 32-bit with sse2 support x86-32 > x86 32-bit generic (with mmx and sse support) retire x86-32-old (use general-32) closes https://github.com/official-stockfish/Stockfish/pull/3022 No functional change. 
--- .travis.yml | 3 +- src/Makefile | 145 +++++++++++++++++++++++++++++++-------------------- 2 files changed, 91 insertions(+), 57 deletions(-) diff --git a/.travis.yml b/.travis.yml index 45f1bd3d..12596f1e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -67,9 +67,10 @@ script: - make clean && make -j2 ARCH=x86-64 build && ../tests/signature.sh $benchref - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=general-64 build && ../tests/signature.sh $benchref; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 optimize=no debug=yes build && ../tests/signature.sh $benchref; fi + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32-sse41-popcnt build && ../tests/signature.sh $benchref; fi + - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32-sse2 build && ../tests/signature.sh $benchref; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32 build && ../tests/signature.sh $benchref; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=general-32 build && ../tests/signature.sh $benchref; fi - - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then make clean && make -j2 ARCH=x86-32-old build && ../tests/signature.sh $benchref; fi - if [[ "$TRAVIS_OS_NAME" == "linux" && "$COMP" == "gcc" ]]; then make clean && make -j2 ARCH=x86-64-modern profile-build && ../tests/signature.sh $benchref; fi # compile only for some more advanced architectures (might not run in travis) diff --git a/src/Makefile b/src/Makefile index a3feb68e..79c7333a 100644 --- a/src/Makefile +++ b/src/Makefile @@ -67,11 +67,13 @@ endif # bits = 64/32 --- -DIS_64BIT --- 64-/32-bit operating system # prefetch = yes/no --- -DUSE_PREFETCH --- Use prefetch asm-instruction # popcnt = yes/no --- -DUSE_POPCNT --- Use popcnt asm-instruction +# pext = yes/no --- -DUSE_PEXT --- Use pext x86_64 asm-instruction # sse = yes/no --- -msse --- Use Intel Streaming SIMD 
Extensions +# mmx = yes/no --- -mmmx --- Use Intel MMX instructions +# sse2 = yes/no --- -msse2 --- Use Intel Streaming SIMD Extensions 2 # ssse3 = yes/no --- -mssse3 --- Use Intel Supplemental Streaming SIMD Extensions 3 # sse41 = yes/no --- -msse4.1 --- Use Intel Streaming SIMD Extensions 4.1 # avx2 = yes/no --- -mavx2 --- Use Intel Advanced Vector Extensions 2 -# pext = yes/no --- -DUSE_PEXT --- Use pext x86_64 asm-instruction # avx512 = yes/no --- -mavx512bw --- Use Intel Advanced Vector Extensions 512 # vnni = yes/no --- -mavx512vnni --- Use Intel Vector Neural Network Instructions 512 # neon = yes/no --- -DUSE_NEON --- Use ARM SIMD architecture @@ -92,12 +94,13 @@ sanitize = no bits = 64 prefetch = no popcnt = no -mmx = no +pext = no sse = no +mmx = no +sse2 = no ssse3 = no sse41 = no avx2 = no -pext = no avx512 = no vnni = no neon = no @@ -106,83 +109,82 @@ STRIP = strip ### 2.2 Architecture specific -ifeq ($(ARCH),general-32) - arch = any - bits = 32 -endif +ifeq ($(findstring x86,$(ARCH)),x86) -ifeq ($(ARCH),x86-32-old) +# x86-32/64 + +ifeq ($(findstring x86-32,$(ARCH)),x86-32) arch = i386 bits = 32 -endif - -ifeq ($(ARCH),x86-32) - arch = i386 - bits = 32 - prefetch = yes + sse = yes mmx = yes - sse = yes -endif - -ifeq ($(ARCH),general-64) - arch = any -endif - -ifeq ($(ARCH),x86-64) +else arch = x86_64 - prefetch = yes + sse = yes + sse2 = yes +endif + +ifeq ($(findstring -sse,$(ARCH)),-sse) sse = yes endif -ifeq ($(ARCH),x86-64-sse3-popcnt) - arch = x86_64 - prefetch = yes - sse = yes +ifeq ($(findstring -popcnt,$(ARCH)),-popcnt) popcnt = yes endif -ifeq ($(ARCH),x86-64-ssse3) - arch = x86_64 - prefetch = yes +ifeq ($(findstring -mmx,$(ARCH)),-mmx) + mmx = yes +endif + +ifeq ($(findstring -sse2,$(ARCH)),-sse2) sse = yes + sse2 = yes +endif + +ifeq ($(findstring -ssse3,$(ARCH)),-ssse3) + sse = yes + sse2 = yes ssse3 = yes endif -ifeq ($(ARCH),$(filter $(ARCH),x86-64-sse41-popcnt x86-64-modern)) - arch = x86_64 - prefetch = yes - popcnt = yes +ifeq 
($(findstring -sse41,$(ARCH)),-sse41) sse = yes + sse2 = yes ssse3 = yes sse41 = yes endif -ifeq ($(ARCH),x86-64-avx2) - arch = x86_64 - prefetch = yes +ifeq ($(findstring -modern,$(ARCH)),-modern) popcnt = yes sse = yes + sse2 = yes + ssse3 = yes + sse41 = yes +endif + +ifeq ($(findstring -avx2,$(ARCH)),-avx2) + popcnt = yes + sse = yes + sse2 = yes ssse3 = yes sse41 = yes avx2 = yes endif -ifeq ($(ARCH),x86-64-bmi2) - arch = x86_64 - prefetch = yes +ifeq ($(findstring -bmi2,$(ARCH)),-bmi2) popcnt = yes sse = yes + sse2 = yes ssse3 = yes sse41 = yes avx2 = yes pext = yes endif -ifeq ($(ARCH),x86-64-avx512) - arch = x86_64 - prefetch = yes +ifeq ($(findstring -avx512,$(ARCH)),-avx512) popcnt = yes sse = yes + sse2 = yes ssse3 = yes sse41 = yes avx2 = yes @@ -190,11 +192,10 @@ ifeq ($(ARCH),x86-64-avx512) avx512 = yes endif -ifeq ($(ARCH),x86-64-vnni) - arch = x86_64 - prefetch = yes +ifeq ($(findstring -vnni,$(ARCH)),-vnni) popcnt = yes sse = yes + sse2 = yes ssse3 = yes sse41 = yes avx2 = yes @@ -203,6 +204,28 @@ ifeq ($(ARCH),x86-64-vnni) vnni = yes endif +ifeq ($(sse),yes) + prefetch = yes +endif + +# 64-bit pext is not available on x86-32 +ifeq ($(bits),32) + pext = no +endif + +else + +# all other architectures + +ifeq ($(ARCH),general-32) + arch = any + bits = 32 +endif + +ifeq ($(ARCH),general-64) + arch = any +endif + ifeq ($(ARCH),armv7) arch = armv7 prefetch = yes @@ -242,6 +265,8 @@ ifeq ($(ARCH),ppc-64) prefetch = yes endif +endif + ### ========================================================================== ### Section 3. 
Low-level Configuration ### ========================================================================== @@ -487,6 +512,13 @@ ifeq ($(ssse3),yes) endif endif +ifeq ($(sse2),yes) + CXXFLAGS += -DUSE_SSE2 + ifeq ($(comp),$(filter $(comp),gcc clang mingw)) + CXXFLAGS += -msse2 + endif +endif + ifeq ($(mmx),yes) CXXFLAGS += -DUSE_MMX ifeq ($(comp),$(filter $(comp),gcc clang mingw)) @@ -503,10 +535,6 @@ ifeq ($(neon),yes) endif endif -ifeq ($(arch),x86_64) - CXXFLAGS += -msse2 -DUSE_SSE2 -endif - ### 3.7 pext ifeq ($(pext),yes) CXXFLAGS += -DUSE_PEXT @@ -592,9 +620,10 @@ help: @echo "x86-64-modern > common modern CPU, currently x86-64-sse41-popcnt" @echo "x86-64-ssse3 > x86 64-bit with ssse3 support" @echo "x86-64-sse3-popcnt > x86 64-bit with sse3 and popcnt support" - @echo "x86-64 > x86 64-bit generic" - @echo "x86-32 > x86 32-bit (also enables MMX and SSE)" - @echo "x86-32-old > x86 32-bit fall back for old hardware" + @echo "x86-64 > x86 64-bit generic (with sse2 support)" + @echo "x86-32-sse41-popcnt > x86 32-bit with sse41 and popcnt support" + @echo "x86-32-sse2 > x86 32-bit with sse2 support" + @echo "x86-32 > x86 32-bit generic (with mmx and sse support)" @echo "ppc-64 > PPC 64-bit" @echo "ppc-32 > PPC 32-bit" @echo "armv7 > ARMv7 32-bit" @@ -624,7 +653,7 @@ help: @echo "make -j build ARCH=x86-64-ssse3 COMP=clang" @echo "" ifneq ($(empty_arch), yes) - @echo "-------------------------------\n" + @echo "-------------------------------" @echo "The selected architecture $(ARCH) will enable the following configuration: " @$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity endif @@ -719,11 +748,13 @@ config-sanity: @echo "os: '$(OS)'" @echo "prefetch: '$(prefetch)'" @echo "popcnt: '$(popcnt)'" + @echo "pext: '$(pext)'" @echo "sse: '$(sse)'" + @echo "mmx: '$(mmx)'" + @echo "sse2: '$(sse2)'" @echo "ssse3: '$(ssse3)'" @echo "sse41: '$(sse41)'" @echo "avx2: '$(avx2)'" - @echo "pext: '$(pext)'" @echo "avx512: '$(avx512)'" @echo "vnni: '$(vnni)'" @echo "neon: '$(neon)'" @@ 
-744,11 +775,13 @@ config-sanity: @test "$(bits)" = "32" || test "$(bits)" = "64" @test "$(prefetch)" = "yes" || test "$(prefetch)" = "no" @test "$(popcnt)" = "yes" || test "$(popcnt)" = "no" + @test "$(pext)" = "yes" || test "$(pext)" = "no" @test "$(sse)" = "yes" || test "$(sse)" = "no" + @test "$(mmx)" = "yes" || test "$(mmx)" = "no" + @test "$(sse2)" = "yes" || test "$(sse2)" = "no" @test "$(ssse3)" = "yes" || test "$(ssse3)" = "no" @test "$(sse41)" = "yes" || test "$(sse41)" = "no" @test "$(avx2)" = "yes" || test "$(avx2)" = "no" - @test "$(pext)" = "yes" || test "$(pext)" = "no" @test "$(avx512)" = "yes" || test "$(avx512)" = "no" @test "$(vnni)" = "yes" || test "$(vnni)" = "no" @test "$(neon)" = "yes" || test "$(neon)" = "no"