Cleanup trainer.

2026-05-20 14:27:45 +00:00 · 2020-10-14 21:26:03 +02:00
parent ea8eb415de
commit c286f9cd7d
6 changed files with 1263 additions and 1153 deletions
@@ -1,247 +1,267 @@
-// Specialization of NNUE evaluation function learning class template for InputSlice
-
-#ifndef _NNUE_TRAINER_INPUT_SLICE_H_
+#ifndef _NNUE_TRAINER_INPUT_SLICE_H_
 #define _NNUE_TRAINER_INPUT_SLICE_H_

-#include "../../learn/learn.h"
-#include "../layers/input_slice.h"
 #include "trainer.h"

-namespace Eval {
+#include "learn/learn.h"

-namespace NNUE {
+#include "nnue/layers/input_slice.h"

-// Learning: Input layer
-class SharedInputTrainer {
- public:
-  // factory function
-  static std::shared_ptr<SharedInputTrainer> Create(
-      FeatureTransformer* ft) {
-    static std::shared_ptr<SharedInputTrainer> instance;
-    if (!instance) {
-      instance.reset(new SharedInputTrainer(ft));
-    }
-    ++instance->num_referrers_;
-    return instance;
-  }
+// Specialization of NNUE evaluation function learning class template for InputSlice
+namespace Eval::NNUE {

-  // Set options such as hyperparameters
-  void SendMessage(Message* message) {
-    if (num_calls_ == 0) {
-      current_operation_ = Operation::kSendMessage;
-      feature_transformer_trainer_->SendMessage(message);
-    }
-    assert(current_operation_ == Operation::kSendMessage);
-    if (++num_calls_ == num_referrers_) {
-      num_calls_ = 0;
-      current_operation_ = Operation::kNone;
-    }
-  }
+    // Learning: Input layer
+    class SharedInputTrainer {
+    public:
+        // factory function
+        static std::shared_ptr<SharedInputTrainer> Create(
+            FeatureTransformer* ft) {

-  // Initialize the parameters with random numbers
-  template <typename RNG>
-  void Initialize(RNG& rng) {
-    if (num_calls_ == 0) {
-      current_operation_ = Operation::kInitialize;
-      feature_transformer_trainer_->Initialize(rng);
-    }
-    assert(current_operation_ == Operation::kInitialize);
-    if (++num_calls_ == num_referrers_) {
-      num_calls_ = 0;
-      current_operation_ = Operation::kNone;
-    }
-  }
+            static std::shared_ptr<SharedInputTrainer> instance;

-  // forward propagation
-  const LearnFloatType* Propagate(const std::vector<Example>& batch) {
-    if (gradients_.size() < kInputDimensions * batch.size()) {
-      gradients_.resize(kInputDimensions * batch.size());
-    }
-    batch_size_ = static_cast<IndexType>(batch.size());
-    if (num_calls_ == 0) {
-      current_operation_ = Operation::kPropagate;
-      output_ = feature_transformer_trainer_->Propagate(batch);
-    }
-    assert(current_operation_ == Operation::kPropagate);
-    if (++num_calls_ == num_referrers_) {
-      num_calls_ = 0;
-      current_operation_ = Operation::kNone;
-    }
-    return output_;
-  }
+            if (!instance) {
+                instance.reset(new SharedInputTrainer(ft));
+            }

-  // backpropagation
-  void Backpropagate(const LearnFloatType* gradients,
-                     LearnFloatType learning_rate) {
-    if (num_referrers_ == 1) {
-      feature_transformer_trainer_->Backpropagate(gradients, learning_rate);
-      return;
-    }
-    if (num_calls_ == 0) {
-      current_operation_ = Operation::kBackPropagate;
-      for (IndexType b = 0; b < batch_size_; ++b) {
-        const IndexType batch_offset = kInputDimensions * b;
-        for (IndexType i = 0; i < kInputDimensions; ++i) {
-          gradients_[batch_offset + i] = static_cast<LearnFloatType>(0.0);
+            ++instance->num_referrers_;
+
+            return instance;
        }
-      }
-    }
-    assert(current_operation_ == Operation::kBackPropagate);
-    for (IndexType b = 0; b < batch_size_; ++b) {
-      const IndexType batch_offset = kInputDimensions * b;
-      for (IndexType i = 0; i < kInputDimensions; ++i) {
-        gradients_[batch_offset + i] += gradients[batch_offset + i];
-      }
-    }
-    if (++num_calls_ == num_referrers_) {
-      feature_transformer_trainer_->Backpropagate(
-          gradients_.data(), learning_rate);
-      num_calls_ = 0;
-      current_operation_ = Operation::kNone;
-    }
-  }

- private:
-  // constructor
-  SharedInputTrainer(FeatureTransformer* ft) :
-      batch_size_(0),
-      num_referrers_(0),
-      num_calls_(0),
-      current_operation_(Operation::kNone),
-      feature_transformer_trainer_(Trainer<FeatureTransformer>::Create(
-          ft)),
-      output_(nullptr) {
-  }
+        // Set options such as hyperparameters
+        void SendMessage(Message* message) {
+            if (num_calls_ == 0) {
+                current_operation_ = Operation::kSendMessage;
+                feature_transformer_trainer_->SendMessage(message);
+            }

-  // number of input/output dimensions
-  static constexpr IndexType kInputDimensions =
-      FeatureTransformer::kOutputDimensions;
+            assert(current_operation_ == Operation::kSendMessage);

-  // type of processing
-  enum class Operation {
-    kNone,
-    kSendMessage,
-    kInitialize,
-    kPropagate,
-    kBackPropagate,
-  };
+            if (++num_calls_ == num_referrers_) {
+                num_calls_ = 0;
+                current_operation_ = Operation::kNone;
+            }
+        }

-  // number of samples in mini-batch
-  IndexType batch_size_;
+        // Initialize the parameters with random numbers
+        template <typename RNG>
+        void Initialize(RNG& rng) {
+            if (num_calls_ == 0) {
+                current_operation_ = Operation::kInitialize;
+                feature_transformer_trainer_->Initialize(rng);
+            }

-  // number of layers sharing this layer as input
-  std::uint32_t num_referrers_;
+            assert(current_operation_ == Operation::kInitialize);

-  // Number of times the current process has been called
-  std::uint32_t num_calls_;
+            if (++num_calls_ == num_referrers_) {
+                num_calls_ = 0;
+                current_operation_ = Operation::kNone;
+            }
+        }

-  // current processing type
-  Operation current_operation_;
+        // forward propagation
+        const LearnFloatType* Propagate(const std::vector<Example>& batch) {
+            if (gradients_.size() < kInputDimensions * batch.size()) {
+                gradients_.resize(kInputDimensions * batch.size());
+            }

-  // Trainer of input feature converter
-  const std::shared_ptr<Trainer<FeatureTransformer>>
-      feature_transformer_trainer_;
+            batch_size_ = static_cast<IndexType>(batch.size());

-  // pointer to output shared for forward propagation
-  const LearnFloatType* output_;
+            if (num_calls_ == 0) {
+                current_operation_ = Operation::kPropagate;
+                output_ = feature_transformer_trainer_->Propagate(batch);
+            }

-  // buffer for back propagation
-  std::vector<LearnFloatType> gradients_;
-};
+            assert(current_operation_ == Operation::kPropagate);

-// Learning: Input layer
-template <IndexType OutputDimensions, IndexType Offset>
-class Trainer<Layers::InputSlice<OutputDimensions, Offset>> {
- private:
-  // Type of layer to learn
-  using LayerType = Layers::InputSlice<OutputDimensions, Offset>;
+            if (++num_calls_ == num_referrers_) {
+                num_calls_ = 0;
+                current_operation_ = Operation::kNone;
+            }

- public:
-  // factory function
-  static std::shared_ptr<Trainer> Create(
-      LayerType* /*target_layer*/, FeatureTransformer* ft) {
-    return std::shared_ptr<Trainer>(new Trainer(ft));
-  }
+            return output_;
+        }

-  // Set options such as hyperparameters
-  void SendMessage(Message* message) {
-    shared_input_trainer_->SendMessage(message);
-  }
+        // backpropagation
+        void Backpropagate(const LearnFloatType* gradients,
+                           LearnFloatType learning_rate) {

-  // Initialize the parameters with random numbers
-  template <typename RNG>
-  void Initialize(RNG& rng) {
-    shared_input_trainer_->Initialize(rng);
-  }
+            if (num_referrers_ == 1) {
+                feature_transformer_trainer_->Backpropagate(gradients, learning_rate);
+                return;
+            }

-  // forward propagation
-  const LearnFloatType* Propagate(const std::vector<Example>& batch) {
-    if (output_.size() < kOutputDimensions * batch.size()) {
-      output_.resize(kOutputDimensions * batch.size());
-      gradients_.resize(kInputDimensions * batch.size());
-    }
-    batch_size_ = static_cast<IndexType>(batch.size());
-    const auto input = shared_input_trainer_->Propagate(batch);
-    for (IndexType b = 0; b < batch_size_; ++b) {
-      const IndexType input_offset = kInputDimensions * b;
-      const IndexType output_offset = kOutputDimensions * b;
+            if (num_calls_ == 0) {
+                current_operation_ = Operation::kBackPropagate;
+                for (IndexType b = 0; b < batch_size_; ++b) {
+                    const IndexType batch_offset = kInputDimensions * b;
+                    for (IndexType i = 0; i < kInputDimensions; ++i) {
+                        gradients_[batch_offset + i] = static_cast<LearnFloatType>(0.0);
+                    }
+                }
+            }
+
+            assert(current_operation_ == Operation::kBackPropagate);
+
+            for (IndexType b = 0; b < batch_size_; ++b) {
+                const IndexType batch_offset = kInputDimensions * b;
+                for (IndexType i = 0; i < kInputDimensions; ++i) {
+                    gradients_[batch_offset + i] += gradients[batch_offset + i];
+                }
+            }
+
+            if (++num_calls_ == num_referrers_) {
+                feature_transformer_trainer_->Backpropagate(
+                    gradients_.data(), learning_rate);
+                num_calls_ = 0;
+                current_operation_ = Operation::kNone;
+            }
+        }
+
+    private:
+        // constructor
+        SharedInputTrainer(FeatureTransformer* ft) :
+            batch_size_(0),
+            num_referrers_(0),
+            num_calls_(0),
+            current_operation_(Operation::kNone),
+            feature_transformer_trainer_(Trainer<FeatureTransformer>::Create(
+                ft)),
+            output_(nullptr) {
+        }
+
+        // number of input/output dimensions
+        static constexpr IndexType kInputDimensions =
+            FeatureTransformer::kOutputDimensions;
+
+        // type of processing
+        enum class Operation {
+            kNone,
+            kSendMessage,
+            kInitialize,
+            kPropagate,
+            kBackPropagate,
+        };
+
+        // number of samples in mini-batch
+        IndexType batch_size_;
+
+        // number of layers sharing this layer as input
+        std::uint32_t num_referrers_;
+
+        // Number of times the current process has been called
+        std::uint32_t num_calls_;
+
+        // current processing type
+        Operation current_operation_;
+
+        // Trainer of input feature converter
+        const std::shared_ptr<Trainer<FeatureTransformer>>
+            feature_transformer_trainer_;
+
+        // pointer to output shared for forward propagation
+        const LearnFloatType* output_;
+
+        // buffer for back propagation
+        std::vector<LearnFloatType> gradients_;
+    };
+
+    // Learning: Input layer
+    template <IndexType OutputDimensions, IndexType Offset>
+    class Trainer<Layers::InputSlice<OutputDimensions, Offset>> {
+    private:
+        // Type of layer to learn
+        using LayerType = Layers::InputSlice<OutputDimensions, Offset>;
+
+    public:
+        // factory function
+        static std::shared_ptr<Trainer> Create(
+            LayerType* /*target_layer*/, FeatureTransformer* ft) {
+
+            return std::shared_ptr<Trainer>(new Trainer(ft));
+        }
+
+        // Set options such as hyperparameters
+        void SendMessage(Message* message) {
+            shared_input_trainer_->SendMessage(message);
+        }
+
+        // Initialize the parameters with random numbers
+        template <typename RNG>
+        void Initialize(RNG& rng) {
+            shared_input_trainer_->Initialize(rng);
+        }
+
+        // forward propagation
+        const LearnFloatType* Propagate(const std::vector<Example>& batch) {
+            if (output_.size() < kOutputDimensions * batch.size()) {
+              output_.resize(kOutputDimensions * batch.size());
+              gradients_.resize(kInputDimensions * batch.size());
+            }
+
+            batch_size_ = static_cast<IndexType>(batch.size());
+
+            const auto input = shared_input_trainer_->Propagate(batch);
+            for (IndexType b = 0; b < batch_size_; ++b) {
+                const IndexType input_offset = kInputDimensions * b;
+                const IndexType output_offset = kOutputDimensions * b;
 #if defined(USE_BLAS)
-      cblas_scopy(kOutputDimensions, &input[input_offset + Offset], 1,
-                  &output_[output_offset], 1);
+                cblas_scopy(kOutputDimensions, &input[input_offset + Offset], 1,
+                            &output_[output_offset], 1);
 #else
-      for (IndexType i = 0; i < kOutputDimensions; ++i) {
-        output_[output_offset + i] = input[input_offset + Offset + i];
-      }
+                for (IndexType i = 0; i < kOutputDimensions; ++i) {
+                    output_[output_offset + i] = input[input_offset + Offset + i];
+                }
 #endif
-    }
-    return output_.data();
-  }
+            }

-  // backpropagation
-  void Backpropagate(const LearnFloatType* gradients,
-                     LearnFloatType learning_rate) {
-    for (IndexType b = 0; b < batch_size_; ++b) {
-      const IndexType input_offset = kInputDimensions * b;
-      const IndexType output_offset = kOutputDimensions * b;
-      for (IndexType i = 0; i < kInputDimensions; ++i) {
-        if ((int)i < (int)Offset || i >= Offset + kOutputDimensions) {
-          gradients_[input_offset + i] = static_cast<LearnFloatType>(0.0);
-        } else {
-          gradients_[input_offset + i] = gradients[output_offset + i - Offset];
+            return output_.data();
        }
-      }
-    }
-    shared_input_trainer_->Backpropagate(gradients_.data(), learning_rate);
-  }

- private:
-  // constructor
-  Trainer(FeatureTransformer* ft):
-      batch_size_(0),
-      shared_input_trainer_(SharedInputTrainer::Create(ft)) {
-  }
+        // backpropagation
+        void Backpropagate(const LearnFloatType* gradients,
+                           LearnFloatType learning_rate) {

-  // number of input/output dimensions
-  static constexpr IndexType kInputDimensions =
-      FeatureTransformer::kOutputDimensions;
-  static constexpr IndexType kOutputDimensions = OutputDimensions;
-  static_assert(Offset + kOutputDimensions <= kInputDimensions, "");
+            for (IndexType b = 0; b < batch_size_; ++b) {
+                const IndexType input_offset = kInputDimensions * b;
+                const IndexType output_offset = kOutputDimensions * b;
+                for (IndexType i = 0; i < kInputDimensions; ++i) {
+                    if ((int)i < (int)Offset || i >= Offset + kOutputDimensions) {
+                        gradients_[input_offset + i] = static_cast<LearnFloatType>(0.0);
+                    } else {
+                        gradients_[input_offset + i] = gradients[output_offset + i - Offset];
+                    }
+                }
+            }
+            shared_input_trainer_->Backpropagate(gradients_.data(), learning_rate);
+        }

-  // number of samples in mini-batch
-  IndexType batch_size_;
+    private:
+        // constructor
+        Trainer(FeatureTransformer* ft):
+            batch_size_(0),
+            shared_input_trainer_(SharedInputTrainer::Create(ft)) {
+        }

-  // Trainer of shared input layer
-  const std::shared_ptr<SharedInputTrainer> shared_input_trainer_;
+        // number of input/output dimensions
+        static constexpr IndexType kInputDimensions =
+            FeatureTransformer::kOutputDimensions;
+        static constexpr IndexType kOutputDimensions = OutputDimensions;
+        static_assert(Offset + kOutputDimensions <= kInputDimensions, "");

-  // Forward propagation buffer
-  std::vector<LearnFloatType> output_;
+        // number of samples in mini-batch
+        IndexType batch_size_;

-  // buffer for back propagation
-  std::vector<LearnFloatType> gradients_;
-};
+        // Trainer of shared input layer
+        const std::shared_ptr<SharedInputTrainer> shared_input_trainer_;

-}  // namespace NNUE
+        // Forward propagation buffer
+        std::vector<LearnFloatType> output_;

-}  // namespace Eval
+        // buffer for back propagation
+        std::vector<LearnFloatType> gradients_;
+    };
+
+}  // namespace Eval::NNUE

 #endif