Merge branch 'tools' into tools_merge

2026-05-20 13:17:44 +00:00 · 2021-08-09 11:39:42 +02:00
parent dabaf2220f cee4ed39bd
commit 51b4e7bd6e
59 changed files with 15556 additions and 93 deletions
@@ -19,12 +19,20 @@
 #ifndef MISC_H_INCLUDED
 #define MISC_H_INCLUDED

+#include <algorithm>
 #include <cassert>
 #include <chrono>
+#include <functional>
+#include <mutex>
 #include <ostream>
 #include <string>
 #include <vector>
+
 #include <cstdint>
+#include <cmath>
+#include <cctype>
+#include <sstream>
+#include <deque>

 #include "types.h"

@@ -128,6 +136,221 @@ private:
  std::size_t size_ = 0;
 };

+// This logger allows printing many parts in a region atomically
+// but doesn't block the threads trying to append to other regions.
+// Instead if some region tries to pring while other region holds
+// the lock the messages are queued to be printed as soon as the
+// current region releases the lock.
+struct SynchronizedRegionLogger
+{
+  using RegionId = std::uint64_t;
+
+  struct Region
+  {
+    friend struct SynchronizedRegionLogger;
+
+    Region() :
+      logger(nullptr), region_id(0), is_held(false)
+    {
+    }
+
+    Region(const Region&) = delete;
+    Region& operator=(const Region&) = delete;
+
+    Region(Region&& other) :
+      logger(other.logger), region_id(other.region_id), is_held(other.is_held)
+    {
+      other.logger = nullptr;
+      other.is_held = false;
+    }
+
+    Region& operator=(Region&& other) {
+      if (is_held && logger != nullptr)
+      {
+        logger->release_region(region_id);
+      }
+
+      logger = other.logger;
+      region_id = other.region_id;
+      is_held = other.is_held;
+
+      other.is_held = false;
+
+      return *this;
+    }
+
+    ~Region() { unlock(); }
+
+    void unlock() {
+      if (is_held) {
+        is_held = false;
+
+        if (logger != nullptr)
+          logger->release_region(region_id);
+      }
+    }
+
+    Region& operator << (std::ostream&(*pManip)(std::ostream&)) {
+      if (logger != nullptr)
+        logger->write(region_id, pManip);
+
+      return *this;
+    }
+
+    template <typename T>
+    Region& operator << (const T& value) {
+      if (logger != nullptr)
+        logger->write(region_id, value);
+
+      return *this;
+    }
+
+  private:
+    SynchronizedRegionLogger* logger;
+    RegionId region_id;
+    bool is_held;
+
+    Region(SynchronizedRegionLogger& log, RegionId id) :
+      logger(&log), region_id(id), is_held(true)
+    {
+    }
+  };
+
+private:
+  struct RegionBookkeeping
+  {
+    RegionBookkeeping(RegionId rid) : id(rid), is_held(true) {}
+
+    std::vector<std::string> pending_parts;
+    RegionId id;
+    bool is_held;
+  };
+
+  RegionId init_next_region()
+  {
+    static RegionId next_id = 0;
+
+    std::lock_guard lock(mutex);
+
+    const auto id = next_id++;
+    regions.emplace_back(id);
+
+    return id;
+  }
+
+  void write(RegionId id, std::ostream&(*pManip)(std::ostream&)) {
+    std::lock_guard lock(mutex);
+
+    if (regions.empty())
+      return;
+
+    if (id == regions.front().id) {
+      // We can just directly print to the output because
+      // we are at the front of the region queue.
+      out << *pManip;
+    } else {
+      // We have to schedule the print until previous regions are
+      // processed
+      auto* region = find_region_nolock(id);
+      if (region == nullptr)
+        return;
+
+      std::stringstream ss;
+      ss << *pManip;
+      region->pending_parts.emplace_back(std::move(ss).str());
+    }
+  }
+
+  template <typename T>
+  void write(RegionId id, const T& value) {
+    std::lock_guard lock(mutex);
+
+    if (regions.empty())
+      return;
+
+    if (id == regions.front().id) {
+      // We can just directly print to the output because
+      // we are at the front of the region queue.
+      out << value;
+    } else {
+      // We have to schedule the print until previous regions are
+      // processed
+      auto* region = find_region_nolock(id);
+      if (region == nullptr)
+        return;
+
+      std::stringstream ss;
+      ss << value;
+      region->pending_parts.emplace_back(std::move(ss).str());
+    }
+  }
+
+  std::ostream& out;
+
+  std::deque<RegionBookkeeping> regions;
+
+  std::mutex mutex;
+
+  RegionBookkeeping* find_region_nolock(RegionId id) {
+    // Linear search because the amount of concurrent regions should be small.
+    auto it = std::find_if(
+      regions.begin(),
+      regions.end(),
+      [id](const RegionBookkeeping& r) { return r.id == id; });
+
+    if (it == regions.end())
+      return nullptr;
+    else
+      return &*it;
+  }
+
+  void release_region(RegionId id) {
+    std::lock_guard lock(mutex);
+
+    auto* region = find_region_nolock(id);
+    if (region == nullptr)
+      return;
+
+    region->is_held = false;
+
+    process_backlog_nolock();
+  }
+
+  void process_backlog_nolock()
+  {
+    while(!regions.empty()) {
+      auto& region = regions.front();
+
+      for(auto& part : region.pending_parts) {
+        out << part;
+      }
+
+      // If the region is still held then we don't
+      // want to start printing stuff from the next region.
+      if (region.is_held)
+        break;
+
+      regions.pop_front();
+    }
+  }
+
+public:
+
+  SynchronizedRegionLogger(std::ostream& s) :
+    out(s)
+  {
+  }
+
+  [[nodiscard]] Region new_region() {
+    const auto id = init_next_region();
+    return Region(*this, id);
+  }
+
+};
+
+extern SynchronizedRegionLogger sync_region_cout;
+
+
 /// xorshift64star Pseudo-Random Number Generator
 /// This class is based on original code written and dedicated
 /// to the public domain by Sebastiano Vigna (2014).
@@ -143,6 +366,19 @@ private:
 /// For further analysis see
 ///   <http://vigna.di.unimi.it/ftp/papers/xorshift.pdf>

+static uint64_t string_hash(const std::string& str)
+{
+  uint64_t h = 525201411107845655ull;
+
+  for (auto c : str) {
+    h ^= static_cast<uint64_t>(c);
+    h *= 0x5bd1e9955bd1e995ull;
+    h ^= h >> 47;
+  }
+
+  return h;
+}
+
 class PRNG {

  uint64_t s;
@@ -154,7 +390,9 @@ class PRNG {
  }

 public:
+  PRNG() { set_seed_from_time(); }
  PRNG(uint64_t seed) : s(seed) { assert(seed); }
+  PRNG(const std::string& seed) { set_seed(seed); }

  template<typename T> T rand() { return T(rand64()); }

@@ -162,8 +400,54 @@ public:
  /// Output values only have 1/8th of their bits set on average.
  template<typename T> T sparse_rand()
  { return T(rand64() & rand64() & rand64()); }
+  // Returns a random number from 0 to n-1. (Not uniform distribution, but this is enough in reality)
+  uint64_t rand(uint64_t n) { return rand<uint64_t>() % n; }
+
+  // Return the random seed used internally.
+  uint64_t get_seed() const { return s; }
+
+  void set_seed(uint64_t seed) { s = seed; }
+
+  uint64_t next_random_seed()
+  {
+    uint64_t seed = 0;
+    for(int i = 0; i < 64; ++i)
+    {
+      const auto off = rand64() % 64;
+      seed |= (rand64() & (uint64_t(1) << off)) >> off;
+      seed <<= 1;
+    }
+    return seed;
+  }
+
+  void set_seed_from_time()
+  {
+      set_seed(std::chrono::system_clock::now().time_since_epoch().count());
+  }
+
+  void set_seed(const std::string& str)
+  {
+    if (str.empty())
+    {
+      set_seed_from_time();
+    }
+    else if (std::all_of(str.begin(), str.end(), [](char c) { return std::isdigit(c);} )) {
+      set_seed(std::stoull(str));
+    }
+    else
+    {
+      set_seed(string_hash(str));
+    }
+  }
 };

+// Display a random seed. (For debugging)
+inline std::ostream& operator<<(std::ostream& os, PRNG& prng)
+{
+  os << "PRNG::seed = " << std::hex << prng.get_seed() << std::dec;
+  return os;
+}
+
 inline uint64_t mul_hi64(uint64_t a, uint64_t b) {
 #if defined(__GNUC__) && defined(IS_64BIT)
    __extension__ typedef unsigned __int128 uint128;
@@ -178,6 +462,74 @@ inline uint64_t mul_hi64(uint64_t a, uint64_t b) {
 #endif
 }

+// This bitset can be accessed concurrently, provided
+// the concurrent accesses are performed on distinct
+// instances of underlying type. That means the cuncurrent
+// accesses need to be spaced by at least
+// bits_per_bucket bits.
+// But at least best_concurrent_access_stride bits
+// is recommended to prevent false sharing.
+template <uint64_t N>
+struct LargeBitset
+{
+private:
+    constexpr static uint64_t cache_line_size = 64;
+
+public:
+    using UnderlyingType = uint64_t;
+
+    constexpr static uint64_t num_bits = N;
+    constexpr static uint64_t bits_per_bucket = 8 * sizeof(uint64_t);
+    constexpr static uint64_t num_buckets = (num_bits + bits_per_bucket - 1) / bits_per_bucket;
+    constexpr static uint64_t best_concurrent_access_stride = 8 * cache_line_size;
+
+    LargeBitset()
+    {
+        std::fill(std::begin(bits), std::end(bits), 0);
+    }
+
+    void set(uint64_t idx)
+    {
+        const uint64_t bucket = idx / bits_per_bucket;
+        const uint64_t bit = uint64_t(1) << (idx % bits_per_bucket);
+        bits[bucket] |= bit;
+    }
+
+    bool test(uint64_t idx) const
+    {
+        const uint64_t bucket = idx / bits_per_bucket;
+        const uint64_t bit = uint64_t(1) << (idx % bits_per_bucket);
+        return bits[bucket] & bit;
+    }
+
+    uint64_t count() const
+    {
+        uint64_t c = 0;
+        uint64_t i = 0;
+
+        for (; i < num_buckets - 3; i += 4)
+        {
+            uint64_t c0 = popcount(bits[i+0]);
+            uint64_t c1 = popcount(bits[i+1]);
+            uint64_t c2 = popcount(bits[i+2]);
+            uint64_t c3 = popcount(bits[i+3]);
+            c0 += c1;
+            c2 += c3;
+            c += c0 + c2;
+        }
+
+        for (; i < num_buckets; ++i)
+        {
+            c += popcount(bits[i]);
+        }
+
+        return c;
+    }
+
+private:
+    alignas(cache_line_size) UnderlyingType bits[num_buckets];
+};
+
 /// Under Windows it is not possible for a process to run on more than one
 /// logical processor group. This usually means to be limited to use max 64
 /// cores. To overcome this, some special platform specific API should be
@@ -188,6 +540,111 @@ namespace WinProcGroup {
  void bindThisThread(size_t idx);
 }

+// Returns a string that represents the current time. (Used for log output when learning evaluation function)
+std::string now_string();
+void sleep(int ms);
+
+namespace Algo {
+    // Fisher-Yates
+    template <typename Rng, typename T>
+    void shuffle(std::vector<T>& buf, Rng&& prng)
+    {
+        const auto size = buf.size();
+        for (uint64_t i = 0; i < size; ++i)
+            std::swap(buf[i], buf[prng.rand(size - i) + i]);
+    }
+
+    // split the string
+    inline std::vector<std::string> split(const std::string& input, char delimiter) {
+        std::istringstream stream(input);
+        std::string field;
+        std::vector<std::string> fields;
+
+        while (std::getline(stream, field, delimiter)) {
+            fields.push_back(field);
+        }
+
+        return fields;
+    }
+}
+
+// --------------------
+//       Path
+// --------------------
+
+// Something like Path class in C#. File name manipulation.
+// Match with the C# method name.
+struct Path
+{
+	// Combine the path name and file name and return it.
+	// If the folder name is not an empty string, append it if there is no'/' or'\\' at the end.
+	static std::string combine(const std::string& folder, const std::string& filename)
+	{
+		if (folder.length() >= 1 && *folder.rbegin() != '/' && *folder.rbegin() != '\\')
+			return folder + "/" + filename;
+
+		return folder + filename;
+	}
+
+	// Get the file name part (excluding the folder name) from the full path expression.
+	static std::string get_file_name(const std::string& path)
+	{
+		// I don't know which "\" or "/" is used.
+		auto path_index1 = path.find_last_of("\\") + 1;
+		auto path_index2 = path.find_last_of("/") + 1;
+		auto path_index = std::max(path_index1, path_index2);
+
+		return path.substr(path_index);
+	}
+};
+
+// It is ignored when new even though alignas is specified & because it is ignored when the STL container allocates memory,
+// A custom allocator used for that.
+template <typename T>
+class AlignedAllocator {
+public:
+  using value_type = T;
+
+  AlignedAllocator() {}
+  AlignedAllocator(const AlignedAllocator&) {}
+  AlignedAllocator(AlignedAllocator&&) {}
+
+  template <typename U> AlignedAllocator(const AlignedAllocator<U>&) {}
+
+  T* allocate(std::size_t n) { return (T*)std_aligned_alloc(alignof(T), n * sizeof(T)); }
+  void deallocate(T* p, std::size_t ) { std_aligned_free(p); }
+};
+
+template <typename T>
+class CacheLineAlignedAllocator {
+public:
+    using value_type = T;
+
+    constexpr static uint64_t cache_line_size = 64;
+
+    CacheLineAlignedAllocator() {}
+    CacheLineAlignedAllocator(const CacheLineAlignedAllocator&) {}
+    CacheLineAlignedAllocator(CacheLineAlignedAllocator&&) {}
+
+    template <typename U> CacheLineAlignedAllocator(const CacheLineAlignedAllocator<U>&) {}
+
+    T* allocate(std::size_t n) { return (T*)std_aligned_alloc(cache_line_size, n * sizeof(T)); }
+    void deallocate(T* p, std::size_t) { std_aligned_free(p); }
+};
+
+// --------------------
+//  Dependency Wrapper
+// --------------------
+
+namespace Dependency
+{
+  // In the Linux environment, if you getline() the text file is'\r\n'
+  // Since'\r' remains at the end, write a wrapper to remove this'\r'.
+  // So when calling getline() on fstream,
+  // just write getline() instead of std::getline() and use this function.
+  extern bool getline(std::ifstream& fs, std::string& s);
+}
+
 namespace CommandLine {
  void init(int argc, char* argv[]);