quill
RdtscClock.h
1 
7 #pragma once
8 
9 #include "quill/core/Attributes.h"
10 #include "quill/core/ChronoTimeUtils.h"
11 #include "quill/core/Common.h"
12 #include "quill/core/Rdtsc.h"
13 
14 #include <algorithm>
15 #include <array>
16 #include <atomic>
17 #include <chrono>
18 #include <cstdint>
19 #include <cstdio>
20 #include <limits>
21 #include <vector>
22 
23 QUILL_BEGIN_NAMESPACE
24 
25 namespace detail
26 {
27 
28 #if defined(_WIN32) && defined(_MSC_VER) && !defined(__GNUC__)
29  #pragma warning(push)
30  #pragma warning(disable : 4324)
31 #endif
32 
37 {
38 public:
42  class RdtscTicks
43  {
44  public:
45  QUILL_NODISCARD QUILL_EXPORT static RdtscTicks& instance()
46  {
47  static RdtscTicks inst;
48  return inst;
49  }
50 
51  /***/
52  QUILL_NODISCARD double ns_per_tick() const noexcept { return _ns_per_tick; }
53 
54  private:
58  RdtscTicks()
59  {
60 #if defined(__aarch64__)
61  // On AArch64 the generic timer frequency is exposed directly
62  uint64_t freq;
63  __asm__ volatile("mrs %0, cntfrq_el0" : "=r"(freq));
64  // freq is typically exactly 24000000, 50000000, or 1000000000 depending on the SoC
65  _ns_per_tick = 1e9 / static_cast<double>(freq);
66 #else
67  // Convert rdtsc to wall time.
68  // 1. Get real time and rdtsc current count
69  // 2. Calculate how many rdtsc ticks can occur in one
70  // calculate _ticks_per_ns as the median over a number of observations
71  // we use always odd number of trials for easy median calc
72  constexpr uint64_t spin_duration_ns = 10ull * 1'000'000ull; // 10 ms
73  constexpr size_t max_trials = 15;
74  constexpr size_t min_trials = 3;
75  constexpr double convergence_threshold = 0.01; // 1% threshold
76 
77  std::vector<double> rates;
78  rates.reserve(max_trials);
79 
80  double previous_median = 0.0;
81 
82  for (size_t i = 0; i < max_trials; ++i)
83  {
84  uint64_t const beg_ts = detail::get_steady_time_ns();
85  uint64_t const beg_tsc = rdtsc();
86  uint64_t end_tsc;
87  uint64_t elapsed_ns;
88 
89  do
90  {
91  uint64_t const end_ts = detail::get_steady_time_ns();
92  end_tsc = rdtsc();
93  elapsed_ns = end_ts - beg_ts;
94  } while (elapsed_ns < spin_duration_ns);
95 
96  rates.push_back(static_cast<double>(end_tsc - beg_tsc) / static_cast<double>(elapsed_ns));
97 
98  // Check for convergence after minimum trials and only on an odd count of trials.
99  if (((i + 1) >= min_trials) && (((i + 1) % 2) != 0))
100  {
101  std::nth_element(rates.begin(), rates.begin() + static_cast<ptrdiff_t>((i + 1) / 2), rates.end());
102  double current_median = rates[(i + 1) / 2];
103 
104  // If we've converged, break early
105  if (std::abs(current_median - previous_median) / current_median < convergence_threshold)
106  {
107  break;
108  }
109 
110  previous_median = current_median;
111  }
112  }
113 
114  // Calculate final median.
115  std::nth_element(rates.begin(), rates.begin() + static_cast<ptrdiff_t>(rates.size() / 2),
116  rates.end());
117 
118  double const ticks_per_ns = rates[rates.size() / 2];
119  _ns_per_tick = 1 / ticks_per_ns;
120 #endif
121  }
122 
123  double _ns_per_tick{0};
124  };
125 
126  /***/
127  explicit RdtscClock(std::chrono::nanoseconds resync_interval)
128  : _ns_per_tick(RdtscTicks::instance().ns_per_tick())
129  {
130  double const calc_value = static_cast<double>(resync_interval.count()) / _ns_per_tick;
131 
132  // Check for overflow and negative values
133  if (calc_value >= static_cast<double>(std::numeric_limits<int64_t>::max()) || calc_value < 0)
134  {
135  _resync_interval_ticks = std::numeric_limits<int64_t>::max();
136  }
137  else
138  {
139  _resync_interval_ticks = static_cast<int64_t>(calc_value);
140  }
141 
142  _resync_interval_original = _resync_interval_ticks;
143 
144  if (!resync(resync_lag_cycles))
145  {
146  // try to resync again with higher lag
147  if (!resync(resync_lag_cycles * 2u))
148  {
149  std::fprintf(stderr, "Failed to sync RdtscClock. Timestamps will be incorrect\n");
150  }
151  }
152  }
153 
154  /***/
155  uint64_t time_since_epoch(uint64_t rdtsc_value) const noexcept
156  {
157  // should only get called by the backend thread
158 
159  // get the current index, this is only safe to call from the thread that is doing the resync
160  auto const index = _version.load(std::memory_order_relaxed) & (_base.size() - 1);
161 
162  // Unsigned subtraction + int64_t cast: a stale rdtsc_value yields a small negative
163  // diff, producing a wall time slightly in the past. This is intentional.
164  auto diff = static_cast<int64_t>(rdtsc_value - _base[index].base_tsc);
165 
166  // we need to sync after we calculated otherwise base_tsc value will be ahead of passed tsc value
167  if (diff > _resync_interval_ticks)
168  {
169  resync(resync_lag_cycles);
170  auto const resynced_index = _version.load(std::memory_order_relaxed) & (_base.size() - 1);
171  diff = static_cast<int64_t>(rdtsc_value - _base[resynced_index].base_tsc);
172  return static_cast<uint64_t>(_base[resynced_index].base_time +
173  static_cast<int64_t>(static_cast<double>(diff) * _ns_per_tick));
174  }
175 
176  return static_cast<uint64_t>(_base[index].base_time +
177  static_cast<int64_t>(static_cast<double>(diff) * _ns_per_tick));
178  }
179 
180  /***/
181  uint64_t time_since_epoch_safe(uint64_t rdtsc_value) const noexcept
182  {
183  // thread-safe, can be called by anyone
184  // this function won't resync as it can be called by anyone and only a single thread resyncs
185  uint32_t version;
186  uint64_t wall_ts;
187 
188  do
189  {
190  version = _version.load(std::memory_order_acquire);
191  auto const index = version & (_base.size() - 1);
192 
193  if (QUILL_UNLIKELY((_base[index].base_tsc) == 0 && (_base[index].base_time == 0)))
194  {
195  return 0;
196  }
197 
198  // get rdtsc current value and compare the diff then add it to base wall time
199  auto const diff = static_cast<int64_t>(rdtsc_value - _base[index].base_tsc);
200  wall_ts = static_cast<uint64_t>(_base[index].base_time +
201  static_cast<int64_t>(static_cast<double>(diff) * _ns_per_tick));
202  } while (version != _version.load(std::memory_order_acquire));
203 
204  return wall_ts;
205  }
206 
207  /***/
208  bool resync(uint32_t lag) const noexcept
209  {
210  // Sometimes we might get an interrupt and might never resync, so we will try again up to max_attempts
211  constexpr uint8_t max_attempts{4};
212 
213  for (uint8_t attempt = 0; attempt < max_attempts; ++attempt)
214  {
215  uint64_t const beg = rdtsc();
216  // we force convert to nanoseconds because the precision of system_clock::time-point is not portable across platforms.
217  auto const wall_time = static_cast<int64_t>(detail::get_system_time_ns());
218  uint64_t const end = rdtsc();
219 
220  if (QUILL_LIKELY(end - beg <= lag))
221  {
222  // update the next index
223  auto const index = (_version.load(std::memory_order_relaxed) + 1) & (_base.size() - 1);
224  _base[index].base_time = wall_time;
225  _base[index].base_tsc = _fast_average(beg, end);
226  _version.fetch_add(1, std::memory_order_release);
227 
228  _resync_interval_ticks = _resync_interval_original;
229  return true;
230  }
231  }
232 
233  // we failed to return earlier and we never resynced, but we don't really want to keep retrying on each call
234  // to time_since_epoch() so we do non accurate resync we will increase the resync duration to resync later
235  constexpr int64_t max_int64_half = std::numeric_limits<int64_t>::max() / 2;
236  if (_resync_interval_ticks <= max_int64_half)
237  {
238  _resync_interval_ticks = _resync_interval_ticks * 2;
239  }
240 
241  return false;
242  }
243 
244  /***/
245  double nanoseconds_per_tick() const noexcept { return _ns_per_tick; }
246 
247 protected:
/**
 * One synchronisation point: a wall clock reading paired with a tsc reading.
 * Both fields start at zero.
 */
struct BaseTimeTsc
{
  BaseTimeTsc() = default;

  int64_t base_time = 0; /**< wall time of the sync point */
  uint64_t base_tsc = 0; /**< tsc value of the sync point */
};
254 
255  /***/
/**
 * Average of two unsigned values, rounded down, computed without overflowing.
 * @param x first value
 * @param y second value
 * @return floor((x + y) / 2)
 */
QUILL_NODISCARD static uint64_t _fast_average(uint64_t x, uint64_t y) noexcept
{
  // bits set in both inputs contribute fully, bits set in only one contribute half
  uint64_t const common_bits = x & y;
  uint64_t const half_of_differing_bits = (x ^ y) >> 1;
  return common_bits + half_of_differing_bits;
}
260 
261 protected:
262  static constexpr uint32_t resync_lag_cycles{50'000};
263  mutable int64_t _resync_interval_ticks{0};
264  int64_t _resync_interval_original{0}; /**< stores the initial interval value as as if we fail to resync we increase the timer */
265  double _ns_per_tick{0};
266 
267  alignas(QUILL_CACHE_LINE_ALIGNED) mutable std::atomic<uint32_t> _version{0};
268  mutable std::array<BaseTimeTsc, 2> _base{};
269 };
270 
271 #if defined(_WIN32) && defined(_MSC_VER) && !defined(__GNUC__)
272  #pragma warning(pop)
273 #endif
274 
275 } // namespace detail
276 
277 QUILL_END_NAMESPACE
Definition: RdtscClock.h:248
Sets up a signal handler to handle fatal signals.
Definition: BackendManager.h:28
Converts tsc ticks to nanoseconds since epoch.
Definition: RdtscClock.h:36
QUILL_NODISCARD QUILL_ATTRIBUTE_HOT uint64_t rdtsc() noexcept
Get the TSC counter.
Definition: Rdtsc.h:105
QUILL_NODISCARD QUILL_ATTRIBUTE_HOT uint64_t get_steady_time_ns() noexcept
Mirrors std::chrono::duration_cast<std::chrono::nanoseconds>( std::chrono::steady_clock::now().time_since_epoch()).count().
Definition: ChronoTimeUtils.h:90
QUILL_NODISCARD QUILL_ATTRIBUTE_HOT uint64_t get_system_time_ns() noexcept
Mirrors std::chrono::duration_cast<std::chrono::nanoseconds>( std::chrono::system_clock::now().time_since_epoch()).count().
Definition: ChronoTimeUtils.h:57
A singleton class that calculates the duration of one rdtsc tick in nanoseconds.
Definition: RdtscClock.h:42
int64_t _resync_interval_original
stores the initial interval value, because if we fail to resync we increase the timer ...
Definition: RdtscClock.h:264