OpenLexocad  28.0
dtoa.h
Go to the documentation of this file.
1 #pragma once
2 
3 #include <cassert>
4 #include <cmath>
5 
6 #if defined(_MSC_VER)
7 #include <intrin.h>
8 #else
9 #include <stdint.h>
10 #endif
11 
12 #define UINT64_C2(h, l) ((static_cast<uint64_t>(h) << 32) | static_cast<uint64_t>(l))
13 
14 struct DiyFp
15 {
16  DiyFp() {}
17 
18  DiyFp(uint64_t f, int e) : f(f), e(e) {}
19 
20  DiyFp(double d)
21  {
22  union {
23  double d;
24  uint64_t u64;
25  } u = {d};
26 
27  int biased_e = (u.u64 & kDpExponentMask) >> kDpSignificandSize;
28  uint64_t significand = (u.u64 & kDpSignificandMask);
29  if (biased_e != 0)
30  {
31  f = significand + kDpHiddenBit;
32  e = biased_e - kDpExponentBias;
33  }
34  else
35  {
36  f = significand;
37  e = kDpMinExponent + 1;
38  }
39  }
40 
41  DiyFp operator-(const DiyFp& rhs) const
42  {
43  assert(e == rhs.e);
44  assert(f >= rhs.f);
45  return DiyFp(f - rhs.f, e);
46  }
47 
48  DiyFp operator*(const DiyFp& rhs) const
49  {
50 #if defined(_MSC_VER) && defined(_M_AMD64)
51  uint64_t h;
52  uint64_t l = _umul128(f, rhs.f, &h);
53  if (l & (uint64_t(1) << 63)) // rounding
54  h++;
55  return DiyFp(h, e + rhs.e + 64);
56 #elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
57  unsigned __int128 p = static_cast<unsigned __int128>(f) * static_cast<unsigned __int128>(rhs.f);
58  uint64_t h = p >> 64;
59  uint64_t l = static_cast<uint64_t>(p);
60  if (l & (uint64_t(1) << 63)) // rounding
61  h++;
62  return DiyFp(h, e + rhs.e + 64);
63 #else
64  const uint64_t M32 = 0xFFFFFFFF;
65  const uint64_t a = f >> 32;
66  const uint64_t b = f & M32;
67  const uint64_t c = rhs.f >> 32;
68  const uint64_t d = rhs.f & M32;
69  const uint64_t ac = a * c;
70  const uint64_t bc = b * c;
71  const uint64_t ad = a * d;
72  const uint64_t bd = b * d;
73  uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
74  tmp += 1U << 31;
75  return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
76 #endif
77  }
78 
79  DiyFp Normalize() const
80  {
81 #if defined(_MSC_VER) && defined(_M_AMD64)
82  unsigned long index;
83  _BitScanReverse64(&index, f);
84  return DiyFp(f << (63 - index), e - (63 - index));
85 #elif defined(__GNUC__)
86  int s = __builtin_clzll(f);
87  return DiyFp(f << s, e - s);
88 #else
89  DiyFp res = *this;
90  while (!(res.f & kDpHiddenBit))
91  {
92  res.f <<= 1;
93  res.e--;
94  }
96  res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 1);
97  return res;
98 #endif
99  }
100 
102  {
103 #if defined(_MSC_VER) && defined(_M_AMD64)
104  unsigned long index;
105  _BitScanReverse64(&index, f);
106  return DiyFp(f << (63 - index), e - (63 - index));
107 #else
108  DiyFp res = *this;
109  while (!(res.f & (kDpHiddenBit << 1)))
110  {
111  res.f <<= 1;
112  res.e--;
113  }
114  res.f <<= (kDiySignificandSize - kDpSignificandSize - 2);
115  res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2);
116  return res;
117 #endif
118  }
119 
120  void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const
121  {
122  DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
123  DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
124  mi.f <<= mi.e - pl.e;
125  mi.e = pl.e;
126  *plus = pl;
127  *minus = mi;
128  }
129 
130  static const int kDiySignificandSize = 64;
131  static const int kDpSignificandSize = 52;
132  static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
133  static const int kDpMinExponent = -kDpExponentBias;
134  static const uint64_t kDpExponentMask = UINT64_C2(0x7FF00000, 0x00000000);
135  static const uint64_t kDpSignificandMask = UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
136  static const uint64_t kDpHiddenBit = UINT64_C2(0x00100000, 0x00000000);
137 
138  uint64_t f;
139  int e;
140 };
141 
142 inline DiyFp GetCachedPower(int e, int* K)
143 {
144  // 10^-348, 10^-340, ..., 10^340
145  static const uint64_t kCachedPowers_F[] = {
146  UINT64_C2(0xfa8fd5a0, 0x081c0288), UINT64_C2(0xbaaee17f, 0xa23ebf76), UINT64_C2(0x8b16fb20, 0x3055ac76), UINT64_C2(0xcf42894a, 0x5dce35ea),
147  UINT64_C2(0x9a6bb0aa, 0x55653b2d), UINT64_C2(0xe61acf03, 0x3d1a45df), UINT64_C2(0xab70fe17, 0xc79ac6ca), UINT64_C2(0xff77b1fc, 0xbebcdc4f),
148  UINT64_C2(0xbe5691ef, 0x416bd60c), UINT64_C2(0x8dd01fad, 0x907ffc3c), UINT64_C2(0xd3515c28, 0x31559a83), UINT64_C2(0x9d71ac8f, 0xada6c9b5),
149  UINT64_C2(0xea9c2277, 0x23ee8bcb), UINT64_C2(0xaecc4991, 0x4078536d), UINT64_C2(0x823c1279, 0x5db6ce57), UINT64_C2(0xc2109436, 0x4dfb5637),
150  UINT64_C2(0x9096ea6f, 0x3848984f), UINT64_C2(0xd77485cb, 0x25823ac7), UINT64_C2(0xa086cfcd, 0x97bf97f4), UINT64_C2(0xef340a98, 0x172aace5),
151  UINT64_C2(0xb23867fb, 0x2a35b28e), UINT64_C2(0x84c8d4df, 0xd2c63f3b), UINT64_C2(0xc5dd4427, 0x1ad3cdba), UINT64_C2(0x936b9fce, 0xbb25c996),
152  UINT64_C2(0xdbac6c24, 0x7d62a584), UINT64_C2(0xa3ab6658, 0x0d5fdaf6), UINT64_C2(0xf3e2f893, 0xdec3f126), UINT64_C2(0xb5b5ada8, 0xaaff80b8),
153  UINT64_C2(0x87625f05, 0x6c7c4a8b), UINT64_C2(0xc9bcff60, 0x34c13053), UINT64_C2(0x964e858c, 0x91ba2655), UINT64_C2(0xdff97724, 0x70297ebd),
154  UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), UINT64_C2(0xf8a95fcf, 0x88747d94), UINT64_C2(0xb9447093, 0x8fa89bcf), UINT64_C2(0x8a08f0f8, 0xbf0f156b),
155  UINT64_C2(0xcdb02555, 0x653131b6), UINT64_C2(0x993fe2c6, 0xd07b7fac), UINT64_C2(0xe45c10c4, 0x2a2b3b06), UINT64_C2(0xaa242499, 0x697392d3),
156  UINT64_C2(0xfd87b5f2, 0x8300ca0e), UINT64_C2(0xbce50864, 0x92111aeb), UINT64_C2(0x8cbccc09, 0x6f5088cc), UINT64_C2(0xd1b71758, 0xe219652c),
157  UINT64_C2(0x9c400000, 0x00000000), UINT64_C2(0xe8d4a510, 0x00000000), UINT64_C2(0xad78ebc5, 0xac620000), UINT64_C2(0x813f3978, 0xf8940984),
158  UINT64_C2(0xc097ce7b, 0xc90715b3), UINT64_C2(0x8f7e32ce, 0x7bea5c70), UINT64_C2(0xd5d238a4, 0xabe98068), UINT64_C2(0x9f4f2726, 0x179a2245),
159  UINT64_C2(0xed63a231, 0xd4c4fb27), UINT64_C2(0xb0de6538, 0x8cc8ada8), UINT64_C2(0x83c7088e, 0x1aab65db), UINT64_C2(0xc45d1df9, 0x42711d9a),
160  UINT64_C2(0x924d692c, 0xa61be758), UINT64_C2(0xda01ee64, 0x1a708dea), UINT64_C2(0xa26da399, 0x9aef774a), UINT64_C2(0xf209787b, 0xb47d6b85),
161  UINT64_C2(0xb454e4a1, 0x79dd1877), UINT64_C2(0x865b8692, 0x5b9bc5c2), UINT64_C2(0xc83553c5, 0xc8965d3d), UINT64_C2(0x952ab45c, 0xfa97a0b3),
162  UINT64_C2(0xde469fbd, 0x99a05fe3), UINT64_C2(0xa59bc234, 0xdb398c25), UINT64_C2(0xf6c69a72, 0xa3989f5c), UINT64_C2(0xb7dcbf53, 0x54e9bece),
163  UINT64_C2(0x88fcf317, 0xf22241e2), UINT64_C2(0xcc20ce9b, 0xd35c78a5), UINT64_C2(0x98165af3, 0x7b2153df), UINT64_C2(0xe2a0b5dc, 0x971f303a),
164  UINT64_C2(0xa8d9d153, 0x5ce3b396), UINT64_C2(0xfb9b7cd9, 0xa4a7443c), UINT64_C2(0xbb764c4c, 0xa7a44410), UINT64_C2(0x8bab8eef, 0xb6409c1a),
165  UINT64_C2(0xd01fef10, 0xa657842c), UINT64_C2(0x9b10a4e5, 0xe9913129), UINT64_C2(0xe7109bfb, 0xa19c0c9d), UINT64_C2(0xac2820d9, 0x623bf429),
166  UINT64_C2(0x80444b5e, 0x7aa7cf85), UINT64_C2(0xbf21e440, 0x03acdd2d), UINT64_C2(0x8e679c2f, 0x5e44ff8f), UINT64_C2(0xd433179d, 0x9c8cb841),
167  UINT64_C2(0x9e19db92, 0xb4e31ba9), UINT64_C2(0xeb96bf6e, 0xbadf77d9), UINT64_C2(0xaf87023b, 0x9bf0ee6b)};
168  static const int16_t kCachedPowers_E[] = {
169  -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980, -954, -927, -901, -874, -847, -821, -794, -768, -741, -715, -688, -661,
170  -635, -608, -582, -555, -529, -502, -475, -449, -422, -396, -369, -343, -316, -289, -263, -236, -210, -183, -157, -130, -103, -77,
171  -50, -24, 3, 30, 56, 83, 109, 136, 162, 189, 216, 242, 269, 295, 322, 348, 375, 402, 428, 455, 481, 508,
172  534, 561, 588, 614, 641, 667, 694, 720, 747, 774, 800, 827, 853, 880, 907, 933, 960, 986, 1013, 1039, 1066};
173 
174  // int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374;
175  double dk = (-61 - e) * 0.30102999566398114 + 347; // dk must be positive, so can do ceiling in positive
176  int k = static_cast<int>(dk);
177  if (k != dk)
178  k++;
179 
180  unsigned index = static_cast<unsigned>((k >> 3) + 1);
181  *K = -(-348 + static_cast<int>(index << 3)); // decimal exponent no need lookup table
182 
183  assert(index < sizeof(kCachedPowers_F) / sizeof(kCachedPowers_F[0]));
184  return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]);
185 }
186 
187 inline void GrisuRound(char* buffer, int len, uint64_t delta, uint64_t rest, uint64_t ten_kappa, uint64_t wp_w)
188 {
189  while (rest < wp_w && delta - rest >= ten_kappa &&
190  (rest + ten_kappa < wp_w ||
191  wp_w - rest > rest + ten_kappa - wp_w))
192  {
193  buffer[len - 1]--;
194  rest += ten_kappa;
195  }
196 }
197 
198 inline unsigned CountDecimalDigit32(uint32_t n)
199 {
200  // Simple pure C++ implementation was faster than __builtin_clz version in this situation.
201  if (n < 10)
202  return 1;
203  if (n < 100)
204  return 2;
205  if (n < 1000)
206  return 3;
207  if (n < 10000)
208  return 4;
209  if (n < 100000)
210  return 5;
211  if (n < 1000000)
212  return 6;
213  if (n < 10000000)
214  return 7;
215  if (n < 100000000)
216  return 8;
217  if (n < 1000000000)
218  return 9;
219  return 10;
220 }
221 
222 inline void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buffer, int* len, int* K)
223 {
224  static const uint32_t kPow10[] = {1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000};
225  const DiyFp one(uint64_t(1) << -Mp.e, Mp.e);
226  const DiyFp wp_w = Mp - W;
227  uint32_t p1 = static_cast<uint32_t>(Mp.f >> -one.e);
228  uint64_t p2 = Mp.f & (one.f - 1);
229  int kappa = static_cast<int>(CountDecimalDigit32(p1));
230  *len = 0;
231 
232  while (kappa > 0)
233  {
234  uint32_t d;
235  switch (kappa)
236  {
237  case 10:
238  d = p1 / 1000000000;
239  p1 %= 1000000000;
240  break;
241  case 9:
242  d = p1 / 100000000;
243  p1 %= 100000000;
244  break;
245  case 8:
246  d = p1 / 10000000;
247  p1 %= 10000000;
248  break;
249  case 7:
250  d = p1 / 1000000;
251  p1 %= 1000000;
252  break;
253  case 6:
254  d = p1 / 100000;
255  p1 %= 100000;
256  break;
257  case 5:
258  d = p1 / 10000;
259  p1 %= 10000;
260  break;
261  case 4:
262  d = p1 / 1000;
263  p1 %= 1000;
264  break;
265  case 3:
266  d = p1 / 100;
267  p1 %= 100;
268  break;
269  case 2:
270  d = p1 / 10;
271  p1 %= 10;
272  break;
273  case 1:
274  d = p1;
275  p1 = 0;
276  break;
277  default:
278 #if defined(_MSC_VER)
279  __assume(0);
280 #elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)
281  __builtin_unreachable();
282 #else
283  d = 0;
284 #endif
285  }
286  if (d || *len)
287  buffer[(*len)++] = '0' + static_cast<char>(d);
288  kappa--;
289  uint64_t tmp = (static_cast<uint64_t>(p1) << -one.e) + p2;
290  if (tmp <= delta)
291  {
292  *K += kappa;
293  GrisuRound(buffer, *len, delta, tmp, static_cast<uint64_t>(kPow10[kappa]) << -one.e, wp_w.f);
294  return;
295  }
296  }
297 
298  // kappa = 0
299  for (;;)
300  {
301  p2 *= 10;
302  delta *= 10;
303  char d = static_cast<char>(p2 >> -one.e);
304  if (d || *len)
305  buffer[(*len)++] = '0' + d;
306  p2 &= one.f - 1;
307  kappa--;
308  if (p2 < delta)
309  {
310  *K += kappa;
311  GrisuRound(buffer, *len, delta, p2, one.f, wp_w.f * kPow10[-kappa]);
312  return;
313  }
314  }
315 }
316 
317 inline void Grisu2(double value, char* buffer, int* length, int* K)
318 {
319  const DiyFp v(value);
320  DiyFp w_m, w_p;
321  v.NormalizedBoundaries(&w_m, &w_p);
322 
323  const DiyFp c_mk = GetCachedPower(w_p.e, K);
324  const DiyFp W = v.Normalize() * c_mk;
325  DiyFp Wp = w_p * c_mk;
326  DiyFp Wm = w_m * c_mk;
327  Wm.f++;
328  Wp.f--;
329  DigitGen(W, Wp, Wp.f - Wm.f, buffer, length, K);
330 }
331 
332 inline const char* GetDigitsLut()
333 {
334  static const char cDigitsLut[200] = {
335  '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9', '1', '0', '1', '1', '1',
336  '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', '2', '0', '2', '1', '2', '2', '2', '3', '2', '4',
337  '2', '5', '2', '6', '2', '7', '2', '8', '2', '9', '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3',
338  '7', '3', '8', '3', '9', '4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', '4', '9',
339  '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9', '6', '0', '6', '1', '6',
340  '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9', '7', '0', '7', '1', '7', '2', '7', '3', '7', '4',
341  '7', '5', '7', '6', '7', '7', '7', '8', '7', '9', '8', '0', '8', '1', '8', '2', '8', '3', '8', '4', '8', '5', '8', '6', '8',
342  '7', '8', '8', '8', '9', '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9'};
343  return cDigitsLut;
344 }
345 
346 inline void WriteExponent(int K, char* buffer)
347 {
348  if (K < 0)
349  {
350  *buffer++ = '-';
351  K = -K;
352  }
353 
354  if (K >= 100)
355  {
356  *buffer++ = '0' + static_cast<char>(K / 100);
357  K %= 100;
358  const char* d = GetDigitsLut() + K * 2;
359  *buffer++ = d[0];
360  *buffer++ = d[1];
361  }
362  else if (K >= 10)
363  {
364  const char* d = GetDigitsLut() + K * 2;
365  *buffer++ = d[0];
366  *buffer++ = d[1];
367  }
368  else
369  *buffer++ = '0' + static_cast<char>(K);
370 
371  *buffer = '\0';
372 }
373 
374 inline void Prettify(char* buffer, int length, int k)
375 {
376  const int kk = length + k; // 10^(kk-1) <= v < 10^kk
377 
378  if (length <= kk && kk <= 21)
379  {
380  // 1234e7 -> 12340000000
381  for (int i = length; i < kk; i++)
382  buffer[i] = '0';
383  buffer[kk] = '.';
384  buffer[kk + 1] = '0';
385  buffer[kk + 2] = '\0';
386  }
387  else if (0 < kk && kk <= 21)
388  {
389  // 1234e-2 -> 12.34
390  memmove(&buffer[kk + 1], &buffer[kk], length - kk);
391  buffer[kk] = '.';
392  buffer[length + 1] = '\0';
393  }
394  else if (-6 < kk && kk <= 0)
395  {
396  // 1234e-6 -> 0.001234
397  const int offset = 2 - kk;
398  memmove(&buffer[offset], &buffer[0], length);
399  buffer[0] = '0';
400  buffer[1] = '.';
401  for (int i = 2; i < offset; i++)
402  buffer[i] = '0';
403  buffer[length + offset] = '\0';
404  }
405  else if (length == 1)
406  {
407  // 1e30
408  buffer[1] = 'e';
409  WriteExponent(kk - 1, &buffer[2]);
410  }
411  else
412  {
413  // 1234e30 -> 1.234e33
414  memmove(&buffer[2], &buffer[1], length - 1);
415  buffer[1] = '.';
416  buffer[length + 1] = 'e';
417  WriteExponent(kk - 1, &buffer[0 + length + 2]);
418  }
419 }
420 
421 inline void dtoa_milo(double value, char* buffer)
422 {
423  // Not handling NaN and inf
424  assert(!isnan(value));
425  assert(!isinf(value));
426 
427  if (value == 0)
428  {
429  buffer[0] = '0';
430  buffer[1] = '.';
431  buffer[2] = '0';
432  buffer[3] = '\0';
433  }
434  else
435  {
436  if (value < 0)
437  {
438  *buffer++ = '-';
439  value = -value;
440  }
441  int length, K;
442  Grisu2(value, buffer, &length, &K);
443  Prettify(buffer, length, K);
444  }
445 }
DiyFp::kDpSignificandSize
static const int kDpSignificandSize
Definition: dtoa.h:131
Grisu2
void Grisu2(double value, char *buffer, int *length, int *K)
Definition: dtoa.h:317
DiyFp::kDpSignificandMask
static const uint64_t kDpSignificandMask
Definition: dtoa.h:135
WriteExponent
void WriteExponent(int K, char *buffer)
Definition: dtoa.h:346
GetCachedPower
DiyFp GetCachedPower(int e, int *K)
Definition: dtoa.h:142
DiyFp::f
uint64_t f
Definition: dtoa.h:138
DiyFp::kDpExponentBias
static const int kDpExponentBias
Definition: dtoa.h:132
DiyFp::kDpHiddenBit
static const uint64_t kDpHiddenBit
Definition: dtoa.h:136
DiyFp::Normalize
DiyFp Normalize() const
Definition: dtoa.h:79
UINT64_C2
#define UINT64_C2(h, l)
Definition: dtoa.h:12
GetDigitsLut
const char * GetDigitsLut()
Definition: dtoa.h:332
DiyFp::kDpExponentMask
static const uint64_t kDpExponentMask
Definition: dtoa.h:134
DiyFp::DiyFp
DiyFp()
Definition: dtoa.h:16
DiyFp::kDpMinExponent
static const int kDpMinExponent
Definition: dtoa.h:133
DiyFp::operator*
DiyFp operator*(const DiyFp &rhs) const
Definition: dtoa.h:48
Prettify
void Prettify(char *buffer, int length, int k)
Definition: dtoa.h:374
DiyFp::e
int e
Definition: dtoa.h:139
DiyFp::kDiySignificandSize
static const int kDiySignificandSize
Definition: dtoa.h:130
DiyFp
Definition: dtoa.h:15
DiyFp::DiyFp
DiyFp(double d)
Definition: dtoa.h:20
dtoa_milo
void dtoa_milo(double value, char *buffer)
Definition: dtoa.h:421
DiyFp::operator-
DiyFp operator-(const DiyFp &rhs) const
Definition: dtoa.h:41
DiyFp::NormalizeBoundary
DiyFp NormalizeBoundary() const
Definition: dtoa.h:101
DigitGen
void DigitGen(const DiyFp &W, const DiyFp &Mp, uint64_t delta, char *buffer, int *len, int *K)
Definition: dtoa.h:222
GrisuRound
void GrisuRound(char *buffer, int len, uint64_t delta, uint64_t rest, uint64_t ten_kappa, uint64_t wp_w)
Definition: dtoa.h:187
DiyFp::NormalizedBoundaries
void NormalizedBoundaries(DiyFp *minus, DiyFp *plus) const
Definition: dtoa.h:120
DiyFp::DiyFp
DiyFp(uint64_t f, int e)
Definition: dtoa.h:18
CountDecimalDigit32
unsigned CountDecimalDigit32(uint32_t n)
Definition: dtoa.h:198