Merge pull request #51 from fastfloat/dlemire/alt_long

lemire · web-flow · commit b61ed01b650b · 2021-01-08T10:24:34.000-05:00
Improves long-significand performance
diff --git a/include/fast_float/ascii_number.h b/include/fast_float/ascii_number.h
@@ -60,6 +60,7 @@ fastfloat_really_inline
 parsed_number_string parse_number_string(const char *p, const char *pend, chars_format fmt) noexcept {
   parsed_number_string answer;
   answer.valid = false;
+  answer.too_many_digits = false;
   answer.negative = (*p == '-');
   if ((*p == '-') || (*p == '+')) {
     ++p;
@@ -81,10 +82,11 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_
         uint64_t(*p - '0'); // might overflow, we will handle the overflow later
     ++p;
   }
+  const char *const end_of_integer_part = p;
+  int64_t digit_count = int64_t(end_of_integer_part - start_digits);
   int64_t exponent = 0;
   if ((p != pend) && (*p == '.')) {
     ++p;
-    const char *first_after_period = p;
 #if FASTFLOAT_IS_BIG_ENDIAN == 0
     // Fast approach only tested under little endian systems
     if ((p + 8 <= pend) && is_made_of_eight_digits_fast(p)) {
@@ -101,19 +103,16 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_
       ++p;
       i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
     }
-    exponent = first_after_period - p;
+    exponent = end_of_integer_part + 1 - p;
+    digit_count -= exponent;
   }
   // we must have encountered at least one integer!
-  if ((start_digits == p) || ((start_digits == p - 1) && (*start_digits == '.') )) {
+  if (digit_count == 0) {
     return answer;
   }
-  // digit_count is the exact number of digits.
-  int32_t digit_count =
-      int32_t(p - start_digits); // used later to guard against overflows
-  if(exponent > 0) {digit_count--;}
+  int64_t exp_number = 0;            // explicit exponential part
   if ((fmt & chars_format::scientific) && (p != pend) && (('e' == *p) || ('E' == *p))) {
     const char * location_of_e = p;
-    int64_t exp_number = 0;            // exponential part
     ++p;
     bool neg_exp = false;
     if ((p != pend) && ('-' == *p)) {
@@ -137,7 +136,8 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_
         }
         ++p;
       }
-      exponent += (neg_exp ? -exp_number : exp_number);
+      if(neg_exp) { exp_number = - exp_number; }
+      exponent += exp_number;
     }
   } else {
     // If it scientific and not fixed, we have to bail out.
@@ -151,25 +151,40 @@ parsed_number_string parse_number_string(const char *p, const char *pend, chars_
   // of a 64-bit integer. However, this is uncommon.
   //
   // We can deal with up to 19 digits.
-  if (((digit_count > 19))) { // this is uncommon
+  if (digit_count > 19) { // this is uncommon
     // It is possible that the integer had an overflow.
     // We have to handle the case where we have 0.0000somenumber.
     // We need to be mindful of the case where we only have zeroes...
     // E.g., 0.000000000...000.
     const char *start = start_digits;
     while ((start != pend) && (*start == '0' || *start == '.')) {
-      if(*start == '.') { digit_count++; } // We will subtract it again later.
+      if(*start == '0') { digit_count --; }
       start++;
     }
-    // We over-decrement by one when there is a decimal separator
-    digit_count -= int(start - start_digits);
     if (digit_count > 19) {
-      answer.mantissa = 0xFFFFFFFFFFFFFFFF; // important: we don't want the mantissa to be used in a fast path uninitialized.
       answer.too_many_digits = true;
-      return answer;
+      // Let us start again, this time, avoiding overflows.
+      i = 0;
+      p = start_digits;
+      const uint64_t minimal_nineteen_digit_integer{1000000000000000000};
+      while((i < minimal_nineteen_digit_integer) && (p != pend) && is_integer(*p)) {
+        i = i * 10 + uint64_t(*p - '0');
+        ++p;
+      }
+      if (i >= minimal_nineteen_digit_integer) { // We have a big integer
+        exponent = end_of_integer_part - p + exp_number;
+      } else { // We have a value with a fractional component.
+          p++; // skip the '.'
+          const char *first_after_period = p;
+          while((i < minimal_nineteen_digit_integer) && (p != pend) && is_integer(*p)) {
+            i = i * 10 + uint64_t(*p - '0');
+            ++p;
+          }
+          exponent = first_after_period - p + exp_number;
+      }
+      // We have now corrected both exponent and i to reflect the truncated value.
     }
   }
-  answer.too_many_digits = false;
   answer.exponent = exponent;
   answer.mantissa = i;
   return answer;
diff --git a/include/fast_float/float_common.h b/include/fast_float/float_common.h
@@ -184,6 +184,9 @@ struct adjusted_mantissa {
   bool operator==(const adjusted_mantissa &o) const {
     return mantissa == o.mantissa && power2 == o.power2;
   }
+  bool operator!=(const adjusted_mantissa &o) const {
+    return mantissa != o.mantissa || power2 != o.power2;
+  }
 };
 
 struct decimal {
@@ -200,44 +203,6 @@ struct decimal {
   // Moves are allowed:
   decimal(decimal &&) = default;
   decimal &operator=(decimal &&other) = default;
-  // Generates a mantissa by truncating to 19 digits.
-  // This function should be reasonably fast.
-  // Note that the user is responsible to ensure that digits are
-  // initialized to zero when there are fewer than 19.
-  inline uint64_t to_truncated_mantissa() {
-#if FASTFLOAT_IS_BIG_ENDIAN == 1
-    uint64_t mantissa = 0;
-    for (uint32_t i = 0; i < max_digit_without_overflow;
-         i++) {
-      mantissa = mantissa * 10 + digits[i]; // can be accelerated
-    }
-    return mantissa;
-#else
-    uint64_t val;
-    // 8 first digits
-    ::memcpy(&val, digits, sizeof(uint64_t));
-    val = val * 2561 >> 8;
-    val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
-    uint64_t mantissa =
-        uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32);
-    // 8 more digits for a total of 16
-    ::memcpy(&val, digits + sizeof(uint64_t), sizeof(uint64_t));
-    val = val * 2561 >> 8;
-    val = (val & 0x00FF00FF00FF00FF) * 6553601 >> 16;
-    uint32_t eight_digits_value =
-        uint32_t((val & 0x0000FFFF0000FFFF) * 42949672960001 >> 32);
-    mantissa = 100000000 * mantissa + eight_digits_value;
-    for (uint32_t i = 2 * sizeof(uint64_t); i < max_digit_without_overflow;
-         i++) {
-      mantissa = mantissa * 10 + digits[i]; // can be accelerated
-    }
-    return mantissa;
-#endif
-  }
-  // Generate an exponent matching to_truncated_mantissa()
-  inline int32_t to_truncated_exponent() {
-    return decimal_point - int32_t(max_digit_without_overflow);
-  }
 };
 
 constexpr static double powers_of_ten_double[] = {
@@ -372,4 +337,4 @@ inline OStream& operator<<(OStream &out, const fast_float::decimal &d) {
   return out;
 }
 
-#endif
+#endif
diff --git a/include/fast_float/parse_number.h b/include/fast_float/parse_number.h
@@ -66,6 +66,25 @@ from_chars_result parse_infnan(const char *first, const char *last, T &value)  n
   answer.ptr = first;
   return answer;
 }
+
+template<typename T>
+fastfloat_really_inline void to_float(bool negative, adjusted_mantissa am, T &value) {
+  uint64_t word = am.mantissa;
+  word |= uint64_t(am.power2) << binary_format<T>::mantissa_explicit_bits();
+  word = negative
+  ? word | (uint64_t(1) << binary_format<T>::sign_index()) : word;
+#if FASTFLOAT_IS_BIG_ENDIAN == 1
+   if (std::is_same<T, float>::value) {
+     ::memcpy(&value, (char *)&word + 4, sizeof(T)); // extract value at offset 4-7 if float on big-endian
+   } else {
+     ::memcpy(&value, &word, sizeof(T));
+   }
+#else
+   // For little-endian systems:
+   ::memcpy(&value, &word, sizeof(T));
+#endif
+}
+
 } // namespace
 
 
@@ -92,31 +111,23 @@ from_chars_result from_chars(const char *first, const char *last,
   answer.ec = std::errc(); // be optimistic
   answer.ptr = pns.lastmatch;
   // Next is Clinger's fast path.
-  if (binary_format<T>::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format<T>::max_exponent_fast_path() && pns.mantissa <=binary_format<T>::max_mantissa_fast_path()) {
+  if (binary_format<T>::min_exponent_fast_path() <= pns.exponent && pns.exponent <= binary_format<T>::max_exponent_fast_path() && pns.mantissa <=binary_format<T>::max_mantissa_fast_path() && !pns.too_many_digits) {
     value = T(pns.mantissa);
     if (pns.exponent < 0) { value = value / binary_format<T>::exact_power_of_ten(-pns.exponent); }
     else { value = value * binary_format<T>::exact_power_of_ten(pns.exponent); }
     if (pns.negative) { value = -value; }
     return answer;
   }
-  adjusted_mantissa am = pns.too_many_digits ? parse_long_mantissa<binary_format<T>>(first,last) : compute_float<binary_format<T>>(pns.exponent, pns.mantissa);
+  adjusted_mantissa am = compute_float<binary_format<T>>(pns.exponent, pns.mantissa);
+  if(pns.too_many_digits) {
+    if(am != compute_float<binary_format<T>>(pns.exponent, pns.mantissa + 1)) {
+      am.power2 = -1; // value is invalid.
+    }
+  }
   // If we called compute_float<binary_format<T>>(pns.exponent, pns.mantissa) and we have an invalid power (am.power2 < 0),
   // then we need to go the long way around again. This is very uncommon.
   if(am.power2 < 0) { am = parse_long_mantissa<binary_format<T>>(first,last); }
-  uint64_t word = am.mantissa;
-  word |= uint64_t(am.power2) << binary_format<T>::mantissa_explicit_bits();
-  word = pns.negative
-  ? word | (uint64_t(1) << binary_format<T>::sign_index()) : word;
-#if FASTFLOAT_IS_BIG_ENDIAN == 1
-   if (std::is_same<T, float>::value) {
-     ::memcpy(&value, (char *)&word + 4, sizeof(T)); // extract value at offset 4-7 if float on big-endian
-   } else {
-     ::memcpy(&value, &word, sizeof(T));
-   }
-#else
-   // For little-endian systems:
-   ::memcpy(&value, &word, sizeof(T));
-#endif
+  to_float(pns.negative, am, value);
   return answer;
 }
 
diff --git a/include/fast_float/simple_decimal_conversion.h b/include/fast_float/simple_decimal_conversion.h
@@ -353,19 +353,6 @@ adjusted_mantissa compute_float(decimal &d) {
 template <typename binary>
 adjusted_mantissa parse_long_mantissa(const char *first, const char* last) {
     decimal d = parse_decimal(first, last);
-    // In some cases we can get lucky and looking at only the first 19 digits is enough.
-    // Let us try that.
-    const uint64_t mantissa = d.to_truncated_mantissa();
-    const int64_t exponent =  d.to_truncated_exponent();
-    // credit: R. Oudompheng who first implemented this fast path (to my knowledge).
-    // It is rough, but it does the job of accelerating the slow path since most
-    // long streams of digits are determined after 19 digits.
-    // Note that mantissa+1 cannot overflow since mantissa < 10**19 and so
-    // mantissa+1 <= 10**19 < 2**64.
-    adjusted_mantissa am1 = compute_float<binary>(exponent, mantissa);
-    adjusted_mantissa am2 = compute_float<binary>(exponent, mantissa+1);
-    // They must both agree and be both a successful result.
-    if(( am1 == am2 ) && (am1.power2 >= 0)) { return am1; }
     return compute_float<binary>(d);
 }