Skip to content

Commit ede1d6b

Browse files
authored
Merge pull request #231 from mayawarrier/main
from_chars integer parser
2 parents 506b01a + 882a716 commit ede1d6b

File tree

6 files changed

+966
-12
lines changed

6 files changed

+966
-12
lines changed

include/fast_float/ascii_number.h

Lines changed: 104 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <cstdint>
66
#include <cstring>
77
#include <iterator>
8+
#include <limits>
89
#include <type_traits>
910

1011
#include "float_common.h"
@@ -115,7 +116,7 @@ FASTFLOAT_SIMD_RESTORE_WARNINGS
115116
#if defined(_MSC_VER) && _MSC_VER <= 1900
116117
template <typename UC>
117118
#else
118-
template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>())>
119+
template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>()) = 0>
119120
#endif
120121
// dummy for compile
121122
uint64_t simd_read8_to_u64(UC const*) {
@@ -223,15 +224,15 @@ FASTFLOAT_SIMD_RESTORE_WARNINGS
223224
#if defined(_MSC_VER) && _MSC_VER <= 1900
224225
template <typename UC>
225226
#else
226-
template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>())>
227+
template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>()) = 0>
227228
#endif
228229
// dummy for compile
229230
bool simd_parse_if_eight_digits_unrolled(UC const*, uint64_t&) {
230231
return 0;
231232
}
232233

233234

234-
template <typename UC, FASTFLOAT_ENABLE_IF(!std::is_same<UC, char>::value)>
235+
template <typename UC, FASTFLOAT_ENABLE_IF(!std::is_same<UC, char>::value) = 0>
235236
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
236237
void loop_parse_if_eight_digits(const UC*& p, const UC* const pend, uint64_t& i) {
237238
if (!has_simd_opt<UC>()) {
@@ -439,6 +440,106 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
439440
return answer;
440441
}
441442

443+
template <typename T, typename UC>
444+
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
445+
from_chars_result_t<UC> parse_int_string(UC const* p, UC const* pend, T& value, int base)
446+
{
447+
from_chars_result_t<UC> answer;
448+
449+
UC const* const first = p;
450+
451+
bool negative = (*p == UC('-'));
452+
if (!std::is_signed<T>::value && negative) {
453+
answer.ec = std::errc::invalid_argument;
454+
answer.ptr = first;
455+
return answer;
456+
}
457+
#ifdef FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default
458+
if ((*p == UC('-')) || (*p == UC('+'))) {
459+
#else
460+
if (*p == UC('-')) {
461+
#endif
462+
++p;
463+
}
464+
465+
UC const* const start_num = p;
466+
while (*p == UC('0')) {
467+
++p;
468+
}
469+
const bool has_leading_zeros = p > start_num;
470+
471+
UC const* const start_digits = p;
472+
473+
uint64_t i = 0;
474+
if (base == 10) {
475+
loop_parse_if_eight_digits(p, pend, i); // use SIMD if possible
476+
}
477+
while (p != pend) {
478+
uint8_t digit = ch_to_digit(*p);
479+
if (digit >= base) {
480+
break;
481+
}
482+
i = uint64_t(base) * i + digit; // might overflow, check this later
483+
p++;
484+
}
485+
486+
size_t digit_count = size_t(p - start_digits);
487+
488+
if (digit_count == 0) {
489+
if (has_leading_zeros) {
490+
value = 0;
491+
answer.ec = std::errc();
492+
answer.ptr = p;
493+
}
494+
else {
495+
answer.ec = std::errc::invalid_argument;
496+
answer.ptr = first;
497+
}
498+
return answer;
499+
}
500+
501+
answer.ptr = p;
502+
503+
// check u64 overflow
504+
size_t max_digits = max_digits_u64(base);
505+
if (digit_count > max_digits) {
506+
answer.ec = std::errc::result_out_of_range;
507+
return answer;
508+
}
509+
// this check can be eliminated for all other types, but they will all require a max_digits(base) equivalent
510+
if (digit_count == max_digits && i < min_safe_u64(base)) {
511+
answer.ec = std::errc::result_out_of_range;
512+
return answer;
513+
}
514+
515+
// check other types overflow
516+
if (!std::is_same<T, uint64_t>::value) {
517+
if (i > uint64_t(std::numeric_limits<T>::max()) + uint64_t(negative)) {
518+
answer.ec = std::errc::result_out_of_range;
519+
return answer;
520+
}
521+
}
522+
523+
if (negative) {
524+
#ifdef FASTFLOAT_VISUAL_STUDIO
525+
#pragma warning(push)
526+
#pragma warning(disable: 4146)
527+
#endif
528+
// this weird workaround is required because:
529+
// - converting unsigned to signed when its value is greater than signed max is UB pre-C++23.
530+
// - reinterpret_casting (~i + 1) would work, but it is not constexpr
531+
// this is always optimized into a neg instruction.
532+
value = T(-std::numeric_limits<T>::max() - T(i - std::numeric_limits<T>::max()));
533+
#ifdef FASTFLOAT_VISUAL_STUDIO
534+
#pragma warning(pop)
535+
#endif
536+
}
537+
else { value = T(i); }
538+
539+
answer.ec = std::errc();
540+
return answer;
541+
}
542+
442543
} // namespace fast_float
443544

444545
#endif

include/fast_float/fast_float.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ namespace fast_float {
2424
* to determine whether we allow the fixed point and scientific notation respectively.
2525
* The default is `fast_float::chars_format::general` which allows both `fixed` and `scientific`.
2626
*/
27-
template<typename T, typename UC = char>
27+
template<typename T, typename UC = char, typename = FASTFLOAT_ENABLE_IF(is_supported_float_type<T>())>
2828
FASTFLOAT_CONSTEXPR20
2929
from_chars_result_t<UC> from_chars(UC const * first, UC const * last,
3030
T &value, chars_format fmt = chars_format::general) noexcept;
@@ -36,6 +36,12 @@ template<typename T, typename UC = char>
3636
FASTFLOAT_CONSTEXPR20
3737
from_chars_result_t<UC> from_chars_advanced(UC const * first, UC const * last,
3838
T &value, parse_options_t<UC> options) noexcept;
39+
/**
40+
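* from_chars for integer types: parses an integer written in `base`
* (2 to 36, default 10) from the range [first, last) into `value`.
* This overload is selected for every T that is not float or double.
* The result's `ec` is std::errc::invalid_argument when the range is empty,
* `base` is outside [2, 36] or no digits are found, and
* std::errc::result_out_of_range when the number does not fit in T.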
41+
*/
42+
template <typename T, typename UC = char, typename = FASTFLOAT_ENABLE_IF(!is_supported_float_type<T>())>
43+
FASTFLOAT_CONSTEXPR20
44+
from_chars_result_t<UC> from_chars(UC const * first, UC const * last, T& value, int base = 10) noexcept;
3945

4046
} // namespace fast_float
4147
#include "parse_number.h"

include/fast_float/float_common.h

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ using parse_options = parse_options_t<char>;
173173
// rust style `try!()` macro, or `?` operator
174174
#define FASTFLOAT_TRY(x) { if (!(x)) return false; }
175175

176-
#define FASTFLOAT_ENABLE_IF(...) typename std::enable_if<(__VA_ARGS__), int>::type = 0
176+
#define FASTFLOAT_ENABLE_IF(...) typename std::enable_if<(__VA_ARGS__), int>::type
177177

178178

179179
namespace fast_float {
@@ -186,6 +186,20 @@ fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() {
186186
#endif
187187
}
188188

189+
template <typename T>
190+
fastfloat_really_inline constexpr bool is_supported_float_type() {
191+
return std::is_same<T, float>::value || std::is_same<T, double>::value;
192+
}
193+
194+
template <typename UC>
195+
fastfloat_really_inline constexpr bool is_supported_char_type() {
196+
return
197+
std::is_same<UC, char>::value ||
198+
std::is_same<UC, wchar_t>::value ||
199+
std::is_same<UC, char16_t>::value ||
200+
std::is_same<UC, char32_t>::value;
201+
}
202+
189203
// Compares two ASCII strings in a case insensitive manner.
190204
template <typename UC>
191205
inline FASTFLOAT_CONSTEXPR14 bool
@@ -674,6 +688,69 @@ constexpr char32_t const * str_const_inf<char32_t>()
674688
{
675689
return U"infinity";
676690
}
691+
692+
693+
// Lookup tables used by the integer parser. The template parameter is unused
// and defaults to void; the accessor functions refer to the tables as
// int_luts<>.
template <typename = void>
694+
struct int_luts {
695+
// Digit value of each byte: '0'-'9' map to 0-9, 'A'-'Z' and 'a'-'z' both map
// to 10-35, and every other byte maps to 255 (not a digit in any base).
// 256 entries, indexed by the byte value.
static constexpr uint8_t chdigit[] = {
696+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
697+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
698+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
699+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255, 255, 255, 255, 255,
700+
255, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
701+
25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 255, 255, 255, 255, 255,
702+
255, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
703+
25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 255, 255, 255, 255, 255,
704+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
705+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
706+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
707+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
708+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
709+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
710+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
711+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255
712+
};
713+
714+
// Digit-count limit per base, 35 entries indexed by (base - 2), i.e. bases
// 2 through 36. The parser treats inputs with more digits as out of range.
static constexpr size_t maxdigits_u64[] = {
715+
64, 41, 32, 28, 25, 23, 22, 21,
716+
20, 19, 18, 18, 17, 17, 16, 16,
717+
16, 16, 15, 15, 15, 15, 14, 14,
718+
14, 14, 14, 14, 14, 13, 13, 13,
719+
13, 13, 13
720+
};
721+
722+
// Per-base threshold, 35 entries indexed by (base - 2): when an input has
// exactly maxdigits_u64 digits, an accumulated value below this threshold
// means the 64-bit accumulation overflowed.
static constexpr uint64_t min_safe_u64[] = {
723+
9223372036854775808ull, 12157665459056928801ull, 4611686018427387904, 7450580596923828125, 4738381338321616896,
724+
3909821048582988049, 9223372036854775808ull, 12157665459056928801ull, 10000000000000000000ull, 5559917313492231481,
725+
2218611106740436992, 8650415919381337933, 2177953337809371136, 6568408355712890625, 1152921504606846976,
726+
2862423051509815793, 6746640616477458432, 15181127029874798299ull, 1638400000000000000, 3243919932521508681,
727+
6221821273427820544, 11592836324538749809ull, 876488338465357824, 1490116119384765625, 2481152873203736576,
728+
4052555153018976267, 6502111422497947648, 10260628712958602189ull, 15943230000000000000ull, 787662783788549761,
729+
1152921504606846976, 1667889514952984961, 2386420683693101056, 3379220508056640625, 4738381338321616896
730+
};
731+
};
732+
733+
// Namespace-scope definitions of the static constexpr tables declared inside
// int_luts. Before C++17 a static constexpr data member that is odr-used
// (such as these arrays, which are indexed at run time) needs a definition
// outside the class; the initializers stay on the in-class declarations.
template <typename T>
734+
constexpr uint8_t int_luts<T>::chdigit[];
735+
736+
template <typename T>
737+
constexpr size_t int_luts<T>::maxdigits_u64[];
738+
739+
template <typename T>
740+
constexpr uint64_t int_luts<T>::min_safe_u64[];
741+
742+
template <typename UC>
743+
fastfloat_really_inline
744+
constexpr uint8_t ch_to_digit(UC c) { return int_luts<>::chdigit[static_cast<unsigned char>(c)]; }
745+
746+
// Looks up the digit-count limit for `base` in int_luts<>::maxdigits_u64.
// The table is indexed by (base - 2) with no bounds check, so `base` must be
// within [2, 36].
fastfloat_really_inline constexpr size_t max_digits_u64(int base) {
  return int_luts<>::maxdigits_u64[base - 2];
}
748+
749+
// If a u64 is exactly max_digits_u64() in length, this is
750+
// the value below which it has definitely overflowed.
751+
fastfloat_really_inline
752+
constexpr uint64_t min_safe_u64(int base) { return int_luts<>::min_safe_u64[base - 2]; }
753+
677754
} // namespace fast_float
678755

679756
#endif

include/fast_float/parse_number.h

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ fastfloat_really_inline bool rounds_to_nearest() noexcept {
133133

134134
} // namespace detail
135135

136-
template<typename T, typename UC>
136+
template<typename T, typename UC, typename>
137137
FASTFLOAT_CONSTEXPR20
138138
from_chars_result_t<UC> from_chars(UC const * first, UC const * last,
139139
T &value, chars_format fmt /*= chars_format::general*/) noexcept {
@@ -145,11 +145,8 @@ FASTFLOAT_CONSTEXPR20
145145
from_chars_result_t<UC> from_chars_advanced(UC const * first, UC const * last,
146146
T &value, parse_options_t<UC> options) noexcept {
147147

148-
static_assert (std::is_same<T, double>::value || std::is_same<T, float>::value, "only float and double are supported");
149-
static_assert (std::is_same<UC, char>::value ||
150-
std::is_same<UC, wchar_t>::value ||
151-
std::is_same<UC, char16_t>::value ||
152-
std::is_same<UC, char32_t>::value , "only char, wchar_t, char16_t and char32_t are supported");
148+
static_assert (is_supported_float_type<T>(), "only float and double are supported");
149+
static_assert (is_supported_char_type<UC>(), "only char, wchar_t, char16_t and char32_t are supported");
153150

154151
from_chars_result_t<UC> answer;
155152
#ifdef FASTFLOAT_SKIP_WHITE_SPACE // disabled by default
@@ -232,6 +229,27 @@ from_chars_result_t<UC> from_chars_advanced(UC const * first, UC const * last,
232229
return answer;
233230
}
234231

232+
233+
template <typename T, typename UC, typename>
234+
FASTFLOAT_CONSTEXPR20
235+
from_chars_result_t<UC> from_chars(UC const* first, UC const* last, T& value, int base) noexcept
236+
{
237+
static_assert (is_supported_char_type<UC>(), "only char, wchar_t, char16_t and char32_t are supported");
238+
239+
from_chars_result_t<UC> answer;
240+
#ifdef FASTFLOAT_SKIP_WHITE_SPACE // disabled by default
241+
while ((first != last) && fast_float::is_space(uint8_t(*first))) {
242+
first++;
243+
}
244+
#endif
245+
if (first == last || base < 2 || base > 36) {
246+
answer.ec = std::errc::invalid_argument;
247+
answer.ptr = first;
248+
return answer;
249+
}
250+
return parse_int_string(first, last, value, base);
251+
}
252+
235253
} // namespace fast_float
236254

237255
#endif

tests/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ endif()
7171
fast_float_add_cpp_test(long_test)
7272
fast_float_add_cpp_test(powersoffive_hardround)
7373
fast_float_add_cpp_test(string_test)
74-
74+
fast_float_add_cpp_test(fast_int)
7575
fast_float_add_cpp_test(json_fmt)
7676
fast_float_add_cpp_test(fortran)
7777

0 commit comments

Comments
 (0)