Skip to content

Vectorize find_first_not_of/find_last_not_of member functions (multiple characters overloads) #5206

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Mar 24, 2025
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 42 additions & 14 deletions benchmarks/src/find_first_of.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,7 @@
#include <benchmark/benchmark.h>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <limits>
#include <numeric>
#include <string>
#include <type_traits>
#include <vector>
Expand All @@ -17,9 +14,15 @@

using namespace std;

enum class AlgType { std_func, str_member_first, str_member_last };
// Selects which search routine a bm<> instantiation measures.
enum class AlgType {
// Free function: find_first_of(h.begin(), h.end(), n.begin(), n.end()); the containers are vectors in this mode.
std_func,
// Member function: h.find_first_of(n).
str_member_first,
// Member function: h.find_last_of(n).
str_member_last,
// Member function: h.find_first_not_of(n).
str_member_first_not,
// Member function: h.find_last_not_of(n).
str_member_last_not,
};

template <AlgType Alg, class T, T Start = T{'!'}>
template <AlgType Alg, class T, T NeedleFillerBase = T{'a'}>
void bm(benchmark::State& state) {
const size_t Pos = static_cast<size_t>(state.range(0));
const size_t NSize = static_cast<size_t>(state.range(1));
Expand All @@ -29,24 +32,37 @@ void bm(benchmark::State& state) {
using container = conditional_t<Alg == AlgType::std_func, vector<T, not_highly_aligned_allocator<T>>,
basic_string<T, char_traits<T>, not_highly_aligned_allocator<T>>>;

constexpr T HaystackFiller{' '};
static_assert(HaystackFiller < Start, "The following iota() should not produce the haystack filler.");
constexpr size_t IncrementCap = 16;

container h(HSize, HaystackFiller);
constexpr T HaystackFillerBase = T{' '};
static_assert(
NeedleFillerBase + IncrementCap <= HaystackFillerBase || HaystackFillerBase + IncrementCap <= NeedleFillerBase,
"Would match where it shouldn't");

container h(HSize, T{0});
container n(NSize, T{0});

if (NSize - 1 > static_cast<size_t>(numeric_limits<T>::max()) - static_cast<size_t>(Start)) {
puts("ERROR: The following iota() would overflow.");
abort();
for (size_t i = 0; i != NSize; ++i) {
n[i] = NeedleFillerBase + i % IncrementCap;
}

iota(n.begin(), n.end(), Start);

if (Pos >= HSize || Which >= NSize) {
abort();
}

h[Pos] = n[Which];
if constexpr (Alg == AlgType::str_member_first_not || Alg == AlgType::str_member_last_not) {
for (size_t i = 0; i != HSize; ++i) {
h[i] = n[(i + Which) % NSize];
}

h[Pos] = HaystackFillerBase;
} else {
for (size_t i = 0; i != HSize; ++i) {
h[i] = HaystackFillerBase + i % IncrementCap;
}

h[Pos] = n[Which];
}

for (auto _ : state) {
benchmark::DoNotOptimize(h);
Expand All @@ -55,6 +71,10 @@ void bm(benchmark::State& state) {
benchmark::DoNotOptimize(h.find_first_of(n));
} else if constexpr (Alg == AlgType::str_member_last) {
benchmark::DoNotOptimize(h.find_last_of(n));
} else if constexpr (Alg == AlgType::str_member_first_not) {
benchmark::DoNotOptimize(h.find_first_not_of(n));
} else if constexpr (Alg == AlgType::str_member_last_not) {
benchmark::DoNotOptimize(h.find_last_not_of(n));
} else {
benchmark::DoNotOptimize(find_first_of(h.begin(), h.end(), n.begin(), n.end()));
}
Expand Down Expand Up @@ -82,4 +102,12 @@ BENCHMARK(bm<AlgType::str_member_last, char>)->Apply(common_args);
BENCHMARK(bm<AlgType::str_member_last, wchar_t>)->Apply(common_args);
BENCHMARK(bm<AlgType::str_member_last, wchar_t, L'\x03B1'>)->Apply(common_args);

// find_first_not_of member benchmarks: char, wchar_t, and wchar_t with the needle filler base
// moved from the default T{'a'} to U+03B1 (presumably to cover needle values above the 8-bit range).
BENCHMARK(bm<AlgType::str_member_first_not, char>)->Apply(common_args);
BENCHMARK(bm<AlgType::str_member_first_not, wchar_t>)->Apply(common_args);
BENCHMARK(bm<AlgType::str_member_first_not, wchar_t, L'\x03B1'>)->Apply(common_args);

// find_last_not_of member benchmarks, registered with the same three element/needle configurations.
BENCHMARK(bm<AlgType::str_member_last_not, char>)->Apply(common_args);
BENCHMARK(bm<AlgType::str_member_last_not, wchar_t>)->Apply(common_args);
BENCHMARK(bm<AlgType::str_member_last_not, wchar_t, L'\x03B1'>)->Apply(common_args);

BENCHMARK_MAIN();
62 changes: 62 additions & 0 deletions stl/inc/__msvc_string_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,16 @@ __declspec(noalias) size_t __stdcall __std_find_last_of_trivial_pos_1(
__declspec(noalias) size_t __stdcall __std_find_last_of_trivial_pos_2(
const void* _Haystack, size_t _Haystack_length, const void* _Needle, size_t _Needle_length) noexcept;

// Separately compiled find_first_not_of routines. The _1 / _2 suffix is the element size in bytes
// that the header-side dispatcher selects on. Lengths are presumably element counts, not bytes - confirm
// against the implementation. The visible caller treats static_cast<size_t>(-1) as "no position found".
__declspec(noalias) size_t __stdcall __std_find_first_not_of_trivial_pos_1(
const void* _Haystack, size_t _Haystack_length, const void* _Needle, size_t _Needle_length) noexcept;
__declspec(noalias) size_t __stdcall __std_find_first_not_of_trivial_pos_2(
const void* _Haystack, size_t _Haystack_length, const void* _Needle, size_t _Needle_length) noexcept;

// Separately compiled find_last_not_of routines, with the same parameter layout and _1 / _2 element-size
// convention as the find_first_not_of routines.
__declspec(noalias) size_t __stdcall __std_find_last_not_of_trivial_pos_1(
const void* _Haystack, size_t _Haystack_length, const void* _Needle, size_t _Needle_length) noexcept;
__declspec(noalias) size_t __stdcall __std_find_last_not_of_trivial_pos_2(
const void* _Haystack, size_t _Haystack_length, const void* _Needle, size_t _Needle_length) noexcept;

} // extern "C"

_STD_BEGIN
Expand Down Expand Up @@ -77,6 +87,32 @@ size_t _Find_last_of_pos_vectorized(const _Ty1* const _Haystack, const size_t _H
}
}

// Routes a find_first_not_of search to the separately compiled routine matching the element width.
// _Ty1 and _Ty2 must have the same size; only 1-byte and 2-byte elements are supported.
// Returns the position reported by that routine unchanged (the visible caller treats
// static_cast<size_t>(-1) as "no position found").
template <class _Ty1, class _Ty2>
size_t _Find_first_not_of_pos_vectorized(const _Ty1* const _Haystack, const size_t _Haystack_length,
    const _Ty2* const _Needle, const size_t _Needle_length) noexcept {
    constexpr size_t _Elem_size = sizeof(_Ty1);
    _STL_INTERNAL_STATIC_ASSERT(_Elem_size == sizeof(_Ty2));

    if constexpr (_Elem_size == 2) {
        return ::__std_find_first_not_of_trivial_pos_2(_Haystack, _Haystack_length, _Needle, _Needle_length);
    } else if constexpr (_Elem_size == 1) {
        return ::__std_find_first_not_of_trivial_pos_1(_Haystack, _Haystack_length, _Needle, _Needle_length);
    } else {
        _STL_INTERNAL_STATIC_ASSERT(false); // unexpected size
    }
}

// Routes a find_last_not_of search to the separately compiled routine matching the element width.
// _Ty1 and _Ty2 must have the same size; only 1-byte and 2-byte elements are supported.
// Returns the position reported by that routine unchanged.
template <class _Ty1, class _Ty2>
size_t _Find_last_not_of_pos_vectorized(const _Ty1* const _Haystack, const size_t _Haystack_length,
    const _Ty2* const _Needle, const size_t _Needle_length) noexcept {
    constexpr size_t _Elem_size = sizeof(_Ty1);
    _STL_INTERNAL_STATIC_ASSERT(_Elem_size == sizeof(_Ty2));

    if constexpr (_Elem_size == 2) {
        return ::__std_find_last_not_of_trivial_pos_2(_Haystack, _Haystack_length, _Needle, _Needle_length);
    } else if constexpr (_Elem_size == 1) {
        return ::__std_find_last_not_of_trivial_pos_1(_Haystack, _Haystack_length, _Needle, _Needle_length);
    } else {
        _STL_INTERNAL_STATIC_ASSERT(false); // unexpected size
    }
}

_STD_END

#endif // _USE_STD_VECTOR_ALGORITHMS
Expand Down Expand Up @@ -961,6 +997,21 @@ constexpr size_t _Traits_find_first_not_of(_In_reads_(_Hay_size) const _Traits_p

if constexpr (_Is_implementation_handled_char_traits<_Traits>) {
using _Elem = typename _Traits::char_type;
#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (sizeof(_Elem) <= 2) {
if (!_STD _Is_constant_evaluated()) {
const size_t _Remaining_size = _Hay_size - _Start_at;
if (_Remaining_size + _Needle_size >= _Threshold_find_first_of) {
size_t _Pos = _Find_first_not_of_pos_vectorized(_Hay_start, _Remaining_size, _Needle, _Needle_size);
if (_Pos != static_cast<size_t>(-1)) {
_Pos += _Start_at;
}
return _Pos;
}
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS

_String_bitmap<_Elem> _Matches;
if (_Matches._Mark(_Needle, _Needle + _Needle_size)) {
for (auto _Match_try = _Hay_start; _Match_try < _Hay_end; ++_Match_try) {
Expand Down Expand Up @@ -1012,6 +1063,17 @@ constexpr size_t _Traits_find_last_not_of(_In_reads_(_Hay_size) const _Traits_pt

if constexpr (_Is_implementation_handled_char_traits<_Traits>) {
using _Elem = typename _Traits::char_type;
#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (sizeof(_Elem) <= 2) {
if (!_STD _Is_constant_evaluated()) {
const size_t _Remaining_size = _Hay_start + 1;
if (_Remaining_size + _Needle_size >= _Threshold_find_first_of) { // same threshold for first/last
return _Find_last_not_of_pos_vectorized(_Haystack, _Remaining_size, _Needle, _Needle_size);
}
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS

_String_bitmap<_Elem> _Matches;
if (_Matches._Mark(_Needle, _Needle + _Needle_size)) {
for (auto _Match_try = _Haystack + _Hay_start;; --_Match_try) {
Expand Down
Loading