Skip to content

Commit fbb0bdd

Browse files
authored
cleanup: simd runtime detection (#132)
1 parent d745bd2 commit fbb0bdd

File tree

4 files changed

+104
-219
lines changed

4 files changed

+104
-219
lines changed

src/simd/avx2.rs

Lines changed: 22 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -1,29 +1,24 @@
11
use crate::iter::Bytes;
22

3-
pub enum Scan {
4-
/// Returned when an implementation finds a noteworthy token.
5-
Found,
6-
/// Returned when an implementation couldn't keep running because the input was too short.
7-
TooShort,
8-
}
9-
103
#[cfg(target_arch = "x86")]
11-
unsafe fn parse_uri_batch_32(_: &[u8]) -> usize {
4+
pub unsafe fn match_uri_vectored(_: &mut Bytes) {
125
unreachable!("AVX2 detection should be disabled for x86");
136
}
147

8+
#[inline]
159
#[cfg(target_arch = "x86_64")]
16-
#[target_feature(enable = "avx2")]
17-
pub unsafe fn parse_uri_batch_32(bytes: &mut Bytes) -> Scan {
10+
#[target_feature(enable = "avx2", enable = "sse4.2")]
11+
pub unsafe fn match_uri_vectored(bytes: &mut Bytes) {
1812
while bytes.as_ref().len() >= 32 {
1913
let advance = match_url_char_32_avx(bytes.as_ref());
2014
bytes.advance(advance);
2115

2216
if advance != 32 {
23-
return Scan::Found;
17+
return;
2418
}
2519
}
26-
Scan::TooShort
20+
// do both, since avx2 only works when bytes.len() >= 32
21+
super::sse42::match_uri_vectored(bytes)
2722
}
2823

2924
#[inline(always)]
@@ -69,22 +64,23 @@ unsafe fn match_url_char_32_avx(buf: &[u8]) -> usize {
6964
}
7065

7166
#[cfg(target_arch = "x86")]
72-
unsafe fn match_header_value_batch_32(_: &[u8]) -> usize {
67+
pub unsafe fn match_header_value_vectored(_: &mut Bytes) {
7368
unreachable!("AVX2 detection should be disabled for x86");
7469
}
7570

7671
#[cfg(target_arch = "x86_64")]
77-
#[target_feature(enable = "avx2")]
78-
pub unsafe fn match_header_value_batch_32(bytes: &mut Bytes) -> Scan {
72+
#[target_feature(enable = "avx2", enable = "sse4.2")]
73+
pub unsafe fn match_header_value_vectored(bytes: &mut Bytes) {
7974
while bytes.as_ref().len() >= 32 {
8075
let advance = match_header_value_char_32_avx(bytes.as_ref());
8176
bytes.advance(advance);
8277

8378
if advance != 32 {
84-
return Scan::Found;
79+
return;
8580
}
8681
}
87-
Scan::TooShort
82+
// do both, since avx2 only works when bytes.len() >= 32
83+
super::sse42::match_header_value_vectored(bytes)
8884
}
8985

9086
#[inline(always)]
@@ -120,17 +116,16 @@ unsafe fn match_header_value_char_32_avx(buf: &[u8]) -> usize {
120116

121117
#[test]
122118
fn avx2_code_matches_uri_chars_table() {
123-
match super::detect() {
124-
super::AVX_2 | super::AVX_2_AND_SSE_42 => {},
125-
_ => return,
119+
if !is_x86_feature_detected!("avx2") {
120+
return;
126121
}
127122

128123
unsafe {
129-
assert!(byte_is_allowed(b'_', parse_uri_batch_32));
124+
assert!(byte_is_allowed(b'_', match_uri_vectored));
130125

131126
for (b, allowed) in crate::URI_MAP.iter().cloned().enumerate() {
132127
assert_eq!(
133-
byte_is_allowed(b as u8, parse_uri_batch_32), allowed,
128+
byte_is_allowed(b as u8, match_uri_vectored), allowed,
134129
"byte_is_allowed({:?}) should be {:?}", b, allowed,
135130
);
136131
}
@@ -139,25 +134,24 @@ fn avx2_code_matches_uri_chars_table() {
139134

140135
#[test]
141136
fn avx2_code_matches_header_value_chars_table() {
142-
match super::detect() {
143-
super::AVX_2 | super::AVX_2_AND_SSE_42 => {},
144-
_ => return,
137+
if !is_x86_feature_detected!("avx2") {
138+
return;
145139
}
146140

147141
unsafe {
148-
assert!(byte_is_allowed(b'_', match_header_value_batch_32));
142+
assert!(byte_is_allowed(b'_', match_header_value_vectored));
149143

150144
for (b, allowed) in crate::HEADER_VALUE_MAP.iter().cloned().enumerate() {
151145
assert_eq!(
152-
byte_is_allowed(b as u8, match_header_value_batch_32), allowed,
146+
byte_is_allowed(b as u8, match_header_value_vectored), allowed,
153147
"byte_is_allowed({:?}) should be {:?}", b, allowed,
154148
);
155149
}
156150
}
157151
}
158152

159153
#[cfg(test)]
160-
unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &mut Bytes<'_>) -> Scan) -> bool {
154+
unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &mut Bytes<'_>)) -> bool {
161155
let slice = [
162156
b'_', b'_', b'_', b'_',
163157
b'_', b'_', b'_', b'_',

src/simd/mod.rs

Lines changed: 19 additions & 179 deletions
Original file line number | Diff line number | Diff line change
@@ -38,45 +38,6 @@ mod sse42;
3838
))]
3939
mod avx2;
4040

41-
#[cfg(all(
42-
httparse_simd,
43-
any(
44-
target_arch = "x86",
45-
target_arch = "x86_64",
46-
),
47-
))]
48-
pub const SSE_42: usize = 1;
49-
#[cfg(all(
50-
httparse_simd,
51-
any(not(httparse_simd_target_feature_sse42), httparse_simd_target_feature_avx2),
52-
any(
53-
target_arch = "x86",
54-
target_arch = "x86_64",
55-
),
56-
))]
57-
pub const AVX_2: usize = 2;
58-
#[cfg(all(
59-
httparse_simd,
60-
any(
61-
not(httparse_simd_target_feature_sse42),
62-
httparse_simd_target_feature_avx2,
63-
test,
64-
),
65-
any(
66-
target_arch = "x86",
67-
target_arch = "x86_64",
68-
),
69-
))]
70-
pub const AVX_2_AND_SSE_42: usize = 3;
71-
72-
#[cfg(all(
73-
httparse_simd,
74-
any(
75-
target_arch = "x86",
76-
target_arch = "x86_64",
77-
),
78-
))]
79-
const NONE: usize = std::usize::MAX;
8041
#[cfg(all(
8142
httparse_simd,
8243
not(any(
@@ -88,77 +49,7 @@ const NONE: usize = std::usize::MAX;
8849
target_arch = "x86_64",
8950
),
9051
))]
91-
mod runtime {
92-
//! Runtime detection of simd features. Used when the build script
93-
//! doesn't notice any target features at build time.
94-
//!
95-
//! While `is_x86_feature_detected!` has its own caching built-in,
96-
//! at least in 1.27.0, the functions don't inline, leaving using it
97-
//! actually *slower* than just using the scalar fallback.
98-
99-
use core::sync::atomic::{AtomicUsize, Ordering};
100-
101-
static FEATURE: AtomicUsize = AtomicUsize::new(0);
102-
103-
const INIT: usize = 0;
104-
105-
pub fn detect() -> usize {
106-
let feat = FEATURE.load(Ordering::Relaxed);
107-
if feat == INIT {
108-
if cfg!(target_arch = "x86_64") && is_x86_feature_detected!("avx2") {
109-
if is_x86_feature_detected!("sse4.2") {
110-
FEATURE.store(super::AVX_2_AND_SSE_42, Ordering::Relaxed);
111-
return super::AVX_2_AND_SSE_42;
112-
} else {
113-
FEATURE.store(super::AVX_2, Ordering::Relaxed);
114-
return super::AVX_2;
115-
}
116-
} else if is_x86_feature_detected!("sse4.2") {
117-
FEATURE.store(super::SSE_42, Ordering::Relaxed);
118-
return super::SSE_42;
119-
} else {
120-
FEATURE.store(super::NONE, Ordering::Relaxed);
121-
}
122-
}
123-
feat
124-
}
125-
126-
pub fn match_uri_vectored(bytes: &mut crate::iter::Bytes) {
127-
unsafe {
128-
match detect() {
129-
super::SSE_42 => super::sse42::parse_uri_batch_16(bytes),
130-
super::AVX_2 => { super::avx2::parse_uri_batch_32(bytes); },
131-
super::AVX_2_AND_SSE_42 => {
132-
if let super::avx2::Scan::Found = super::avx2::parse_uri_batch_32(bytes) {
133-
return;
134-
}
135-
super::sse42::parse_uri_batch_16(bytes)
136-
},
137-
_ => ()
138-
}
139-
}
140-
141-
// else do nothing
142-
}
143-
144-
pub fn match_header_value_vectored(bytes: &mut crate::iter::Bytes) {
145-
unsafe {
146-
match detect() {
147-
super::SSE_42 => super::sse42::match_header_value_batch_16(bytes),
148-
super::AVX_2 => { super::avx2::match_header_value_batch_32(bytes); },
149-
super::AVX_2_AND_SSE_42 => {
150-
if let super::avx2::Scan::Found = super::avx2::match_header_value_batch_32(bytes) {
151-
return;
152-
}
153-
super::sse42::match_header_value_batch_16(bytes)
154-
},
155-
_ => ()
156-
}
157-
}
158-
159-
// else do nothing
160-
}
161-
}
52+
mod runtime;
16253

16354
#[cfg(all(
16455
httparse_simd,
@@ -183,32 +74,16 @@ pub use self::runtime::*;
18374
),
18475
))]
18576
mod sse42_compile_time {
186-
pub fn match_uri_vectored(bytes: &mut crate::iter::Bytes) {
187-
if detect() == super::SSE_42 {
188-
unsafe {
189-
super::sse42::parse_uri_batch_16(bytes);
190-
}
191-
}
192-
193-
// else do nothing
77+
#[inline(always)]
78+
pub fn match_uri_vectored(b: &mut crate::iter::Bytes<'_>) {
79+
// SAFETY: calls are guarded by a compile time feature check
80+
unsafe { crate::simd::sse42::match_uri_vectored(b) }
19481
}
195-
196-
pub fn match_header_value_vectored(bytes: &mut crate::iter::Bytes) {
197-
if detect() == super::SSE_42 {
198-
unsafe {
199-
super::sse42::match_header_value_batch_16(bytes);
200-
}
201-
}
202-
203-
// else do nothing
204-
}
205-
206-
pub fn detect() -> usize {
207-
if is_x86_feature_detected!("sse4.2") {
208-
super::SSE_42
209-
} else {
210-
super::NONE
211-
}
82+
83+
#[inline(always)]
84+
pub fn match_header_value_vectored(b: &mut crate::iter::Bytes<'_>) {
85+
// SAFETY: calls are guarded by a compile time feature check
86+
unsafe { crate::simd::sse42::match_header_value_vectored(b) }
21287
}
21388
}
21489

@@ -232,51 +107,16 @@ pub use self::sse42_compile_time::*;
232107
),
233108
))]
234109
mod avx2_compile_time {
235-
pub fn match_uri_vectored(bytes: &mut crate::iter::Bytes) {
236-
// do both, since avx2 only works when bytes.len() >= 32
237-
if detect() == super::AVX_2_AND_SSE_42 {
238-
unsafe {
239-
super::avx2::parse_uri_batch_32(bytes);
240-
}
241-
242-
}
243-
if detect() == super::SSE_42 {
244-
unsafe {
245-
super::sse42::parse_uri_batch_16(bytes);
246-
}
247-
}
248-
249-
// else do nothing
110+
#[inline(always)]
111+
pub fn match_uri_vectored(b: &mut crate::iter::Bytes<'_>) {
112+
// SAFETY: calls are guarded by a compile time feature check
113+
unsafe { crate::simd::avx2::match_uri_vectored(b) }
250114
}
251-
252-
pub fn match_header_value_vectored(bytes: &mut crate::iter::Bytes) {
253-
// do both, since avx2 only works when bytes.len() >= 32
254-
if detect() == super::AVX_2_AND_SSE_42 {
255-
let scanned = unsafe {
256-
super::avx2::match_header_value_batch_32(bytes)
257-
};
258-
259-
if let super::avx2::Scan::Found = scanned {
260-
return;
261-
}
262-
}
263-
if detect() == super::SSE_42 {
264-
unsafe {
265-
super::sse42::match_header_value_batch_16(bytes);
266-
}
267-
}
268-
269-
// else do nothing
270-
}
271-
272-
pub fn detect() -> usize {
273-
if cfg!(target_arch = "x86_64") && is_x86_feature_detected!("avx2") {
274-
super::AVX_2_AND_SSE_42
275-
} else if is_x86_feature_detected!("sse4.2") {
276-
super::SSE_42
277-
} else {
278-
super::NONE
279-
}
115+
116+
#[inline(always)]
117+
pub fn match_header_value_vectored(b: &mut crate::iter::Bytes<'_>) {
118+
// SAFETY: calls are guarded by a compile time feature check
119+
unsafe { crate::simd::avx2::match_header_value_vectored(b) }
280120
}
281121
}
282122

0 commit comments

Comments
 (0)