|
1 | 1 | use { |
2 | 2 | crate::progress::Progress, |
3 | | - indicatif::{ParallelProgressIterator, ProgressIterator}, |
| 3 | + dashmap::DashMap, |
| 4 | + indicatif::ParallelProgressIterator, |
4 | 5 | rayon::iter::{IntoParallelRefIterator, ParallelIterator}, |
5 | | - std::{collections::HashMap, hash::Hash, mem::size_of}, |
| 6 | + std::{hash::Hash, mem::size_of, num::TryFromIntError}, |
6 | 7 | }; |
7 | 8 |
|
8 | 9 | pub struct Addresses<T> { |
9 | 10 | addresses: Vec<T>, |
10 | 11 | } |
11 | 12 |
|
12 | | -impl<T: Copy + Send + Sync + Default + PartialEq + Eq + Hash> Addresses<T> { |
13 | | - fn read_addresses<F: Fn(&[u8]) -> T + Sync + Send>(bytes: &[u8], convert: F) -> Vec<T> { |
| 13 | +impl< |
| 14 | + T: Copy |
| 15 | + + Send |
| 16 | + + Sync |
| 17 | + + Default |
| 18 | + + PartialEq |
| 19 | + + Eq |
| 20 | + + Hash |
| 21 | + + TryFrom<usize, Error = TryFromIntError>, |
| 22 | + > Addresses<T> |
| 23 | +{ |
| 24 | + fn get_address_frequencies<F: Fn(&[u8]) -> T + Sync + Send>( |
| 25 | + bytes: &[u8], |
| 26 | + convert: F, |
| 27 | + ) -> DashMap<T, usize> { |
14 | 28 | let chunks = bytes.chunks(size_of::<T>()).collect::<Vec<&[u8]>>(); |
15 | 29 | let pb = Progress::get("Reading addresses", chunks.len()); |
| 30 | + let map = DashMap::<T, usize>::new(); |
16 | 31 | chunks |
17 | 32 | .par_iter() |
18 | 33 | .progress_with(pb) |
19 | 34 | .map(|&p| convert(p)) |
20 | 35 | .filter(|&p| p != T::default()) |
21 | | - .collect::<Vec<T>>() |
| 36 | + .for_each(|ptr| { |
| 37 | + *map.entry(ptr).or_insert(0) += 1; |
| 38 | + }); |
| 39 | + map |
22 | 40 | } |
23 | 41 |
|
24 | | - fn get_freqencies(addresses: Vec<T>) -> Vec<HashMap<T, usize>> { |
25 | | - /* Calculate frequencies in parallel */ |
26 | | - let pb = Progress::get("Calculating frequencies", addresses.len()); |
27 | | - addresses |
| 42 | + fn get_unique_addresses(frequencies: DashMap<T, usize>) -> Vec<T> { |
| 43 | + let pb = Progress::get("Finding unique addresses", frequencies.len()); |
| 44 | + frequencies |
28 | 45 | .par_iter() |
29 | 46 | .progress_with(pb) |
30 | | - .fold(HashMap::<T, usize>::new, |mut map, ptr| { |
31 | | - if let Some(v) = map.get(ptr) { |
32 | | - map.insert(*ptr, v + 1); |
| 47 | + .filter_map(|r| { |
| 48 | + let (&k, &v) = r.pair(); |
| 49 | + if v == 1 { |
| 50 | + Some(k) |
33 | 51 | } else { |
34 | | - map.insert(*ptr, 1); |
| 52 | + None |
35 | 53 | } |
36 | | - map |
37 | 54 | }) |
38 | | - .collect::<Vec<HashMap<T, usize>>>() |
39 | | - } |
40 | | - |
41 | | - fn collate_frequencies(frequencies: Vec<HashMap<T, usize>>) -> HashMap<T, usize> { |
42 | | - let pb = Progress::get("Collating frequencies", frequencies.len()); |
43 | | - frequencies.into_iter().progress_with(pb).fold( |
44 | | - HashMap::<T, usize>::new(), |
45 | | - |mut map, chunk| { |
46 | | - for (k, v) in chunk { |
47 | | - if let Some(v) = map.get(&k) { |
48 | | - map.insert(k, v + 1); |
49 | | - } else { |
50 | | - map.insert(k, v); |
51 | | - } |
52 | | - } |
53 | | - map |
54 | | - }, |
55 | | - ) |
56 | | - } |
57 | | - |
58 | | - fn get_unique_addresses(frequencies: HashMap<T, usize>) -> Vec<T> { |
59 | | - let pb = Progress::get("Finding unique addresses", frequencies.len()); |
60 | | - frequencies |
61 | | - .par_iter() |
62 | | - .progress_with(pb) |
63 | | - .filter_map(|(k, v)| if *v == 1 { Some(*k) } else { None }) |
64 | 55 | .collect() |
65 | 56 | } |
66 | 57 |
|
67 | 58 | pub fn new<F: Fn(&[u8]) -> T + Sync + Send + Copy>(bytes: &[u8], convert: F) -> Self { |
68 | | - let addresses = Self::read_addresses(bytes, convert); |
69 | | - println!("Found: {:?} addresses", addresses.len()); |
70 | | - let frequencies = Self::get_freqencies(addresses); |
71 | | - let collated = Self::collate_frequencies(frequencies); |
72 | | - let unique = Self::get_unique_addresses(collated); |
| 59 | + let frequencies = Self::get_address_frequencies(bytes, convert); |
| 60 | + println!("Found: {:?} addresses", frequencies.len()); |
| 61 | + let unique = Self::get_unique_addresses(frequencies); |
73 | 62 | println!("Found: {:?} unique addresses", unique.len()); |
74 | 63 | Self { addresses: unique } |
75 | 64 | } |
|
0 commit comments