Skip to content

Commit

Permalink
moved edition to 2021, fixed clippy warnings, took rustfmt suggestions
Browse files Browse the repository at this point in the history
  • Loading branch information
BartMassey committed Jun 28, 2024
1 parent 1f0f9c5 commit 2b29397
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 51 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ name = "popcount"
version = "0.0.1"
authors = [ "Bart Massey <[email protected]>" ]
license-file = "LICENSE"
edition = "2018"
edition = "2021"

[[bin]]
name = "popcount_rs"
Expand Down
107 changes: 57 additions & 50 deletions popcount.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,14 @@
#[macro_use]
extern crate lazy_static;

use std::env;
use std::time;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use std::arch::asm;
use std::env;
use std::time;

const BLOCKSIZE: usize = 1000;
const PREHEAT_BASE: u32 = 5000;


mod prng {
/// Boring and probably bad linear congruential
/// pseudo-random number generator to deterministically
Expand All @@ -29,13 +28,15 @@ mod prng {
const M: u64 = 85876534675u64;
const A: u64 = 116895888786u64;

pub struct PRNG {
state: u64
pub struct Prng {
state: u64,
}

impl PRNG {
pub fn new() -> PRNG {
PRNG{ state: A.wrapping_mul(0x123456789abcdef0u64 % M) % M }
impl Prng {
pub fn new() -> Prng {
Prng {
state: A.wrapping_mul(0x123456789abcdef0u64 % M) % M,
}
}

pub fn next(&mut self) -> u32 {
Expand All @@ -47,14 +48,14 @@ mod prng {

struct Driver {
name: &'static str,
f: &'static dyn Fn (u32) -> u32,
blockf: &'static dyn Fn (u32, &[u32; BLOCKSIZE]) -> u32,
divisor: u32
f: &'static dyn Fn(u32) -> u32,
blockf: &'static dyn Fn(u32, &[u32; BLOCKSIZE]) -> u32,
divisor: u32,
}

macro_rules! driver {
($drive:ident, $popcount:ident, $entry:ident, $div:expr) => {
fn $drive(n: u32, randoms: &[u32;BLOCKSIZE]) -> u32 {
fn $drive(n: u32, randoms: &[u32; BLOCKSIZE]) -> u32 {
let mut result = 0u32;
for _ in 0..n {
for i in randoms.iter() {
Expand All @@ -67,7 +68,7 @@ macro_rules! driver {
name: stringify!($popcount),
f: &$popcount,
blockf: &$drive,
divisor: $div
divisor: $div,
};
};
}
Expand All @@ -77,8 +78,8 @@ macro_rules! driver {
fn popcount_naive(mut n: u32) -> u32 {
let mut c = 0;
while n > 0 {
c += n & 1;
n >>= 1
c += n & 1;
n >>= 1
}
c
}
Expand All @@ -90,8 +91,8 @@ fn popcount_8(mut n: u32) -> u32 {
let m = 0x01010101u32;
let mut c = n & m;
for _ in 0..7 {
n >>= 1;
c += n & m
n >>= 1;
c += n & m
}
c += c >> 8;
c += c >> 16;
Expand All @@ -105,8 +106,8 @@ fn popcount_6(mut n: u32) -> u32 {
let m = 0x41041041;
let mut c = n & m;
for _ in 0..5 {
n >>= 1;
c += n & m
n >>= 1;
c += n & m
}
c += c >> 6;
c += c >> 12;
Expand All @@ -119,7 +120,7 @@ driver!(drive_6, popcount_6, DRIVER_6, 4);
#[inline(always)]
fn popcount_hakmem(n: u32) -> u32 {
let y = (n >> 1) & 0o33333333333;
let z = n - y - ((y >>1) & 0o33333333333);
let z = n - y - ((y >> 1) & 0o33333333333);
((z + (z >> 3)) & 0o30707070707) % 63
}
driver!(drive_hakmem, popcount_hakmem, DRIVER_HAKMEM, 4);
Expand All @@ -138,7 +139,7 @@ fn remu63(n: u32) -> u32 {
#[inline(always)]
fn popcount_keane(n: u32) -> u32 {
let y = (n >> 1) & 0o33333333333;
let z = n - y - ((y >>1) & 0o33333333333);
let z = n - y - ((y >> 1) & 0o33333333333);
remu63((z + (z >> 3)) & 0o30707070707)
}
driver!(drive_keane, popcount_keane, DRIVER_KEANE, 4);
Expand All @@ -149,9 +150,9 @@ driver!(drive_keane, popcount_keane, DRIVER_KEANE, 4);
fn popcount_anderson(n: u32) -> u32 {
let p = 0x1001001001001u64;
let m = 0x84210842108421u64;
let mut c = ((n & 0xfffu32) as u64 * p & m) % 0x1fu64;
c += (((n & 0xfff000u32) >> 12) as u64 * p & m) % 0x1fu64;
c += ((n >> 24) as u64 * p & m) % 0x1fu64;
let mut c = (((n & 0xfffu32) as u64 * p) & m) % 0x1fu64;
c += ((((n & 0xfff000u32) >> 12) as u64 * p) & m) % 0x1fu64;
c += (((n >> 24) as u64 * p) & m) % 0x1fu64;
c as u32
}
driver!(drive_anderson, popcount_anderson, DRIVER_ANDERSON, 6);
Expand All @@ -162,7 +163,7 @@ fn popcount_3(mut n: u32) -> u32 {
let m1 = 0x55555555;
let m2 = 0xc30c30c3;
n -= (n >> 1) & m1;
n = (n & m2) + ((n >> 2) & m2) + ((n >> 4) & m2);
n = (n & m2) + ((n >> 2) & m2) + ((n >> 4) & m2);
n += n >> 6;
(n + (n >> 12) + (n >> 24)) & 0x3f
}
Expand Down Expand Up @@ -192,8 +193,8 @@ fn popcount_2(mut n: u32) -> u32 {
n -= (n >> 1) & m1;
n = (n & m2) + ((n >> 2) & m2);
n = (n + (n >> 4)) & m4;
n += n >> 8;
return (n + (n >> 16)) & 0x3f;
n += n >> 8;
(n + (n >> 16)) & 0x3f
}
driver!(drive_2, popcount_2, DRIVER_2, 4);

Expand All @@ -206,9 +207,13 @@ fn popcount_mult(mut n: u32) -> u32 {
let m2 = 0x33333333;
let m4 = 0x0f0f0f0f;
let h01 = 0x01010101;
n -= (n >> 1) & m1; // put count of each 2 bits into those 2 bits
n = (n & m2) + ((n >> 2) & m2); // put count of each 4 bits in
n = (n + (n >> 4)) & m4; // put count of each 8 bits in
// put count of each 2 bits into those 2 bits
n -= (n >> 1) & m1;
// put count of each 4 bits in
n = (n & m2) + ((n >> 2) & m2);
// put count of each 8 bits in
n = (n + (n >> 4)) & m4;

/* XXX This inhibits LLVM from optimizing this whole function to a
popcnt instruction (at least for now) by ensuring that the
multiply is performed.
Expand All @@ -231,31 +236,28 @@ driver!(drive_mult, popcount_mult, DRIVER_MULT, 4);

// 8-bit popcount table, filled at first use.
lazy_static! {
static ref POPCOUNT_TABLE_8: Vec<u32> =
(0..0x100).map(|i| popcount_naive(i)).collect();
static ref POPCOUNT_TABLE_8: Vec<u32> = (0..0x100).map(popcount_naive).collect();
}

// Table-driven popcount, with 8-bit tables
#[inline(always)]
fn popcount_tabular_8(n: u32) -> u32 {
POPCOUNT_TABLE_8[n as u8 as usize] +
POPCOUNT_TABLE_8[(n >> 8) as u8 as usize] +
POPCOUNT_TABLE_8[(n >> 16) as u8 as usize] +
POPCOUNT_TABLE_8[(n >> 24) as usize]
POPCOUNT_TABLE_8[n as u8 as usize]
+ POPCOUNT_TABLE_8[(n >> 8) as u8 as usize]
+ POPCOUNT_TABLE_8[(n >> 16) as u8 as usize]
+ POPCOUNT_TABLE_8[(n >> 24) as usize]
}
driver!(drive_tabular_8, popcount_tabular_8, DRIVER_TABULAR_8, 4);

// 16-bit popcount table, filled at first use.
lazy_static! {
static ref POPCOUNT_TABLE_16: Vec<u32> =
(0..0x10000).map(|i| popcount_naive(i)).collect();
static ref POPCOUNT_TABLE_16: Vec<u32> = (0..0x10000).map(popcount_naive).collect();
}

// Table-driven popcount, with 16-bit tables
#[inline(always)]
fn popcount_tabular_16(n: u32) -> u32 {
POPCOUNT_TABLE_16[n as u16 as usize] +
POPCOUNT_TABLE_16[(n >> 16) as usize]
POPCOUNT_TABLE_16[n as u16 as usize] + POPCOUNT_TABLE_16[(n >> 16) as usize]
}
driver!(drive_tabular_16, popcount_tabular_16, DRIVER_TABULAR_16, 4);

Expand Down Expand Up @@ -339,17 +341,20 @@ fn test_drivers() -> Vec<&'static Driver> {
(0x2e8eb2b2, 16),
(0x9b8be5b7, 20),
(!0, 32),
(0, 0) ];
(0, 0),
];
let mut drivers = Vec::new();
for driver in DRIVERS.iter() {
let mut ok = true;
for (nt, &(input, expected)) in testcases.iter().enumerate() {
let actual = (*driver.f)(input);
if actual != expected {
println!("{} failed case {}: {:#x} -> {} != {}: abandoning",
driver.name, nt, input, actual, expected);
println!(
"{} failed case {}: {:#x} -> {} != {}: abandoning",
driver.name, nt, input, actual, expected
);
ok = false;
break
break;
}
}
if ok {
Expand All @@ -368,7 +373,7 @@ fn main() {
assert!(has_popcnt_x86());
let args: Vec<String> = env::args().collect();
let n: u32 = args[1].parse().expect("invalid count");
let mut rng = prng::PRNG::new();
let mut rng = prng::Prng::new();
let mut randoms = [0u32; BLOCKSIZE];
let mut csum = 0u64;
for i in randoms.iter_mut() {
Expand All @@ -386,11 +391,13 @@ fn main() {
csum += (*driver.blockf)(nblocks, &randoms) as u64;
let runtime = total_time(now.elapsed());
let size = nblocks as f64 * BLOCKSIZE as f64;
println!("{}: {:e} iters in {:.0} msecs for {:.2} nsecs/iter",
driver.name,
size,
runtime * 1.0e3,
(runtime / size) * 1.0e9);
println!(
"{}: {:e} iters in {:.0} msecs for {:.2} nsecs/iter",
driver.name,
size,
runtime * 1.0e3,
(runtime / size) * 1.0e9
);
}
println!("{}", csum);
}

0 comments on commit 2b29397

Please sign in to comment.