diff --git a/Cargo.toml b/Cargo.toml
index c44a4d62f0a..cc6b64b35d7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -23,7 +23,7 @@ nightly = ["i128_support", "simd_support"] # enables all features requiring nigh
 std = ["rand_core/std", "alloc", "libc", "winapi", "cloudabi", "fuchsia-zircon"]
 alloc = ["rand_core/alloc"]  # enables Vec and Box support (without std)
 i128_support = [] # enables i128 and u128 support
-simd_support = [] # enables SIMD support
+simd_support = ["packed_simd"] # enables SIMD support
 serde1 = ["serde", "serde_derive", "rand_core/serde1"] # enables serialization for PRNGs
 
 [workspace]
@@ -34,6 +34,7 @@ rand_core = { path = "rand_core", version = "0.2", default-features = false }
 # only for deprecations and benches:
 rand_isaac = { path = "rand_isaac", version = "0.1", default-features = false }
 log = { version = "0.4", optional = true }
+packed_simd = { git = "https://github.com/gnzlbg/packed_simd", optional = true, features = ["into_bits"] }
 serde = { version = "1", optional = true }
 serde_derive = { version = "1", optional = true }
 
diff --git a/src/distributions/float.rs b/src/distributions/float.rs
index 0d418ebdc74..d7a5bb98bd3 100644
--- a/src/distributions/float.rs
+++ b/src/distributions/float.rs
@@ -15,7 +15,7 @@ use Rng;
 use distributions::{Distribution, Standard};
 use distributions::utils::FloatSIMDUtils;
 #[cfg(feature="simd_support")]
-use core::simd::*;
+use packed_simd::*;
 
 /// A distribution to sample floating point numbers uniformly in the half-open
 /// interval `(0, 1]`, i.e. including 1 but not 0.
@@ -106,7 +106,7 @@ macro_rules! float_impls {
                 // Multiply-based method; 24/53 random bits; [0, 1) interval.
                 // We use the most significant bits because for simple RNGs
                 // those are usually more random.
-                let float_size = mem::size_of::<$f_scalar>() * 8;
+                let float_size = mem::size_of::<$f_scalar>() as u32 * 8;
                 let precision = $fraction_bits + 1;
                 let scale = 1.0 / ((1 as $u_scalar << precision) as $f_scalar);
 
@@ -121,7 +121,7 @@ macro_rules! float_impls {
                 // Multiply-based method; 24/53 random bits; (0, 1] interval.
                 // We use the most significant bits because for simple RNGs
                 // those are usually more random.
-                let float_size = mem::size_of::<$f_scalar>() * 8;
+                let float_size = mem::size_of::<$f_scalar>() as u32 * 8;
                 let precision = $fraction_bits + 1;
                 let scale = 1.0 / ((1 as $u_scalar << precision) as $f_scalar);
 
@@ -138,7 +138,7 @@ macro_rules! float_impls {
                 // We use the most significant bits because for simple RNGs
                 // those are usually more random.
                 use core::$f_scalar::EPSILON;
-                let float_size = mem::size_of::<$f_scalar>() * 8;
+                let float_size = mem::size_of::<$f_scalar>() as u32 * 8;
 
                 let value: $uty = rng.gen();
                 let fraction = value >> (float_size - $fraction_bits);
@@ -174,7 +174,7 @@ mod tests {
     use distributions::{Open01, OpenClosed01};
     use rngs::mock::StepRng;
     #[cfg(feature="simd_support")]
-    use core::simd::*;
+    use packed_simd::*;
 
     const EPSILON32: f32 = ::core::f32::EPSILON;
     const EPSILON64: f64 = ::core::f64::EPSILON;
diff --git a/src/distributions/integer.rs b/src/distributions/integer.rs
index 82efd9ba795..0fce20f6de7 100644
--- a/src/distributions/integer.rs
+++ b/src/distributions/integer.rs
@@ -13,7 +13,7 @@
 use {Rng};
 use distributions::{Distribution, Standard};
 #[cfg(feature="simd_support")]
-use core::simd::*;
+use packed_simd::*;
 
 impl Distribution<u8> for Standard {
     #[inline]
diff --git a/src/distributions/uniform.rs b/src/distributions/uniform.rs
index bf68e45f51a..1cd7a3e1e45 100644
--- a/src/distributions/uniform.rs
+++ b/src/distributions/uniform.rs
@@ -124,7 +124,7 @@ use distributions::utils::Float;
 
 
 #[cfg(feature="simd_support")]
-use core::simd::*;
+use packed_simd::*;
 
 /// Sample values uniformly between two bounds.
 ///
@@ -571,7 +571,7 @@ macro_rules! uniform_float_impl {
 
             fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Self::X {
                 // Generate a value in the range [1, 2)
-                let value1_2 = (rng.gen::<$uty>() >> $bits_to_discard as u8)
+                let value1_2 = (rng.gen::<$uty>() >> $bits_to_discard)
                                .into_float_with_exponent(0);
 
                 // Get a value in the range [0, 1) in order to avoid
@@ -600,7 +600,7 @@ macro_rules! uniform_float_impl {
 
                 loop {
                     // Generate a value in the range [1, 2)
-                    let value1_2 = (rng.gen::<$uty>() >> $bits_to_discard as u32)
+                    let value1_2 = (rng.gen::<$uty>() >> $bits_to_discard)
                                    .into_float_with_exponent(0);
 
                     // Get a value in the range [0, 1) in order to avoid
@@ -785,7 +785,7 @@ mod tests {
     use rngs::mock::StepRng;
     use distributions::uniform::Uniform;
     use distributions::utils::FloatAsSIMD;
-    #[cfg(feature="simd_support")] use core::simd::*;
+    #[cfg(feature="simd_support")] use packed_simd::*;
 
     #[should_panic]
     #[test]
diff --git a/src/distributions/utils.rs b/src/distributions/utils.rs
index 8ac2c66dcb6..4bd98ff1a48 100644
--- a/src/distributions/utils.rs
+++ b/src/distributions/utils.rs
@@ -11,7 +11,7 @@
 //! Math helper functions
 
 #[cfg(feature="simd_support")]
-use core::simd::*;
+use packed_simd::*;
 #[cfg(feature="std")]
 use distributions::ziggurat_tables;
 #[cfg(feature="std")]
@@ -263,7 +263,7 @@ macro_rules! simd_impl {
                 <$ty>::from_bits(<$uty>::from_bits(self) + <$uty>::from_bits(mask))
             }
             type UInt = $uty;
-            fn cast_from_int(i: Self::UInt) -> Self { $ty::from(i) }
+            fn cast_from_int(i: Self::UInt) -> Self { i.trunc() }
         }
     }
 }
@@ -271,10 +271,10 @@ macro_rules! simd_impl {
 #[cfg(feature="simd_support")] simd_impl! { f32x2, f32, m32x2, u32x2 }
 #[cfg(feature="simd_support")] simd_impl! { f32x4, f32, m32x4, u32x4 }
 #[cfg(feature="simd_support")] simd_impl! { f32x8, f32, m32x8, u32x8 }
-#[cfg(feature="simd_support")] simd_impl! { f32x16, f32, m1x16, u32x16 }
+#[cfg(feature="simd_support")] simd_impl! { f32x16, f32, m32x16, u32x16 }
 #[cfg(feature="simd_support")] simd_impl! { f64x2, f64, m64x2, u64x2 }
 #[cfg(feature="simd_support")] simd_impl! { f64x4, f64, m64x4, u64x4 }
-#[cfg(feature="simd_support")] simd_impl! { f64x8, f64, m1x8, u64x8 }
+#[cfg(feature="simd_support")] simd_impl! { f64x8, f64, m64x8, u64x8 }
 
 /// Calculates ln(gamma(x)) (natural logarithm of the gamma
 /// function) using the Lanczos approximation.
diff --git a/src/lib.rs b/src/lib.rs
index d1611af07ad..7c759894b0b 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -236,6 +236,8 @@
 #[cfg(feature="std")] extern crate std as core;
 #[cfg(all(feature = "alloc", not(feature="std")))] extern crate alloc;
 
+#[cfg(feature="simd_support")] extern crate packed_simd;
+
 #[cfg(test)] #[cfg(feature="serde1")] extern crate bincode;
 #[cfg(feature="serde1")] extern crate serde;
 #[cfg(feature="serde1")] #[macro_use] extern crate serde_derive;