From acf35a79216c6adf63498f3c166910121def63a8 Mon Sep 17 00:00:00 2001 From: Vrtgs Date: Wed, 24 Dec 2025 22:10:41 +0300 Subject: [PATCH 01/11] implement Op and use it to implement fs::try_exists --- tokio/src/fs/read.rs | 14 ++++--- tokio/src/fs/try_exists.rs | 35 ++++++++++++++++- tokio/src/fs/write.rs | 69 ++++++++++++++++------------------ tokio/src/io/uring/mod.rs | 1 + tokio/src/io/uring/statx.rs | 66 ++++++++++++++++++++++++++++++++ tokio/src/io/uring/utils.rs | 14 +++++++ tokio/src/runtime/driver/op.rs | 3 ++ 7 files changed, 158 insertions(+), 44 deletions(-) create mode 100644 tokio/src/io/uring/statx.rs diff --git a/tokio/src/fs/read.rs b/tokio/src/fs/read.rs index aabc994e95f..86e3026b0a1 100644 --- a/tokio/src/fs/read.rs +++ b/tokio/src/fs/read.rs @@ -54,7 +54,7 @@ use std::{io, path::Path}; /// } /// ``` pub async fn read(path: impl AsRef) -> io::Result> { - let path = path.as_ref().to_owned(); + let path = path.as_ref(); #[cfg(all( tokio_unstable, @@ -68,13 +68,15 @@ pub async fn read(path: impl AsRef) -> io::Result> { let handle = crate::runtime::Handle::current(); let driver_handle = handle.inner.driver().io(); - if driver_handle - .check_and_init(io_uring::opcode::Read::CODE) - .await? - { - return read_uring(&path).await; + if driver_handle.check_and_init()? { + return read_uring(path).await; } } + read_spawn_blocking(path).await +} + +async fn read_spawn_blocking(path: &Path) -> io::Result> { + let path = path.to_owned(); asyncify(move || std::fs::read(path)).await } diff --git a/tokio/src/fs/try_exists.rs b/tokio/src/fs/try_exists.rs index 2e8de04e0c5..170dedb5de6 100644 --- a/tokio/src/fs/try_exists.rs +++ b/tokio/src/fs/try_exists.rs @@ -23,6 +23,39 @@ use std::path::Path; /// # } /// ``` pub async fn try_exists(path: impl AsRef) -> io::Result { - let path = path.as_ref().to_owned(); + let path = path.as_ref(); + + #[cfg(all( + tokio_unstable, + feature = "io-uring", + feature = "rt", + feature = "fs", + target_os = "linux" + ))] + { + let handle = crate::runtime::Handle::current(); + let driver_handle = handle.inner.driver().io(); + if driver_handle.check_and_init()? { + return try_exists_uring(path).await; + } + } + + try_exists_spawn_blocking(path).await +} + +cfg_io_uring! { + async fn try_exists_uring(path: &Path) -> io::Result { + use crate::runtime::driver::op::Op; + + match Op::metadata(path)?.await { + Ok(_) => Ok(true), + Err(error) if error.kind() == io::ErrorKind::NotFound => Ok(false), + Err(error) => Err(error), + } + } +} + +async fn try_exists_spawn_blocking(path: &Path) -> io::Result { + let path = path.to_owned(); asyncify(move || path.try_exists()).await } diff --git a/tokio/src/fs/write.rs b/tokio/src/fs/write.rs index a2b22fd8cbf..6c9c195016c 100644 --- a/tokio/src/fs/write.rs +++ b/tokio/src/fs/write.rs @@ -48,49 +48,44 @@ pub async fn write(path: impl AsRef, contents: impl AsRef<[u8]>) -> io::Re write_spawn_blocking(path, contents).await } -#[cfg(all( - tokio_unstable, - feature = "io-uring", - feature = "rt", - feature = "fs", - target_os = "linux" -))] -async fn write_uring(path: &Path, mut buf: OwnedBuf) -> io::Result<()> { - use crate::{fs::OpenOptions, runtime::driver::op::Op}; - use std::os::fd::OwnedFd; +cfg_io_uring! { + async fn write_uring(path: &Path, mut buf: OwnedBuf) -> io::Result<()> { + use crate::{fs::OpenOptions, runtime::driver::op::Op}; + use std::os::fd::OwnedFd; - let file = OpenOptions::new() - .write(true) - .create(true) - .truncate(true) - .open(path) - .await?; + let file = OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .open(path) + .await?; - let mut fd: OwnedFd = file - .try_into_std() - .expect("unexpected in-flight operation detected") - .into(); + let mut fd: OwnedFd = file + .try_into_std() + .expect("unexpected in-flight operation detected") + .into(); - let total: usize = buf.as_ref().len(); - let mut buf_offset: usize = 0; - let mut file_offset: u64 = 0; - while buf_offset < total { - let (res, _buf, _fd) = Op::write_at(fd, buf, buf_offset, file_offset)?.await; + let total: usize = buf.as_ref().len(); + let mut buf_offset: usize = 0; + let mut file_offset: u64 = 0; + while buf_offset < total { + let (res, _buf, _fd) = Op::write_at(fd, buf, buf_offset, file_offset)?.await; - let n = match res { - Ok(0) => return Err(io::ErrorKind::WriteZero.into()), - Ok(n) => n, - Err(e) if e.kind() == io::ErrorKind::Interrupted => 0, - Err(e) => return Err(e), - }; + let n = match res { + Ok(0) => return Err(io::ErrorKind::WriteZero.into()), + Ok(n) => n, + Err(e) if e.kind() == io::ErrorKind::Interrupted => 0, + Err(e) => return Err(e), + }; - buf = _buf; - fd = _fd; - buf_offset += n as usize; - file_offset += n as u64; - } + buf = _buf; + fd = _fd; + buf_offset += n as usize; + file_offset += n as u64; + } - Ok(()) + Ok(()) + } } async fn write_spawn_blocking(path: &Path, contents: OwnedBuf) -> io::Result<()> { diff --git a/tokio/src/io/uring/mod.rs b/tokio/src/io/uring/mod.rs index facad596f63..c398f3d88e5 100644 --- a/tokio/src/io/uring/mod.rs +++ b/tokio/src/io/uring/mod.rs @@ -1,4 +1,5 @@ pub(crate) mod open; pub(crate) mod read; +pub(crate) mod statx; pub(crate) mod utils; pub(crate) mod write; diff --git a/tokio/src/io/uring/statx.rs b/tokio/src/io/uring/statx.rs new file mode 100644 index 00000000000..36c023258ca --- /dev/null +++ b/tokio/src/io/uring/statx.rs @@ -0,0 +1,66 @@ +use crate::io::uring::utils::{box_assume_init, box_new_uninit, cstr}; +use crate::runtime::driver::op::{CancelData, Cancellable, Completable, CqeResult, Op}; +use io_uring::{opcode, types}; +use std::ffi::CString; +use std::io; +use std::io::Error; +use std::mem::MaybeUninit; +use std::path::Path; + +#[derive(Debug)] +pub(crate) struct Statx { + /// This field will be read by the kernel during the operation, so we + /// need to ensure it is valid for the entire duration of the operation. + #[allow(dead_code)] + path: CString, + buffer: Box>, +} + +impl Completable for Statx { + type Output = io::Result; + + fn complete(self, cqe: CqeResult) -> Self::Output { + cqe.result.map(|_| *unsafe { box_assume_init(self.buffer) }) + } + + fn complete_with_error(self, error: Error) -> Self::Output { + Err(error) + } +} + +impl Cancellable for Statx { + fn cancel(self) -> CancelData { + CancelData::Statx(self) + } +} + +impl Op { + /// Submit a request to open a file. + fn statx(path: &Path, follow_symlinks: bool) -> io::Result> { + let path = cstr(path)?; + let mut buffer = box_new_uninit::(); + + let flags = libc::AT_STATX_SYNC_AS_STAT + | (libc::AT_SYMLINK_NOFOLLOW * libc::c_int::from(!follow_symlinks)); + + let open_op = opcode::Statx::new( + types::Fd(libc::AT_FDCWD), + path.as_ptr(), + buffer.as_mut_ptr().cast(), + ) + .flags(flags) + .mask(libc::STATX_BASIC_STATS | libc::STATX_BTIME) + .build(); + + // SAFETY: Parameters are valid for the entire duration of the operation + Ok(unsafe { Op::new(open_op, Statx { path, buffer }) }) + } + + pub(crate) fn metadata(path: &Path) -> io::Result> { + Op::statx(path, true) + } + + // pub(crate) fn symlink_metadata(path: &Path) -> io::Result> { + // Op::statx(path, false) + // } +} diff --git a/tokio/src/io/uring/utils.rs b/tokio/src/io/uring/utils.rs index 65185936652..ec70d8300a4 100644 --- a/tokio/src/io/uring/utils.rs +++ b/tokio/src/io/uring/utils.rs @@ -1,4 +1,5 @@ use std::os::fd::{AsRawFd, OwnedFd, RawFd}; +use std::mem::MaybeUninit; use std::os::unix::ffi::OsStrExt; use std::sync::Arc; use std::{ffi::CString, io, path::Path}; @@ -29,3 +30,16 @@ impl UringFd for ArcFd { pub(crate) fn cstr(p: &Path) -> io::Result { Ok(CString::new(p.as_os_str().as_bytes())?) } + +// TODO: Remove this once we bump the MSRV to 1.82. +pub(crate) fn box_new_uninit() -> Box> { + // Box::::new_uninit() + Box::new(MaybeUninit::uninit()) +} + +// TODO: Remove this once we bump the MSRV to 1.82. +pub(crate) unsafe fn box_assume_init(boxed: Box>) -> Box { + // Box::>::assume_init() + let raw = Box::into_raw(boxed); + unsafe { Box::from_raw(raw as *mut T) } +} diff --git a/tokio/src/runtime/driver/op.rs b/tokio/src/runtime/driver/op.rs index f5fe4c37bbd..755371545d9 100644 --- a/tokio/src/runtime/driver/op.rs +++ b/tokio/src/runtime/driver/op.rs @@ -6,6 +6,7 @@ use crate::io::uring::write::Write; use crate::runtime::Handle; +use crate::io::uring::statx::Statx; use io_uring::cqueue; use io_uring::squeue::Entry; use std::future::Future; @@ -24,6 +25,8 @@ pub(crate) enum CancelData { Write(Write), ReadVec(Read, OwnedFd>), ReadBuf(Read), + Read(Read), + Statx(Statx), } #[derive(Debug)] From ae51fab931a66376f0e5d46490d1cc9816f4b8d8 Mon Sep 17 00:00:00 2001 From: Vrtgs Date: Wed, 24 Dec 2025 22:46:00 +0300 Subject: [PATCH 02/11] fix statx on unavailable platforms --- tokio/Cargo.toml | 2 +- tokio/src/io/uring/statx.rs | 37 ++++++++++++++++++++++++------------- tokio/src/io/uring/utils.rs | 5 ++--- 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/tokio/Cargo.toml b/tokio/Cargo.toml index 123781bbf81..ae88f4bab1d 100644 --- a/tokio/Cargo.toml +++ b/tokio/Cargo.toml @@ -85,7 +85,7 @@ sync = [] test-util = ["rt", "sync", "time"] time = [] # Unstable feature. Requires `--cfg tokio_unstable` to enable. -io-uring = ["dep:io-uring", "libc", "mio/os-poll", "mio/os-ext", "dep:slab"] +io-uring = ["dep:io-uring", "linux-raw-sys", "libc", "mio/os-poll", "mio/os-ext", "dep:slab"] # Unstable feature. Requires `--cfg tokio_unstable` to enable. taskdump = ["dep:backtrace"] diff --git a/tokio/src/io/uring/statx.rs b/tokio/src/io/uring/statx.rs index 36c023258ca..6b14fcb9e73 100644 --- a/tokio/src/io/uring/statx.rs +++ b/tokio/src/io/uring/statx.rs @@ -1,9 +1,9 @@ -use crate::io::uring::utils::{box_assume_init, box_new_uninit, cstr}; +use crate::io::uring::utils::{box_new_uninit, cstr}; use crate::runtime::driver::op::{CancelData, Cancellable, Completable, CqeResult, Op}; use io_uring::{opcode, types}; -use std::ffi::CString; +use linux_raw_sys::general::statx; +use std::fmt::{Debug, Formatter}; use std::io; -use std::io::Error; use std::mem::MaybeUninit; use std::path::Path; @@ -12,18 +12,29 @@ pub(crate) struct Statx { /// This field will be read by the kernel during the operation, so we /// need to ensure it is valid for the entire duration of the operation. #[allow(dead_code)] - path: CString, - buffer: Box>, + path: std::ffi::CString, + buffer: Box>, +} + +pub(crate) struct Metadata(#[allow(dead_code)] statx); + +impl Debug for Metadata { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Metadata").finish_non_exhaustive() + } } impl Completable for Statx { - type Output = io::Result; + type Output = io::Result; fn complete(self, cqe: CqeResult) -> Self::Output { - cqe.result.map(|_| *unsafe { box_assume_init(self.buffer) }) + use crate::io::uring::utils::box_assume_init; + + cqe.result + .map(|_| Metadata(*unsafe { box_assume_init(self.buffer) })) } - fn complete_with_error(self, error: Error) -> Self::Output { + fn complete_with_error(self, error: io::Error) -> Self::Output { Err(error) } } @@ -38,18 +49,18 @@ impl Op { /// Submit a request to open a file. fn statx(path: &Path, follow_symlinks: bool) -> io::Result> { let path = cstr(path)?; - let mut buffer = box_new_uninit::(); + let mut buffer = box_new_uninit::(); - let flags = libc::AT_STATX_SYNC_AS_STAT - | (libc::AT_SYMLINK_NOFOLLOW * libc::c_int::from(!follow_symlinks)); + let flags: u32 = linux_raw_sys::general::AT_STATX_SYNC_AS_STAT + | (linux_raw_sys::general::AT_SYMLINK_NOFOLLOW * u32::from(!follow_symlinks)); let open_op = opcode::Statx::new( types::Fd(libc::AT_FDCWD), path.as_ptr(), buffer.as_mut_ptr().cast(), ) - .flags(flags) - .mask(libc::STATX_BASIC_STATS | libc::STATX_BTIME) + .flags(flags as i32) + .mask(linux_raw_sys::general::STATX_BASIC_STATS | linux_raw_sys::general::STATX_BTIME) .build(); // SAFETY: Parameters are valid for the entire duration of the operation diff --git a/tokio/src/io/uring/utils.rs b/tokio/src/io/uring/utils.rs index ec70d8300a4..3d87d47f2a6 100644 --- a/tokio/src/io/uring/utils.rs +++ b/tokio/src/io/uring/utils.rs @@ -31,15 +31,14 @@ pub(crate) fn cstr(p: &Path) -> io::Result { Ok(CString::new(p.as_os_str().as_bytes())?) } -// TODO: Remove this once we bump the MSRV to 1.82. +// TODO(MSRV 1.82): When bumping MSRV, switch to `Box::::new_uninit()`. pub(crate) fn box_new_uninit() -> Box> { // Box::::new_uninit() Box::new(MaybeUninit::uninit()) } -// TODO: Remove this once we bump the MSRV to 1.82. +// TODO(MSRV 1.82): When bumping MSRV, switch to `Box::>::assume_init()`. pub(crate) unsafe fn box_assume_init(boxed: Box>) -> Box { - // Box::>::assume_init() let raw = Box::into_raw(boxed); unsafe { Box::from_raw(raw as *mut T) } } From d4686aeecff80e168ec6a18896aafa0ae963d91d Mon Sep 17 00:00:00 2001 From: Vrtgs Date: Fri, 26 Dec 2025 17:07:28 +0300 Subject: [PATCH 03/11] complete using only io-uring operations for read_uring --- tokio/src/fs/mocks.rs | 9 ++ tokio/src/fs/open_options.rs | 24 +++-- .../src/fs/open_options/uring_open_options.rs | 7 ++ tokio/src/fs/read_uring.rs | 14 ++- tokio/src/fs/write.rs | 10 +-- tokio/src/io/uring/open.rs | 6 +- tokio/src/io/uring/statx.rs | 87 ++++++++++++++++--- tokio/src/runtime/driver/op.rs | 3 +- 8 files changed, 117 insertions(+), 43 deletions(-) diff --git a/tokio/src/fs/mocks.rs b/tokio/src/fs/mocks.rs index ae5d7e5368e..138e12ac6bb 100644 --- a/tokio/src/fs/mocks.rs +++ b/tokio/src/fs/mocks.rs @@ -106,6 +106,15 @@ impl From for OwnedFd { } } +#[cfg(all(test, unix))] +impl From for MockFile { + #[inline] + fn from(file: OwnedFd) -> MockFile { + use std::os::fd::IntoRawFd; + unsafe { MockFile::from_raw_fd(IntoRawFd::into_raw_fd(file)) } + } +} + tokio_thread_local! { static QUEUE: RefCell>> = RefCell::new(VecDeque::new()) } diff --git a/tokio/src/fs/open_options.rs b/tokio/src/fs/open_options.rs index 27e98b5b234..682ab4406b4 100644 --- a/tokio/src/fs/open_options.rs +++ b/tokio/src/fs/open_options.rs @@ -6,7 +6,6 @@ use std::path::Path; cfg_io_uring! { mod uring_open_options; pub(crate) use uring_open_options::UringOpenOptions; - use crate::runtime::driver::op::Op; } #[cfg(test)] @@ -518,8 +517,12 @@ impl OpenOptions { /// [`Other`]: std::io::ErrorKind::Other /// [`PermissionDenied`]: std::io::ErrorKind::PermissionDenied pub async fn open(&self, path: impl AsRef) -> io::Result { + self.open_inner(path.as_ref()).await + } + + async fn open_inner(&self, path: &Path) -> io::Result { match &self.inner { - Kind::Std(opts) => Self::std_open(opts, path).await, + Kind::Std(opts) => Self::std_open(opts.clone(), path).await, #[cfg(all( tokio_unstable, feature = "io-uring", @@ -528,6 +531,11 @@ impl OpenOptions { target_os = "linux" ))] Kind::Uring(opts) => { + #[cfg(test)] + use super::mocks::MockFile as StdFile; + #[cfg(not(test))] + use std::fs::File as StdFile; + let handle = crate::runtime::Handle::current(); let driver_handle = handle.inner.driver().io(); @@ -538,18 +546,16 @@ impl OpenOptions { Op::open(path.as_ref(), opts)?.await } else { let opts = opts.clone().into(); - Self::std_open(&opts, path).await + Self::std_open(opts, path).await } } } } - async fn std_open(opts: &StdOpenOptions, path: impl AsRef) -> io::Result { - let path = path.as_ref().to_owned(); - let opts = opts.clone(); - - let std = asyncify(move || opts.open(path)).await?; - Ok(File::from_std(std)) + async fn std_open(opts: StdOpenOptions, path: &Path) -> io::Result { + let path = path.to_owned(); + let std = asyncify(move || opts.open(path).map(File::from_std)).await?; + Ok(std) } #[cfg(windows)] diff --git a/tokio/src/fs/open_options/uring_open_options.rs b/tokio/src/fs/open_options/uring_open_options.rs index 48297ca3b5b..09097e7930b 100644 --- a/tokio/src/fs/open_options/uring_open_options.rs +++ b/tokio/src/fs/open_options/uring_open_options.rs @@ -2,8 +2,11 @@ use std::{io, os::unix::fs::OpenOptionsExt}; #[cfg(test)] use super::mock_open_options::MockOpenOptions as StdOpenOptions; +use crate::runtime::driver::op::Op; #[cfg(not(test))] use std::fs::OpenOptions as StdOpenOptions; +use std::os::fd::OwnedFd; +use std::path::Path; #[derive(Debug, Clone)] pub(crate) struct UringOpenOptions { @@ -107,6 +110,10 @@ impl UringOpenOptions { (_, _, true) => libc::O_CREAT | libc::O_EXCL, }) } + + pub(crate) async fn open(&self, path: &Path) -> io::Result { + Op::open(path, self)?.await + } } impl From for StdOpenOptions { diff --git a/tokio/src/fs/read_uring.rs b/tokio/src/fs/read_uring.rs index 67d709a2ce3..e4c30088f4e 100644 --- a/tokio/src/fs/read_uring.rs +++ b/tokio/src/fs/read_uring.rs @@ -1,4 +1,4 @@ -use crate::fs::OpenOptions; +use crate::fs::UringOpenOptions; use crate::runtime::driver::op::Op; use std::io; @@ -17,15 +17,13 @@ const PROBE_SIZE_U32: u32 = PROBE_SIZE as u32; const MAX_READ_SIZE: usize = 64 * 1024 * 1024; pub(crate) async fn read_uring(path: &Path) -> io::Result> { - let file = OpenOptions::new().read(true).open(path).await?; + let fd = UringOpenOptions::new().read(true).open(path).await?; - // TODO: use io uring in the future to obtain metadata - let size_hint: Option = file.metadata().await.map(|m| m.len() as usize).ok(); + let (size_hint, fd) = Op::metadata_fd(fd).await; - let fd: OwnedFd = file - .try_into_std() - .expect("unexpected in-flight operation detected") - .into(); + let size_hint: Option = size_hint + .ok() + .map(|m| usize::try_from(m.len()).unwrap_or(usize::MAX)); let mut buf = Vec::new(); diff --git a/tokio/src/fs/write.rs b/tokio/src/fs/write.rs index 6c9c195016c..99d1d63eae0 100644 --- a/tokio/src/fs/write.rs +++ b/tokio/src/fs/write.rs @@ -50,21 +50,15 @@ pub async fn write(path: impl AsRef, contents: impl AsRef<[u8]>) -> io::Re cfg_io_uring! { async fn write_uring(path: &Path, mut buf: OwnedBuf) -> io::Result<()> { - use crate::{fs::OpenOptions, runtime::driver::op::Op}; - use std::os::fd::OwnedFd; + use crate::{fs::UringOpenOptions, runtime::driver::op::Op}; - let file = OpenOptions::new() + let mut fd = UringOpenOptions::new() .write(true) .create(true) .truncate(true) .open(path) .await?; - let mut fd: OwnedFd = file - .try_into_std() - .expect("unexpected in-flight operation detected") - .into(); - let total: usize = buf.as_ref().len(); let mut buf_offset: usize = 0; let mut file_offset: u64 = 0; diff --git a/tokio/src/io/uring/open.rs b/tokio/src/io/uring/open.rs index 913588c665c..3785e522ba9 100644 --- a/tokio/src/io/uring/open.rs +++ b/tokio/src/io/uring/open.rs @@ -6,7 +6,7 @@ use crate::runtime::driver::op::{CancelData, Cancellable, Completable, CqeResult use io_uring::{opcode, types}; use std::ffi::CString; use std::io::{self, Error}; -use std::os::fd::FromRawFd; +use std::os::fd::{FromRawFd, OwnedFd}; use std::path::Path; #[derive(Debug)] @@ -18,10 +18,10 @@ pub(crate) struct Open { } impl Completable for Open { - type Output = io::Result; + type Output = io::Result; fn complete(self, cqe: CqeResult) -> Self::Output { cqe.result - .map(|fd| unsafe { crate::fs::File::from_raw_fd(fd as i32) }) + .map(|fd| unsafe { OwnedFd::from_raw_fd(fd as i32) }) } fn complete_with_error(self, err: Error) -> Self::Output { diff --git a/tokio/src/io/uring/statx.rs b/tokio/src/io/uring/statx.rs index 6b14fcb9e73..0518ff69049 100644 --- a/tokio/src/io/uring/statx.rs +++ b/tokio/src/io/uring/statx.rs @@ -1,12 +1,29 @@ -use crate::io::uring::utils::{box_new_uninit, cstr}; +use crate::io::uring::utils::{box_assume_init, box_new_uninit, cstr}; use crate::runtime::driver::op::{CancelData, Cancellable, Completable, CqeResult, Op}; use io_uring::{opcode, types}; use linux_raw_sys::general::statx; use std::fmt::{Debug, Formatter}; use std::io; use std::mem::MaybeUninit; +use std::os::fd::{AsRawFd, OwnedFd}; use std::path::Path; +pub(crate) struct Metadata(statx); + +impl Metadata { + pub(crate) fn len(&self) -> u64 { + self.0.stx_size + } +} + +impl Debug for Metadata { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let mut debug = f.debug_struct("Metadata"); + debug.field("len", &self.len()); + debug.finish_non_exhaustive() + } +} + #[derive(Debug)] pub(crate) struct Statx { /// This field will be read by the kernel during the operation, so we @@ -16,20 +33,10 @@ pub(crate) struct Statx { buffer: Box>, } -pub(crate) struct Metadata(#[allow(dead_code)] statx); - -impl Debug for Metadata { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Metadata").finish_non_exhaustive() - } -} - impl Completable for Statx { type Output = io::Result; fn complete(self, cqe: CqeResult) -> Self::Output { - use crate::io::uring::utils::box_assume_init; - cqe.result .map(|_| Metadata(*unsafe { box_assume_init(self.buffer) })) } @@ -54,17 +61,17 @@ impl Op { let flags: u32 = linux_raw_sys::general::AT_STATX_SYNC_AS_STAT | (linux_raw_sys::general::AT_SYMLINK_NOFOLLOW * u32::from(!follow_symlinks)); - let open_op = opcode::Statx::new( + let statx_op = opcode::Statx::new( types::Fd(libc::AT_FDCWD), path.as_ptr(), buffer.as_mut_ptr().cast(), ) .flags(flags as i32) - .mask(linux_raw_sys::general::STATX_BASIC_STATS | linux_raw_sys::general::STATX_BTIME) + .mask(linux_raw_sys::general::STATX_BASIC_STATS) .build(); // SAFETY: Parameters are valid for the entire duration of the operation - Ok(unsafe { Op::new(open_op, Statx { path, buffer }) }) + Ok(unsafe { Op::new(statx_op, Statx { path, buffer }) }) } pub(crate) fn metadata(path: &Path) -> io::Result> { @@ -75,3 +82,55 @@ impl Op { // Op::statx(path, false) // } } + +#[derive(Debug)] +pub(crate) struct StatxFd { + fd: OwnedFd, + buffer: Box>, +} + +impl Completable for StatxFd { + type Output = (io::Result, OwnedFd); + + fn complete(self, cqe: CqeResult) -> Self::Output { + let ret = cqe + .result + .map(|_| Metadata(*unsafe { box_assume_init(self.buffer) })); + + (ret, self.fd) + } + + fn complete_with_error(self, error: io::Error) -> Self::Output { + (Err(error), self.fd) + } +} + +impl Cancellable for StatxFd { + fn cancel(self) -> CancelData { + CancelData::StatxFd(self) + } +} + +impl Op { + pub(crate) fn metadata_fd(fd: OwnedFd) -> Op { + let mut buffer = box_new_uninit::(); + + let flags: u32 = + linux_raw_sys::general::AT_STATX_SYNC_AS_STAT | linux_raw_sys::general::AT_EMPTY_PATH; + + // io-uring was introduced in linux 5.1 + // pass in an empty path instead of null as specified by man + // https://man7.org/linux/man-pages/man2/statx.2.html + let statx_op = opcode::Statx::new( + types::Fd(fd.as_raw_fd()), + c"".as_ptr(), + buffer.as_mut_ptr().cast(), + ) + .flags(flags as i32) + .mask(linux_raw_sys::general::STATX_BASIC_STATS) + .build(); + + // SAFETY: Parameters are valid for the entire duration of the operation + unsafe { Op::new(statx_op, StatxFd { fd, buffer }) } + } +} diff --git a/tokio/src/runtime/driver/op.rs b/tokio/src/runtime/driver/op.rs index 755371545d9..e317a8355b9 100644 --- a/tokio/src/runtime/driver/op.rs +++ b/tokio/src/runtime/driver/op.rs @@ -6,7 +6,7 @@ use crate::io::uring::write::Write; use crate::runtime::Handle; -use crate::io::uring::statx::Statx; +use crate::io::uring::statx::{Statx, StatxFd}; use io_uring::cqueue; use io_uring::squeue::Entry; use std::future::Future; @@ -27,6 +27,7 @@ pub(crate) enum CancelData { ReadBuf(Read), Read(Read), Statx(Statx), + StatxFd(StatxFd), } #[derive(Debug)] From 3edeeffa64573a8404d10feef7557cd0a8cc0a8b Mon Sep 17 00:00:00 2001 From: Mahdi Ali-Raihan Date: Tue, 21 Apr 2026 02:12:56 -0400 Subject: [PATCH 04/11] Implemented io-uring::Op and apply it accordingly to read_uring, try_exists --- tokio/Cargo.toml | 2 +- tokio/src/fs/open_options.rs | 19 ++- .../src/fs/open_options/uring_open_options.rs | 7 -- tokio/src/fs/read.rs | 5 +- tokio/src/fs/read_uring.rs | 16 ++- tokio/src/fs/try_exists.rs | 9 +- tokio/src/fs/write.rs | 10 +- tokio/src/io/uring/open.rs | 6 +- tokio/src/io/uring/statx.rs | 109 +++++++++--------- tokio/src/io/uring/utils.rs | 6 +- tokio/src/runtime/driver/op.rs | 4 +- 11 files changed, 102 insertions(+), 91 deletions(-) diff --git a/tokio/Cargo.toml b/tokio/Cargo.toml index ae88f4bab1d..123781bbf81 100644 --- a/tokio/Cargo.toml +++ b/tokio/Cargo.toml @@ -85,7 +85,7 @@ sync = [] test-util = ["rt", "sync", "time"] time = [] # Unstable feature. Requires `--cfg tokio_unstable` to enable. -io-uring = ["dep:io-uring", "linux-raw-sys", "libc", "mio/os-poll", "mio/os-ext", "dep:slab"] +io-uring = ["dep:io-uring", "libc", "mio/os-poll", "mio/os-ext", "dep:slab"] # Unstable feature. Requires `--cfg tokio_unstable` to enable. taskdump = ["dep:backtrace"] diff --git a/tokio/src/fs/open_options.rs b/tokio/src/fs/open_options.rs index 682ab4406b4..99073a9d185 100644 --- a/tokio/src/fs/open_options.rs +++ b/tokio/src/fs/open_options.rs @@ -6,6 +6,7 @@ use std::path::Path; cfg_io_uring! { mod uring_open_options; pub(crate) use uring_open_options::UringOpenOptions; + use crate::runtime::driver::op::Op; } #[cfg(test)] @@ -522,7 +523,7 @@ impl OpenOptions { async fn open_inner(&self, path: &Path) -> io::Result { match &self.inner { - Kind::Std(opts) => Self::std_open(opts.clone(), path).await, + Kind::Std(opts) => Self::std_open(opts, path).await, #[cfg(all( tokio_unstable, feature = "io-uring", @@ -531,11 +532,6 @@ impl OpenOptions { target_os = "linux" ))] Kind::Uring(opts) => { - #[cfg(test)] - use super::mocks::MockFile as StdFile; - #[cfg(not(test))] - use std::fs::File as StdFile; - let handle = crate::runtime::Handle::current(); let driver_handle = handle.inner.driver().io(); @@ -543,19 +539,20 @@ impl OpenOptions { .check_and_init(io_uring::opcode::OpenAt::CODE) .await? { - Op::open(path.as_ref(), opts)?.await + Op::open(path, opts)?.await } else { let opts = opts.clone().into(); - Self::std_open(opts, path).await + Self::std_open(&opts, path).await } } } } - async fn std_open(opts: StdOpenOptions, path: &Path) -> io::Result { + async fn std_open(opts: &StdOpenOptions, path: &Path) -> io::Result { let path = path.to_owned(); - let std = asyncify(move || opts.open(path).map(File::from_std)).await?; - Ok(std) + let opts = opts.clone(); + + Ok(asyncify(move || opts.open(path)).await?.into()) } #[cfg(windows)] diff --git a/tokio/src/fs/open_options/uring_open_options.rs b/tokio/src/fs/open_options/uring_open_options.rs index 09097e7930b..48297ca3b5b 100644 --- a/tokio/src/fs/open_options/uring_open_options.rs +++ b/tokio/src/fs/open_options/uring_open_options.rs @@ -2,11 +2,8 @@ use std::{io, os::unix::fs::OpenOptionsExt}; #[cfg(test)] use super::mock_open_options::MockOpenOptions as StdOpenOptions; -use crate::runtime::driver::op::Op; #[cfg(not(test))] use std::fs::OpenOptions as StdOpenOptions; -use std::os::fd::OwnedFd; -use std::path::Path; #[derive(Debug, Clone)] pub(crate) struct UringOpenOptions { @@ -110,10 +107,6 @@ impl UringOpenOptions { (_, _, true) => libc::O_CREAT | libc::O_EXCL, }) } - - pub(crate) async fn open(&self, path: &Path) -> io::Result { - Op::open(path, self)?.await - } } impl From for StdOpenOptions { diff --git a/tokio/src/fs/read.rs b/tokio/src/fs/read.rs index 86e3026b0a1..24594d84b62 100644 --- a/tokio/src/fs/read.rs +++ b/tokio/src/fs/read.rs @@ -68,7 +68,10 @@ pub async fn read(path: impl AsRef) -> io::Result> { let handle = crate::runtime::Handle::current(); let driver_handle = handle.inner.driver().io(); - if driver_handle.check_and_init()? { + if driver_handle + .check_and_init(io_uring::opcode::Read::CODE) + .await? + { return read_uring(path).await; } } diff --git a/tokio/src/fs/read_uring.rs b/tokio/src/fs/read_uring.rs index e4c30088f4e..9818b52a96f 100644 --- a/tokio/src/fs/read_uring.rs +++ b/tokio/src/fs/read_uring.rs @@ -1,4 +1,4 @@ -use crate::fs::UringOpenOptions; +use crate::fs::OpenOptions; use crate::runtime::driver::op::Op; use std::io; @@ -17,13 +17,17 @@ const PROBE_SIZE_U32: u32 = PROBE_SIZE as u32; const MAX_READ_SIZE: usize = 64 * 1024 * 1024; pub(crate) async fn read_uring(path: &Path) -> io::Result> { - let fd = UringOpenOptions::new().read(true).open(path).await?; + let file = OpenOptions::new().read(true).open(path).await?; - let (size_hint, fd) = Op::metadata_fd(fd).await; + let size_hint = Op::file_metadata(&file)? + .await + .map(|m| m.len() as usize) + .ok(); - let size_hint: Option = size_hint - .ok() - .map(|m| usize::try_from(m.len()).unwrap_or(usize::MAX)); + let fd: OwnedFd = file + .try_into_std() + .expect("unexpected in-flight operation detected") + .into(); let mut buf = Vec::new(); diff --git a/tokio/src/fs/try_exists.rs b/tokio/src/fs/try_exists.rs index 170dedb5de6..16ca1401353 100644 --- a/tokio/src/fs/try_exists.rs +++ b/tokio/src/fs/try_exists.rs @@ -30,12 +30,16 @@ pub async fn try_exists(path: impl AsRef) -> io::Result { feature = "io-uring", feature = "rt", feature = "fs", - target_os = "linux" + // libc::statx is only supported on these platforms + any(target_env = "gnu", target_os = "android", target_env = "musl") ))] { let handle = crate::runtime::Handle::current(); let driver_handle = handle.inner.driver().io(); - if driver_handle.check_and_init()? { + if driver_handle + .check_and_init(io_uring::opcode::Statx::CODE) + .await? + { return try_exists_uring(path).await; } } @@ -44,6 +48,7 @@ pub async fn try_exists(path: impl AsRef) -> io::Result { } cfg_io_uring! { + #[inline] async fn try_exists_uring(path: &Path) -> io::Result { use crate::runtime::driver::op::Op; diff --git a/tokio/src/fs/write.rs b/tokio/src/fs/write.rs index 99d1d63eae0..6c9c195016c 100644 --- a/tokio/src/fs/write.rs +++ b/tokio/src/fs/write.rs @@ -50,15 +50,21 @@ pub async fn write(path: impl AsRef, contents: impl AsRef<[u8]>) -> io::Re cfg_io_uring! { async fn write_uring(path: &Path, mut buf: OwnedBuf) -> io::Result<()> { - use crate::{fs::UringOpenOptions, runtime::driver::op::Op}; + use crate::{fs::OpenOptions, runtime::driver::op::Op}; + use std::os::fd::OwnedFd; - let mut fd = UringOpenOptions::new() + let file = OpenOptions::new() .write(true) .create(true) .truncate(true) .open(path) .await?; + let mut fd: OwnedFd = file + .try_into_std() + .expect("unexpected in-flight operation detected") + .into(); + let total: usize = buf.as_ref().len(); let mut buf_offset: usize = 0; let mut file_offset: u64 = 0; diff --git a/tokio/src/io/uring/open.rs b/tokio/src/io/uring/open.rs index 3785e522ba9..913588c665c 100644 --- a/tokio/src/io/uring/open.rs +++ b/tokio/src/io/uring/open.rs @@ -6,7 +6,7 @@ use crate::runtime::driver::op::{CancelData, Cancellable, Completable, CqeResult use io_uring::{opcode, types}; use std::ffi::CString; use std::io::{self, Error}; -use std::os::fd::{FromRawFd, OwnedFd}; +use std::os::fd::FromRawFd; use std::path::Path; #[derive(Debug)] @@ -18,10 +18,10 @@ pub(crate) struct Open { } impl Completable for Open { - type Output = io::Result; + type Output = io::Result; fn complete(self, cqe: CqeResult) -> Self::Output { cqe.result - .map(|fd| unsafe { OwnedFd::from_raw_fd(fd as i32) }) + .map(|fd| unsafe { crate::fs::File::from_raw_fd(fd as i32) }) } fn complete_with_error(self, err: Error) -> Self::Output { diff --git a/tokio/src/io/uring/statx.rs b/tokio/src/io/uring/statx.rs index 0518ff69049..08001b23f69 100644 --- a/tokio/src/io/uring/statx.rs +++ b/tokio/src/io/uring/statx.rs @@ -1,16 +1,18 @@ +use crate::fs::File; use crate::io::uring::utils::{box_assume_init, box_new_uninit, cstr}; use crate::runtime::driver::op::{CancelData, Cancellable, Completable, CqeResult, Op}; use io_uring::{opcode, types}; -use linux_raw_sys::general::statx; +use libc::statx; use std::fmt::{Debug, Formatter}; use std::io; use std::mem::MaybeUninit; -use std::os::fd::{AsRawFd, OwnedFd}; +use std::os::fd::AsRawFd; use std::path::Path; pub(crate) struct Metadata(statx); impl Metadata { + /// Returns the size of the file, in bytes, this metadata is for. pub(crate) fn len(&self) -> u64 { self.0.stx_size } @@ -37,6 +39,12 @@ impl Completable for Statx { type Output = io::Result; fn complete(self, cqe: CqeResult) -> Self::Output { + // SAFETY: On success, we always receive 0, which should guarantee + // that the information about a file is stored inside the + // statx buffer. On failure, we'll receive an Error value, + // avoiding misuse of `.box_assume_init()`. + // Refer to man page description and return value: + // https://man7.org/linux/man-pages/man2/statx.2.html cqe.result .map(|_| Metadata(*unsafe { box_assume_init(self.buffer) })) } @@ -53,84 +61,77 @@ impl Cancellable for Statx { } impl Op { - /// Submit a request to open a file. - fn statx(path: &Path, follow_symlinks: bool) -> io::Result> { + /// Submit a request to retrieve a file's status. + #[inline] + fn statx(path: &Path, flags: i32) -> io::Result> { let path = cstr(path)?; let mut buffer = box_new_uninit::(); - let flags: u32 = linux_raw_sys::general::AT_STATX_SYNC_AS_STAT - | (linux_raw_sys::general::AT_SYMLINK_NOFOLLOW * u32::from(!follow_symlinks)); - let statx_op = opcode::Statx::new( types::Fd(libc::AT_FDCWD), path.as_ptr(), buffer.as_mut_ptr().cast(), ) - .flags(flags as i32) - .mask(linux_raw_sys::general::STATX_BASIC_STATS) + .flags(flags) + .mask(libc::STATX_BASIC_STATS) .build(); // SAFETY: Parameters are valid for the entire duration of the operation Ok(unsafe { Op::new(statx_op, Statx { path, buffer }) }) } + /// Retrieves the metadata information of the given path, following symlinks + /// if the path provided points to a symlink location. + #[inline] pub(crate) fn metadata(path: &Path) -> io::Result> { - Op::statx(path, true) - } - - // pub(crate) fn symlink_metadata(path: &Path) -> io::Result> { - // Op::statx(path, false) - // } -} - -#[derive(Debug)] -pub(crate) struct StatxFd { - fd: OwnedFd, - buffer: Box>, -} - -impl Completable for StatxFd { - type Output = (io::Result, OwnedFd); - - fn complete(self, cqe: CqeResult) -> Self::Output { - let ret = cqe - .result - .map(|_| Metadata(*unsafe { box_assume_init(self.buffer) })); - - (ret, self.fd) - } - - fn complete_with_error(self, error: io::Error) -> Self::Output { - (Err(error), self.fd) + Op::statx( + path, + // we don't need to pass in AT_SYMLINK_FOLLOW here, it'll follow + // by default it seems + libc::AT_STATX_SYNC_AS_STAT, + ) } -} -impl Cancellable for StatxFd { - fn cancel(self) -> CancelData { - CancelData::StatxFd(self) - } -} - -impl Op { - pub(crate) fn metadata_fd(fd: OwnedFd) -> Op { + /// Retrieves the metadata information of the given file + pub(crate) fn file_metadata(file: &File) -> io::Result> { let mut buffer = box_new_uninit::(); - - let flags: u32 = - linux_raw_sys::general::AT_STATX_SYNC_AS_STAT | linux_raw_sys::general::AT_EMPTY_PATH; + let empty_path = cstr(Path::new(""))?; // io-uring was introduced in linux 5.1 - // pass in an empty path instead of null as specified by man + // pass in an empty path instead of null to target the file descriptor + // status as specified by man: // https://man7.org/linux/man-pages/man2/statx.2.html let statx_op = opcode::Statx::new( - types::Fd(fd.as_raw_fd()), - c"".as_ptr(), + types::Fd(file.as_raw_fd()), + // it should be fine to pass in `empty_path` whose lifetime + // does not exceed the `file_metadata()` function as a ptr here + // because we want to stat the dirfd not this pathname + empty_path.as_ptr(), buffer.as_mut_ptr().cast(), ) - .flags(flags as i32) - .mask(linux_raw_sys::general::STATX_BASIC_STATS) + .flags(libc::AT_STATX_SYNC_AS_STAT | libc::AT_EMPTY_PATH) + .mask(libc::STATX_BASIC_STATS) .build(); // SAFETY: Parameters are valid for the entire duration of the operation - unsafe { Op::new(statx_op, StatxFd { fd, buffer }) } + Ok(unsafe { + Op::new( + statx_op, + Statx { + path: empty_path, + buffer, + }, + ) + }) + } + + /// Retrieves the metadata information of the given path without following symlinks. + #[inline] + #[allow(dead_code)] + pub(crate) fn symlink_metadata(path: &Path) -> io::Result> { + Op::statx( + path, + libc::AT_STATX_SYNC_AS_STAT | libc::AT_SYMLINK_NOFOLLOW, + ) } } diff --git a/tokio/src/io/uring/utils.rs b/tokio/src/io/uring/utils.rs index 3d87d47f2a6..408afc538a0 100644 --- a/tokio/src/io/uring/utils.rs +++ b/tokio/src/io/uring/utils.rs @@ -1,5 +1,5 @@ -use std::os::fd::{AsRawFd, OwnedFd, RawFd}; use std::mem::MaybeUninit; +use std::os::fd::{AsRawFd, OwnedFd, RawFd}; use std::os::unix::ffi::OsStrExt; use std::sync::Arc; use std::{ffi::CString, io, path::Path}; @@ -38,7 +38,11 @@ pub(crate) fn box_new_uninit() -> Box> { } // TODO(MSRV 1.82): When bumping MSRV, switch to `Box::>::assume_init()`. +// It is up to the caller to guarantee that the value really is in an initialized state. +// Calling this when the content is not yet fully initialized causes immediate undefined behavior. pub(crate) unsafe fn box_assume_init(boxed: Box>) -> Box { let raw = Box::into_raw(boxed); + // SAFETY: If the caller guarantees that the MaybeUninit is initialized, then + // costructing the box from a raw mut ptr of MaybeUninit should be safe. unsafe { Box::from_raw(raw as *mut T) } } diff --git a/tokio/src/runtime/driver/op.rs b/tokio/src/runtime/driver/op.rs index e317a8355b9..661a803f14d 100644 --- a/tokio/src/runtime/driver/op.rs +++ b/tokio/src/runtime/driver/op.rs @@ -6,7 +6,7 @@ use crate::io::uring::write::Write; use crate::runtime::Handle; -use crate::io::uring::statx::{Statx, StatxFd}; +use crate::io::uring::statx::Statx; use io_uring::cqueue; use io_uring::squeue::Entry; use std::future::Future; @@ -25,9 +25,7 @@ pub(crate) enum CancelData { Write(Write), ReadVec(Read, OwnedFd>), ReadBuf(Read), - Read(Read), Statx(Statx), - StatxFd(StatxFd), } #[derive(Debug)] From 7e008e62bfdd3aa82e76205dac3a5f8568d9c5b5 Mon Sep 17 00:00:00 2001 From: Mahdi Ali-Raihan Date: Mon, 4 May 2026 16:49:01 -0400 Subject: [PATCH 05/11] Added test for io uring statx operations. Checks for cancellations, shutdown, stating multiple files, ELOOP, ENAMETOOLONG, EACCES --- tokio/src/io/uring/utils.rs | 6 +- tokio/tests/fs_uring_statx.rs | 281 ++++++++++++++++++++++++++++++++++ 2 files changed, 285 insertions(+), 2 deletions(-) create mode 100644 tokio/tests/fs_uring_statx.rs diff --git a/tokio/src/io/uring/utils.rs b/tokio/src/io/uring/utils.rs index 408afc538a0..33997b1eb90 100644 --- a/tokio/src/io/uring/utils.rs +++ b/tokio/src/io/uring/utils.rs @@ -38,8 +38,10 @@ pub(crate) fn box_new_uninit() -> Box> { } // TODO(MSRV 1.82): When bumping MSRV, switch to `Box::>::assume_init()`. -// It is up to the caller to guarantee that the value really is in an initialized state. -// Calling this when the content is not yet fully initialized causes immediate undefined behavior. +/// # Safety +/// +/// It is up to the caller to guarantee that the value really is in an initialized state. +/// Calling this when the content is not yet fully initialized causes immediate undefined behavior. pub(crate) unsafe fn box_assume_init(boxed: Box>) -> Box { let raw = Box::into_raw(boxed); // SAFETY: If the caller guarantees that the MaybeUninit is initialized, then diff --git a/tokio/tests/fs_uring_statx.rs b/tokio/tests/fs_uring_statx.rs new file mode 100644 index 00000000000..6197ef3dcb8 --- /dev/null +++ b/tokio/tests/fs_uring_statx.rs @@ -0,0 +1,281 @@ +//! Uring file operations tests. +#![cfg(all( + tokio_unstable, + feature = "io-uring", + feature = "rt", + feature = "fs", + target_os = "linux" +))] + +use futures::future::Future; +use libc::PATH_MAX; +use std::future::poll_fn; +use std::io::Write; +use std::os::unix::fs::PermissionsExt; +use std::path::PathBuf; +use std::sync::mpsc; +use std::task::{Context, Poll, Waker}; +use std::time::Duration; +use tempfile::{tempdir, NamedTempFile}; +use tokio::fs::{create_dir, metadata, set_permissions, symlink, try_exists, write}; +use tokio::runtime::{Builder, Runtime}; +use tokio_test::assert_pending; +use tokio_util::task::TaskTracker; + +fn multi_rt(n: usize) -> Box Runtime> { + Box::new(move || { + Builder::new_multi_thread() + .worker_threads(n) + .enable_all() + .build() + .unwrap() + }) +} + +fn current_rt() -> Box Runtime> { + Box::new(|| Builder::new_current_thread().enable_all().build().unwrap()) +} + +fn rt_combinations() -> Vec Runtime>> { + vec![ + current_rt(), + multi_rt(1), + multi_rt(2), + multi_rt(8), + multi_rt(64), + multi_rt(256), + ] +} + +#[test] +fn shutdown_runtime_while_performing_io_uring_ops() { + fn run(rt: Runtime) { + let (done_tx, done_rx) = mpsc::channel(); + let (_tmp, path) = create_tmp_files(1); + // keep 100 permits + const N: i32 = 100; + rt.spawn(async move { + let path = path[0].clone(); + + // spawning a bunch of uring operations. + let mut futs = vec![]; + + // spawning a bunch of uring operations. + for _ in 0..N { + let path = path.clone(); + let mut fut = Box::pin(try_exists(path)); + + poll_fn(|cx| { + assert_pending!(fut.as_mut().poll(cx)); + Poll::<()>::Pending + }) + .await; + + futs.push(fut); + } + + tokio::task::yield_now().await; + }); + + std::thread::spawn(move || { + rt.shutdown_timeout(Duration::from_millis(300)); + done_tx.send(()).unwrap(); + }); + + done_rx.recv().unwrap(); + } + + for rt in rt_combinations() { + run(rt()); + } +} + +#[test] +fn stat_many_files() { + fn run(rt: Runtime) { + const NUM_FILES: usize = 512; + + let (_tmp_files, paths): (Vec, Vec) = create_tmp_files(NUM_FILES); + + rt.block_on(async move { + let tracker = TaskTracker::new(); + + for i in 0..10_000 { + let path = paths.get(i % NUM_FILES).unwrap().clone(); + tracker.spawn(async move { + let exists = try_exists(path).await.unwrap(); + assert!(exists); + }); + } + tracker.close(); + tracker.wait().await; + }); + } + + for rt in rt_combinations() { + run(rt()); + } +} + +#[tokio::test] +async fn stat_small_large_files() { + let (_tmp, path) = create_large_temp_file(); + + let exists = try_exists(path).await.unwrap(); + assert!(exists); + + let (_tmp, path) = create_small_temp_file(); + + let exists = try_exists(path).await.unwrap(); + assert!(exists); +} + +#[tokio::test] +async fn stat_nonexistent_file() { + let path = tempdir().unwrap().path().join("nonexistent_path"); + let exists = try_exists(path).await.unwrap(); + assert!(!exists); +} + +#[tokio::test] +#[cfg_attr(miri, ignore)] // No `chmod` in miri. +#[cfg(unix)] +async fn stat_permission_denied() { + let dir = tempdir().unwrap(); + let permission_denied_directory_path = dir.path().join("baz"); + create_dir(&permission_denied_directory_path).await.unwrap(); + let permission_denied_file_path = permission_denied_directory_path.join("baz.txt"); + write(&permission_denied_file_path, b"Hello File!") + .await + .unwrap(); + let mut perms = metadata(&permission_denied_directory_path) + .await + .unwrap() + .permissions(); + + perms.set_mode(0o244); + set_permissions(&permission_denied_directory_path, perms) + .await + .unwrap(); + let permission_denied_result = try_exists(permission_denied_file_path).await; + assert_eq!( + permission_denied_result.err().unwrap().kind(), + std::io::ErrorKind::PermissionDenied + ); +} + +#[tokio::test] +#[cfg(unix)] +async fn stat_filesystem_loop() { + let dir = tempdir().unwrap(); + let first_symlink = dir.path().join("bar"); + let second_symlink = dir.path().join("foo"); + symlink(&first_symlink, &second_symlink).await.unwrap(); + symlink(&second_symlink, &first_symlink).await.unwrap(); + + // Both symlinks loop on each other, so stating either one should produce + // a file system loop error. This produces a `std::io::ErrorKind::FilesystemLoop` + // error, but that error is gated behind io_error_more feature, and we can't be + // sure if that name will ever be changed, so preferred using libc::ELOOP instead + let filesystem_loop_result = try_exists(first_symlink).await; + assert_eq!( + filesystem_loop_result + .err() + .unwrap() + .raw_os_error() + .unwrap(), + libc::ELOOP + ); + let filesystem_loop_result = try_exists(second_symlink).await; + assert_eq!( + filesystem_loop_result + .err() + .unwrap() + .raw_os_error() + .unwrap(), + libc::ELOOP + ); +} + +#[tokio::test] +async fn stat_path_name_too_long() { + let dir = tempdir().unwrap(); + // if we stat a file whose name is above PATH_MAX (Linux is 4096 bytes, Windows 260 chars or 32767 chars if extended + // path is permitted), we should receive an std::io::ErrorKind::InvalidFilename error + let long_nonexistent_path = dir.path().join(vec!["a"; (PATH_MAX + 1) as usize].join("")); + let name_too_long_result = try_exists(long_nonexistent_path).await; + assert_eq!( + name_too_long_result.err().unwrap().kind(), + std::io::ErrorKind::InvalidFilename + ); +} + +#[tokio::test] +async fn cancel_op_future() { + let (_tmp_file, path): (Vec, Vec) = create_tmp_files(1); + let path = path[0].clone(); + + let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel(); + + let handle = tokio::spawn(async move { + let fut = try_exists(path.clone()); + tokio::pin!(fut); + + poll_fn(move |_| { + // If io_uring is enabled (and not falling back to the thread pool), + // the first poll should return Pending. + assert_pending!(fut.as_mut().poll(&mut Context::from_waker(Waker::noop()))); + tx.send(true).unwrap(); + + Poll::<()>::Pending + }) + .await; + }); + + // Wait for the first poll + + let val = rx.recv().await; + assert!(val.unwrap()); + + handle.abort(); + + let res = handle.await.unwrap_err(); + assert!(res.is_cancelled()); +} + +fn create_tmp_files(num_files: usize) -> (Vec, Vec) { + let mut files = Vec::with_capacity(num_files); + for _ in 0..num_files { + let mut tmp = NamedTempFile::new().unwrap(); + let buf = vec![20; 1023]; + tmp.write_all(&buf).unwrap(); + let path = tmp.path().to_path_buf(); + files.push((tmp, path)); + } + + files.into_iter().unzip() +} + +fn create_large_temp_file() -> (NamedTempFile, PathBuf) { + let mut tmp = NamedTempFile::new().unwrap(); + let buf = create_buf(5000); + + tmp.write_all(&buf).unwrap(); + let path = tmp.path().to_path_buf(); + + (tmp, path) +} + +fn create_small_temp_file() -> (NamedTempFile, PathBuf) { + let mut tmp = NamedTempFile::new().unwrap(); + let buf = create_buf(20); + + tmp.write_all(&buf).unwrap(); + let path = tmp.path().to_path_buf(); + + (tmp, path) +} + +fn create_buf(length: usize) -> Vec { + (0..length).map(|i| i as u8).collect() +} From 91fe47e59755f876da7da2a45936134b580acfd8 Mon Sep 17 00:00:00 2001 From: Mahdi Ali-Raihan Date: Tue, 5 May 2026 02:37:57 -0400 Subject: [PATCH 06/11] Removed musl as supported platform for io_uring statx operations, as statx is supported on 1.25+ musl, and MSRV that uses 1.25 on all *-linux-musl platforms is 1.93 --- tokio/src/fs/mod.rs | 12 ++++++++++++ tokio/src/fs/read.rs | 11 ++++++++++- tokio/src/fs/read_uring.rs | 22 ++++++++++++++++++++++ tokio/src/fs/try_exists.rs | 26 +++++++++++++++++++++++++- tokio/src/io/uring/statx.rs | 17 +++++++++++++++++ tokio/src/io/uring/utils.rs | 2 ++ tokio/src/runtime/driver/op.rs | 24 ++++++++++++++++++++++++ tokio/tests/fs_uring_statx.rs | 1 + 8 files changed, 113 insertions(+), 2 deletions(-) diff --git a/tokio/src/fs/mod.rs b/tokio/src/fs/mod.rs index 8701d3a1083..315877bc94f 100644 --- a/tokio/src/fs/mod.rs +++ b/tokio/src/fs/mod.rs @@ -297,6 +297,18 @@ cfg_windows! { cfg_io_uring! { pub(crate) mod read_uring; + #[cfg( + // libc::statx is only supported on these platforms + // FIXME: Add musl target env when our minimum supported + // rust version is 1.93. To clarify, statx support is + // introduced to musl in 1.25 as mentioned officially here: + // https://musl.libc.org/releases.html. + // However, rustup target_env building for *-linux-musl + // uses 1.25 musl on all *-linux-musl platforms starting + // in 1.93 stable rust version. + // https://blog.rust-lang.org/2025/12/05/Updating-musl-1.2.5/ + any(target_env = "gnu", target_os = "android") + )] pub(crate) use self::read_uring::read_uring; pub(crate) use self::open_options::UringOpenOptions; diff --git a/tokio/src/fs/read.rs b/tokio/src/fs/read.rs index 24594d84b62..f4de3913144 100644 --- a/tokio/src/fs/read.rs +++ b/tokio/src/fs/read.rs @@ -61,7 +61,16 @@ pub async fn read(path: impl AsRef) -> io::Result> { feature = "io-uring", feature = "rt", feature = "fs", - target_os = "linux" + // libc::statx is only supported on these platforms + // FIXME: Add musl target env when our minimum supported + // rust version is 1.93. To clarify, statx support is + // introduced to musl in 1.25 as mentioned officially here: + // https://musl.libc.org/releases.html. + // However, rustup target_env building for *-linux-musl + // uses 1.25 musl on all *-linux-musl platforms starting + // in 1.93 stable rust version. + // https://blog.rust-lang.org/2025/12/05/Updating-musl-1.2.5/ + any(target_env = "gnu", target_os = "android") ))] { use crate::fs::read_uring; diff --git a/tokio/src/fs/read_uring.rs b/tokio/src/fs/read_uring.rs index 9818b52a96f..79528f52f7c 100644 --- a/tokio/src/fs/read_uring.rs +++ b/tokio/src/fs/read_uring.rs @@ -1,9 +1,11 @@ +#[allow(unused)] // FIXME: remove when MSRV is 1.93 (due to statx on 1.25 musl) use crate::fs::OpenOptions; use crate::runtime::driver::op::Op; use std::io; use std::io::ErrorKind; use std::os::fd::OwnedFd; +#[allow(unused)] // FIXME: remove when MSRV is 1.93 (due to statx on 1.25 musl) use std::path::Path; // this algorithm is inspired from rust std lib version 1.90.0 @@ -14,8 +16,25 @@ const PROBE_SIZE_U32: u32 = PROBE_SIZE as u32; // Max bytes we can read using io uring submission at a time // SAFETY: cannot be higher than u32::MAX for safe cast // Set to read max 64 MiB at time +#[allow(unused)] // FIXME: remove when MSRV is 1.93 (due to statx on 1.25 musl) const MAX_READ_SIZE: usize = 64 * 1024 * 1024; +#[cfg(all( + tokio_unstable, + feature = "io-uring", + feature = "rt", + feature = "fs", + // libc::statx is only supported on these platforms + // FIXME: Add musl target env when our minimum supported + // rust version is 1.93. To clarify, statx support is + // introduced to musl in 1.25 as mentioned officially here: + // https://musl.libc.org/releases.html. + // However, rustup target_env building for *-linux-musl + // uses 1.25 musl on all *-linux-musl platforms starting + // in 1.93 stable rust version. + // https://blog.rust-lang.org/2025/12/05/Updating-musl-1.2.5/ + any(target_env = "gnu", target_os = "android") +))] pub(crate) async fn read_uring(path: &Path) -> io::Result> { let file = OpenOptions::new().read(true).open(path).await?; @@ -38,6 +57,7 @@ pub(crate) async fn read_uring(path: &Path) -> io::Result> { read_to_end_uring(fd, buf).await } +#[allow(unused)] // FIXME: remove when MSRV is 1.93 (due to statx on 1.25 musl) async fn read_to_end_uring(mut fd: OwnedFd, mut buf: Vec) -> io::Result> { let mut offset = 0; let start_cap = buf.capacity(); @@ -82,6 +102,7 @@ async fn read_to_end_uring(mut fd: OwnedFd, mut buf: Vec) -> io::Result, @@ -111,6 +132,7 @@ async fn small_probe_read( // Takes a length to read and returns a single read in the buffer // // Returns the file descriptor, buffer and EOF reached or not +#[allow(unused)] // FIXME: remove when MSRV is 1.93 (due to statx on 1.25 musl) async fn op_read( mut fd: OwnedFd, mut buf: Vec, diff --git a/tokio/src/fs/try_exists.rs b/tokio/src/fs/try_exists.rs index 16ca1401353..2e021eb7277 100644 --- a/tokio/src/fs/try_exists.rs +++ b/tokio/src/fs/try_exists.rs @@ -31,7 +31,15 @@ pub async fn try_exists(path: impl AsRef) -> io::Result { feature = "rt", feature = "fs", // libc::statx is only supported on these platforms - any(target_env = "gnu", target_os = "android", target_env = "musl") + // FIXME: Add musl target env when our minimum supported + // rust version is 1.93. To clarify, statx support is + // introduced to musl in 1.25 as mentioned officially here: + // https://musl.libc.org/releases.html. + // However, rustup target_env building for *-linux-musl + // uses 1.25 musl on all *-linux-musl platforms starting + // in 1.93 stable rust version. + // https://blog.rust-lang.org/2025/12/05/Updating-musl-1.2.5/ + any(target_env = "gnu", target_os = "android") ))] { let handle = crate::runtime::Handle::current(); @@ -49,6 +57,22 @@ pub async fn try_exists(path: impl AsRef) -> io::Result { cfg_io_uring! { #[inline] + #[cfg(all( + tokio_unstable, + feature = "io-uring", + feature = "rt", + feature = "fs", + // libc::statx is only supported on these platforms + // FIXME: Add musl target env when our minimum supported + // rust version is 1.93. To clarify, statx support is + // introduced to musl in 1.25 as mentioned officially here: + // https://musl.libc.org/releases.html. + // However, rustup target_env building for *-linux-musl + // uses 1.25 musl on all *-linux-musl platforms starting + // in 1.93 stable rust version. + // https://blog.rust-lang.org/2025/12/05/Updating-musl-1.2.5/ + any(target_env = "gnu", target_os = "android") + ))] async fn try_exists_uring(path: &Path) -> io::Result { use crate::runtime::driver::op::Op; diff --git a/tokio/src/io/uring/statx.rs b/tokio/src/io/uring/statx.rs index 08001b23f69..8eca50ed695 100644 --- a/tokio/src/io/uring/statx.rs +++ b/tokio/src/io/uring/statx.rs @@ -1,3 +1,20 @@ +#![cfg(all( + tokio_unstable, + feature = "io-uring", + feature = "rt", + feature = "fs", + // libc::statx is only supported on these platforms + // FIXME: Add musl target env when our minimum supported + // rust version is 1.93. To clarify, statx support is + // introduced to musl in 1.25 as mentioned officially here: + // https://musl.libc.org/releases.html. + // However, rustup target_env building for *-linux-musl + // uses 1.25 musl on all *-linux-musl platforms starting + // in 1.93 stable rust version. + // https://blog.rust-lang.org/2025/12/05/Updating-musl-1.2.5/ + any(target_env = "gnu", target_os = "android") +))] + use crate::fs::File; use crate::io::uring::utils::{box_assume_init, box_new_uninit, cstr}; use crate::runtime::driver::op::{CancelData, Cancellable, Completable, CqeResult, Op}; diff --git a/tokio/src/io/uring/utils.rs b/tokio/src/io/uring/utils.rs index 33997b1eb90..cde2442dd49 100644 --- a/tokio/src/io/uring/utils.rs +++ b/tokio/src/io/uring/utils.rs @@ -32,6 +32,7 @@ pub(crate) fn cstr(p: &Path) -> io::Result { } // TODO(MSRV 1.82): When bumping MSRV, switch to `Box::::new_uninit()`. +#[allow(unused)] // FIXME: remove when MSRV is 1.93 (due to statx on 1.25 musl) pub(crate) fn box_new_uninit() -> Box> { // Box::::new_uninit() Box::new(MaybeUninit::uninit()) @@ -42,6 +43,7 @@ pub(crate) fn box_new_uninit() -> Box> { /// /// It is up to the caller to guarantee that the value really is in an initialized state. /// Calling this when the content is not yet fully initialized causes immediate undefined behavior. +#[allow(unused)] // FIXME: remove when MSRV is 1.93 (due to statx on 1.25 musl) pub(crate) unsafe fn box_assume_init(boxed: Box>) -> Box { let raw = Box::into_raw(boxed); // SAFETY: If the caller guarantees that the MaybeUninit is initialized, then diff --git a/tokio/src/runtime/driver/op.rs b/tokio/src/runtime/driver/op.rs index 661a803f14d..16b48aee833 100644 --- a/tokio/src/runtime/driver/op.rs +++ b/tokio/src/runtime/driver/op.rs @@ -6,6 +6,18 @@ use crate::io::uring::write::Write; use crate::runtime::Handle; +#[cfg( + // libc::statx is only supported on these platforms + // FIXME: Add musl target env when our minimum supported + // rust version is 1.93. To clarify, statx support is + // introduced to musl in 1.25 as mentioned officially here: + // https://musl.libc.org/releases.html. + // However, rustup target_env building for *-linux-musl + // uses 1.25 musl on all *-linux-musl platforms starting + // in 1.93 stable rust version. + // https://blog.rust-lang.org/2025/12/05/Updating-musl-1.2.5/ + any(target_env = "gnu", target_os = "android") +)] use crate::io::uring::statx::Statx; use io_uring::cqueue; use io_uring::squeue::Entry; @@ -25,6 +37,18 @@ pub(crate) enum CancelData { Write(Write), ReadVec(Read, OwnedFd>), ReadBuf(Read), + #[cfg( + // libc::statx is only supported on these platforms + // FIXME: Add musl target env when our minimum supported + // rust version is 1.93. To clarify, statx support is + // introduced to musl in 1.25 as mentioned officially here: + // https://musl.libc.org/releases.html. + // However, rustup target_env building for *-linux-musl + // uses 1.25 musl on all *-linux-musl platforms starting + // in 1.93 stable rust version. + // https://blog.rust-lang.org/2025/12/05/Updating-musl-1.2.5/ + any(target_env = "gnu", target_os = "android") + )] Statx(Statx), } diff --git a/tokio/tests/fs_uring_statx.rs b/tokio/tests/fs_uring_statx.rs index 6197ef3dcb8..6514020eb78 100644 --- a/tokio/tests/fs_uring_statx.rs +++ b/tokio/tests/fs_uring_statx.rs @@ -1,4 +1,5 @@ //! Uring file operations tests. + #![cfg(all( tokio_unstable, feature = "io-uring", From 5631da692493238178dde98c4708514408dcd943 Mon Sep 17 00:00:00 2001 From: Mahdi Ali-Raihan Date: Tue, 5 May 2026 14:42:36 -0400 Subject: [PATCH 07/11] Removed pending checks for cancel_op_future since io_uring not available on Linux <5.1, removed stat permission denied test case since it doesn't work on Linux 4.19 --- tokio/src/fs/try_exists.rs | 3 ++ tokio/tests/fs_uring_statx.rs | 84 +++++++++++++++++++---------------- 2 files changed, 49 insertions(+), 38 deletions(-) diff --git a/tokio/src/fs/try_exists.rs b/tokio/src/fs/try_exists.rs index 2e021eb7277..dffc2f3534f 100644 --- a/tokio/src/fs/try_exists.rs +++ b/tokio/src/fs/try_exists.rs @@ -86,5 +86,8 @@ cfg_io_uring! { async fn try_exists_spawn_blocking(path: &Path) -> io::Result { let path = path.to_owned(); + // FIXME: When MSRV is 1.81, change this to + // std::fs::exists() to be consistent with + // all other tokio::fs operations asyncify(move || path.try_exists()).await } diff --git a/tokio/tests/fs_uring_statx.rs b/tokio/tests/fs_uring_statx.rs index 6514020eb78..77ba12b6a31 100644 --- a/tokio/tests/fs_uring_statx.rs +++ b/tokio/tests/fs_uring_statx.rs @@ -9,16 +9,19 @@ ))] use futures::future::Future; +use futures::future::FutureExt; use libc::PATH_MAX; use std::future::poll_fn; use std::io::Write; -use std::os::unix::fs::PermissionsExt; +// use std::os::unix::fs::PermissionsExt; use std::path::PathBuf; use std::sync::mpsc; -use std::task::{Context, Poll, Waker}; +use std::task::Poll; use std::time::Duration; use tempfile::{tempdir, NamedTempFile}; -use tokio::fs::{create_dir, metadata, set_permissions, symlink, try_exists, write}; +use tokio::fs::{ + /*create_dir,*/ /*metadata,*/ /*set_permissions,*/ symlink, try_exists, /*write*/ +}; use tokio::runtime::{Builder, Runtime}; use tokio_test::assert_pending; use tokio_util::task::TaskTracker; @@ -138,32 +141,37 @@ async fn stat_nonexistent_file() { assert!(!exists); } -#[tokio::test] -#[cfg_attr(miri, ignore)] // No `chmod` in miri. -#[cfg(unix)] -async fn stat_permission_denied() { - let dir = tempdir().unwrap(); - let permission_denied_directory_path = dir.path().join("baz"); - create_dir(&permission_denied_directory_path).await.unwrap(); - let permission_denied_file_path = permission_denied_directory_path.join("baz.txt"); - write(&permission_denied_file_path, b"Hello File!") - .await - .unwrap(); - let mut perms = metadata(&permission_denied_directory_path) - .await - .unwrap() - .permissions(); - - perms.set_mode(0o244); - set_permissions(&permission_denied_directory_path, perms) - .await - .unwrap(); - let permission_denied_result = try_exists(permission_denied_file_path).await; - assert_eq!( - permission_denied_result.err().unwrap().kind(), - std::io::ErrorKind::PermissionDenied - ); -} +// Error is not produced on Linux 4.19 (Linux 7.1 it works) +// #[tokio::test] +// #[cfg_attr(miri, ignore)] // No `chmod` in miri. +// #[cfg(unix)] +// async fn stat_permission_denied() { +// let dir = tempdir().unwrap(); +// let permission_denied_directory_path = dir.path().join("baz"); +// create_dir(&permission_denied_directory_path).await.unwrap(); +// let permission_denied_file_path = permission_denied_directory_path.join("baz.txt"); +// write(&permission_denied_file_path, b"Hello File!") +// .await +// .unwrap(); +// let mut perms = metadata(&permission_denied_directory_path) +// .await +// .unwrap() +// .permissions(); + +// perms.set_mode(0o244); +// set_permissions(&permission_denied_directory_path, perms) +// .await +// .unwrap(); +// let permission_denied_result = try_exists(permission_denied_file_path).await; +// assert_eq!( +// permission_denied_result +// .err() +// .unwrap() +// .raw_os_error() +// .unwrap(), +// libc::EACCES +// ); +// } #[tokio::test] #[cfg(unix)] @@ -219,14 +227,16 @@ async fn cancel_op_future() { let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel(); let handle = tokio::spawn(async move { - let fut = try_exists(path.clone()); - tokio::pin!(fut); + poll_fn(|cx| { + let fut = try_exists(path.clone()); - poll_fn(move |_| { // If io_uring is enabled (and not falling back to the thread pool), - // the first poll should return Pending. - assert_pending!(fut.as_mut().poll(&mut Context::from_waker(Waker::noop()))); - tx.send(true).unwrap(); + // the first poll should return Pending. We don't check if the result + // is actually pending because we run some checks on old kernel that + // do not support uring. + let _pending = Box::pin(fut).poll_unpin(cx); + + tx.send(()).unwrap(); Poll::<()>::Pending }) @@ -234,9 +244,7 @@ async fn cancel_op_future() { }); // Wait for the first poll - - let val = rx.recv().await; - assert!(val.unwrap()); + rx.recv().await.unwrap(); handle.abort(); From 25642ec9b4a88ba0b7e4358b5b16ffb68e52d64d Mon Sep 17 00:00:00 2001 From: Mahdi Ali-Raihan Date: Tue, 12 May 2026 18:50:14 -0400 Subject: [PATCH 08/11] Removed redundant cfg attributes on functions, added STATX_BTIME flag in statx operation, use assert pending in cancel ops --- tokio/src/fs/read_uring.rs | 68 +++++++++++++++++------------------ tokio/src/fs/try_exists.rs | 8 ++--- tokio/src/io/uring/statx.rs | 2 +- tokio/tests/fs_uring_statx.rs | 27 +++++++++----- 4 files changed, 55 insertions(+), 50 deletions(-) diff --git a/tokio/src/fs/read_uring.rs b/tokio/src/fs/read_uring.rs index 79528f52f7c..3d9351ef518 100644 --- a/tokio/src/fs/read_uring.rs +++ b/tokio/src/fs/read_uring.rs @@ -19,42 +19,40 @@ const PROBE_SIZE_U32: u32 = PROBE_SIZE as u32; #[allow(unused)] // FIXME: remove when MSRV is 1.93 (due to statx on 1.25 musl) const MAX_READ_SIZE: usize = 64 * 1024 * 1024; -#[cfg(all( - tokio_unstable, - feature = "io-uring", - feature = "rt", - feature = "fs", - // libc::statx is only supported on these platforms - // FIXME: Add musl target env when our minimum supported - // rust version is 1.93. To clarify, statx support is - // introduced to musl in 1.25 as mentioned officially here: - // https://musl.libc.org/releases.html. - // However, rustup target_env building for *-linux-musl - // uses 1.25 musl on all *-linux-musl platforms starting - // in 1.93 stable rust version. - // https://blog.rust-lang.org/2025/12/05/Updating-musl-1.2.5/ - any(target_env = "gnu", target_os = "android") -))] -pub(crate) async fn read_uring(path: &Path) -> io::Result> { - let file = OpenOptions::new().read(true).open(path).await?; - - let size_hint = Op::file_metadata(&file)? - .await - .map(|m| m.len() as usize) - .ok(); - - let fd: OwnedFd = file - .try_into_std() - .expect("unexpected in-flight operation detected") - .into(); - - let mut buf = Vec::new(); - - if let Some(size_hint) = size_hint { - buf.try_reserve(size_hint)?; - } +cfg_io_uring! { + #[cfg( + // libc::statx is only supported on these platforms + // FIXME: Add musl target env when our minimum supported + // rust version is 1.93. To clarify, statx support is + // introduced to musl in 1.25 as mentioned officially here: + // https://musl.libc.org/releases.html. + // However, rustup target_env building for *-linux-musl + // uses 1.25 musl on all *-linux-musl platforms starting + // in 1.93 stable rust version. + // https://blog.rust-lang.org/2025/12/05/Updating-musl-1.2.5/ + any(target_env = "gnu", target_os = "android") + )] + pub(crate) async fn read_uring(path: &Path) -> io::Result> { + let file = OpenOptions::new().read(true).open(path).await?; + + let size_hint = Op::file_metadata(&file)? + .await + .map(|m| m.len() as usize) + .ok(); + + let fd: OwnedFd = file + .try_into_std() + .expect("unexpected in-flight operation detected") + .into(); + + let mut buf = Vec::new(); + + if let Some(size_hint) = size_hint { + buf.try_reserve(size_hint)?; + } - read_to_end_uring(fd, buf).await + read_to_end_uring(fd, buf).await + } } #[allow(unused)] // FIXME: remove when MSRV is 1.93 (due to statx on 1.25 musl) diff --git a/tokio/src/fs/try_exists.rs b/tokio/src/fs/try_exists.rs index dffc2f3534f..01c439356fa 100644 --- a/tokio/src/fs/try_exists.rs +++ b/tokio/src/fs/try_exists.rs @@ -57,11 +57,7 @@ pub async fn try_exists(path: impl AsRef) -> io::Result { cfg_io_uring! { #[inline] - #[cfg(all( - tokio_unstable, - feature = "io-uring", - feature = "rt", - feature = "fs", + #[cfg( // libc::statx is only supported on these platforms // FIXME: Add musl target env when our minimum supported // rust version is 1.93. To clarify, statx support is @@ -72,7 +68,7 @@ cfg_io_uring! { // in 1.93 stable rust version. // https://blog.rust-lang.org/2025/12/05/Updating-musl-1.2.5/ any(target_env = "gnu", target_os = "android") - ))] + )] async fn try_exists_uring(path: &Path) -> io::Result { use crate::runtime::driver::op::Op; diff --git a/tokio/src/io/uring/statx.rs b/tokio/src/io/uring/statx.rs index 8eca50ed695..9ec835c54e6 100644 --- a/tokio/src/io/uring/statx.rs +++ b/tokio/src/io/uring/statx.rs @@ -90,7 +90,7 @@ impl Op { buffer.as_mut_ptr().cast(), ) .flags(flags) - .mask(libc::STATX_BASIC_STATS) + .mask(libc::STATX_BASIC_STATS | libc::STATX_BTIME) .build(); // SAFETY: Parameters are valid for the entire duration of the operation diff --git a/tokio/tests/fs_uring_statx.rs b/tokio/tests/fs_uring_statx.rs index 77ba12b6a31..eacbe594186 100644 --- a/tokio/tests/fs_uring_statx.rs +++ b/tokio/tests/fs_uring_statx.rs @@ -26,6 +26,12 @@ use tokio::runtime::{Builder, Runtime}; use tokio_test::assert_pending; use tokio_util::task::TaskTracker; +use crate::support::io_uring::io_uring_supported; + +mod support { + pub(crate) mod io_uring; +} + fn multi_rt(n: usize) -> Box Runtime> { Box::new(move || { Builder::new_multi_thread() @@ -53,6 +59,10 @@ fn rt_combinations() -> Vec Runtime>> { #[test] fn shutdown_runtime_while_performing_io_uring_ops() { + if !io_uring_supported() { + return; + } + fn run(rt: Runtime) { let (done_tx, done_rx) = mpsc::channel(); let (_tmp, path) = create_tmp_files(1); @@ -221,6 +231,10 @@ async fn stat_path_name_too_long() { #[tokio::test] async fn cancel_op_future() { + if !io_uring_supported() { + return; + } + let (_tmp_file, path): (Vec, Vec) = create_tmp_files(1); let path = path[0].clone(); @@ -230,13 +244,9 @@ async fn cancel_op_future() { poll_fn(|cx| { let fut = try_exists(path.clone()); - // If io_uring is enabled (and not falling back to the thread pool), - // the first poll should return Pending. We don't check if the result - // is actually pending because we run some checks on old kernel that - // do not support uring. - let _pending = Box::pin(fut).poll_unpin(cx); - - tx.send(()).unwrap(); + // the first poll should return Pending. + assert_pending!(Box::pin(fut).poll_unpin(cx)); + tx.send(true).unwrap(); Poll::<()>::Pending }) @@ -244,7 +254,8 @@ async fn cancel_op_future() { }); // Wait for the first poll - rx.recv().await.unwrap(); + let val = rx.recv().await; + assert!(val.unwrap()); handle.abort(); From f1e6d2f75f9c91f1f844513a37d5f2d53678e9a5 Mon Sep 17 00:00:00 2001 From: Mahdi Ali-Raihan Date: Mon, 18 May 2026 17:01:03 -0400 Subject: [PATCH 09/11] Uncommented stat_permission_denied test and make sure it doesn't run on platforms that don't support io_uring, removed unnecessary comments, and added TODO on symlink_metadata for when Metadata::from_statx is stabilized --- tokio/src/io/uring/statx.rs | 12 +++--- tokio/tests/fs_uring_statx.rs | 70 ++++++++++++++++++----------------- 2 files changed, 42 insertions(+), 40 deletions(-) diff --git a/tokio/src/io/uring/statx.rs b/tokio/src/io/uring/statx.rs index 9ec835c54e6..db28b3a944a 100644 --- a/tokio/src/io/uring/statx.rs +++ b/tokio/src/io/uring/statx.rs @@ -101,12 +101,7 @@ impl Op { /// if the path provided points to a symlink location. #[inline] pub(crate) fn metadata(path: &Path) -> io::Result> { - Op::statx( - path, - // we don't need to pass in AT_SYMLINK_FOLLOW here, it'll follow - // by default it seems - libc::AT_STATX_SYNC_AS_STAT, - ) + Op::statx(path, libc::AT_STATX_SYNC_AS_STAT) } /// Retrieves the metadata information of the given file @@ -142,6 +137,11 @@ impl Op { }) } + // TODO: Once `Metadata::from_statx` is stabilized, we can use use this function + // to enable io-uring support on `tokio::fs::symlink_metadata`. + // See this PR for more detail: https://github.com/tokio-rs/tokio/pull/8080 + // See `Metadata::from_statx` tracking issue to see progress: + // https://github.com/rust-lang/rust/issues/156268 /// Retrieves the metadata information of the given path without following symlinks. #[inline] #[allow(dead_code)] diff --git a/tokio/tests/fs_uring_statx.rs b/tokio/tests/fs_uring_statx.rs index eacbe594186..3ddc5866e85 100644 --- a/tokio/tests/fs_uring_statx.rs +++ b/tokio/tests/fs_uring_statx.rs @@ -13,15 +13,13 @@ use futures::future::FutureExt; use libc::PATH_MAX; use std::future::poll_fn; use std::io::Write; -// use std::os::unix::fs::PermissionsExt; +use std::os::unix::fs::PermissionsExt; use std::path::PathBuf; use std::sync::mpsc; use std::task::Poll; use std::time::Duration; use tempfile::{tempdir, NamedTempFile}; -use tokio::fs::{ - /*create_dir,*/ /*metadata,*/ /*set_permissions,*/ symlink, try_exists, /*write*/ -}; +use tokio::fs::{create_dir, metadata, set_permissions, symlink, try_exists, write}; use tokio::runtime::{Builder, Runtime}; use tokio_test::assert_pending; use tokio_util::task::TaskTracker; @@ -152,36 +150,40 @@ async fn stat_nonexistent_file() { } // Error is not produced on Linux 4.19 (Linux 7.1 it works) -// #[tokio::test] -// #[cfg_attr(miri, ignore)] // No `chmod` in miri. -// #[cfg(unix)] -// async fn stat_permission_denied() { -// let dir = tempdir().unwrap(); -// let permission_denied_directory_path = dir.path().join("baz"); -// create_dir(&permission_denied_directory_path).await.unwrap(); -// let permission_denied_file_path = permission_denied_directory_path.join("baz.txt"); -// write(&permission_denied_file_path, b"Hello File!") -// .await -// .unwrap(); -// let mut perms = metadata(&permission_denied_directory_path) -// .await -// .unwrap() -// .permissions(); - -// perms.set_mode(0o244); -// set_permissions(&permission_denied_directory_path, perms) -// .await -// .unwrap(); -// let permission_denied_result = try_exists(permission_denied_file_path).await; -// assert_eq!( -// permission_denied_result -// .err() -// .unwrap() -// .raw_os_error() -// .unwrap(), -// libc::EACCES -// ); -// } +#[tokio::test] +#[cfg_attr(miri, ignore)] // No `chmod` in miri. +#[cfg(unix)] +async fn stat_permission_denied() { + if !io_uring_supported() { + return; + } + + let dir = tempdir().unwrap(); + let permission_denied_directory_path = dir.path().join("baz"); + create_dir(&permission_denied_directory_path).await.unwrap(); + let permission_denied_file_path = permission_denied_directory_path.join("baz.txt"); + write(&permission_denied_file_path, b"Hello File!") + .await + .unwrap(); + let mut perms = metadata(&permission_denied_directory_path) + .await + .unwrap() + .permissions(); + + perms.set_mode(0o244); + set_permissions(&permission_denied_directory_path, perms) + .await + .unwrap(); + let permission_denied_result = try_exists(permission_denied_file_path).await; + assert_eq!( + permission_denied_result + .err() + .unwrap() + .raw_os_error() + .unwrap(), + libc::EACCES + ); +} #[tokio::test] #[cfg(unix)] From 383724f01ecef1cdfd27a99cd5b7e63f2d7583ee Mon Sep 17 00:00:00 2001 From: Mahdi Ali-Raihan Date: Tue, 26 May 2026 02:30:03 -0400 Subject: [PATCH 10/11] Statx fd leak drop test added and added STATX_BTIME to file_metadata --- tokio/src/io/uring/statx.rs | 2 +- tokio/tests/fs_uring_statx_fd_leak_test.rs | 166 +++++++++++++++++++++ 2 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 tokio/tests/fs_uring_statx_fd_leak_test.rs diff --git a/tokio/src/io/uring/statx.rs b/tokio/src/io/uring/statx.rs index db28b3a944a..22deba36e2a 100644 --- a/tokio/src/io/uring/statx.rs +++ b/tokio/src/io/uring/statx.rs @@ -122,7 +122,7 @@ impl Op { buffer.as_mut_ptr().cast(), ) .flags(libc::AT_STATX_SYNC_AS_STAT | libc::AT_EMPTY_PATH) - .mask(libc::STATX_BASIC_STATS) + .mask(libc::STATX_BASIC_STATS | libc::STATX_BTIME) .build(); // SAFETY: Parameters are valid for the entire duration of the operation diff --git a/tokio/tests/fs_uring_statx_fd_leak_test.rs b/tokio/tests/fs_uring_statx_fd_leak_test.rs new file mode 100644 index 00000000000..c82f9d4dd49 --- /dev/null +++ b/tokio/tests/fs_uring_statx_fd_leak_test.rs @@ -0,0 +1,166 @@ +#![cfg(all( + tokio_unstable, + feature = "io-uring", + feature = "rt", + feature = "fs", + target_os = "linux" +))] + +mod support { + pub(crate) mod io_uring; +} + +use std::fmt::Debug; +use std::fs; +use std::future::Future; +use std::path::PathBuf; +use std::pin::Pin; +use std::task::{Context, Poll}; +use std::time::Duration; + +use tempfile::NamedTempFile; +use tokio::fs::read; +use tokio::sync::mpsc::{unbounded_channel, UnboundedSender}; +use tokio::time::timeout; +use tokio_test::assert_pending; + +use crate::support::io_uring::io_uring_supported; + +/// Count currently-open fds in this process. +fn fd_count() -> usize { + fs::read_dir("/proc/self/fd").unwrap().count() +} + +/// First poll: +/// - polls the inner `tokio::fs::read()` future once, +/// - expects `Pending` so we know we took the io_uring path, +/// - registers the task waker with Tokio's uring machinery. +/// +/// Second poll: +/// - happens after the kernel completes the open and Tokio stores the CQE as +/// `Lifecycle::Completed(cqe)` and wakes the task, +/// - polls inner future again to execute `Op::file_metadata`/Statx operation +/// +/// Third poll: +/// - happens after the kernel completes the statx and Tokio stores the CQE as +/// `Lifecycle::Completed(cqe)` and wakes the task, +/// - stays pending forever so the task can be aborted. +/// +/// Aborting the task here drops the inner `Op::file_metadata()` future while Tokio +/// still has a completed CQE sitting in the slab. +struct PollOpenOnceThenNeverRepoll { + inner: Pin>, + poll_senders: Vec>, + poll_pending_counts: usize, +} + +impl Future for PollOpenOnceThenNeverRepoll +where + F::Output: Debug, +{ + type Output = F::Output; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let poll_count = self.poll_pending_counts; + let sender_count = self.poll_senders.len(); + if poll_count < sender_count { + // We were polled again after the inner statx completed and woke the task. + if poll_count == sender_count - 1 { + self.poll_senders[poll_count].send(()).unwrap(); + self.poll_pending_counts += 1; + } + // The first two poll should return `Poll::Pending`` because it verifies + // that io_uring is enabled and then executes the open operation in + // `tokio::fs::read` + else { + assert_pending!(self.inner.as_mut().poll(cx)); + self.poll_pending_counts += 1; + self.poll_senders[poll_count].send(()).unwrap(); + } + } + + Poll::Pending + } +} + +async fn completed_then_dropped_before_repoll(path: PathBuf) { + // `tokio::fs::read` has an Open operation occurring (via OpenOptions) before doing a + // Statx operation (via `Op::file_metadata`), we must poll and complete the Open operation + // first to then be able to poll the Statx operation. + const POLL_PENDING_COUNT: usize = 3; + let mut poll_senders: Vec> = Vec::new(); + let mut poll_receivers = Vec::new(); + + for _ in 0..POLL_PENDING_COUNT { + let (sender, receiver) = unbounded_channel(); + poll_senders.push(sender); + poll_receivers.push(receiver); + } + + let handle = tokio::spawn(async move { + let fut = read(&path); + PollOpenOnceThenNeverRepoll { + inner: Box::pin(fut), + poll_senders, + poll_pending_counts: 0, + } + .await + }); + + for (i, poll_receiver) in poll_receivers + .iter_mut() + .enumerate() + .take(POLL_PENDING_COUNT) + { + // Wait until the inner open has been polled once and registered with io_uring. + if i == 0 { + poll_receiver.recv().await.unwrap(); + } else { + // Wait until Tokio wakes the task because the open/statx completed. At this point + // the CQE should already be stored as `Lifecycle::Completed(cqe)`. + let _ = timeout(Duration::from_secs(2), poll_receiver.recv()).await; + } + } + + // Abort now, before the inner statx future gets re-polled and consumes the CQE. + handle.abort(); + let err = handle.await.unwrap_err(); + assert!(err.is_cancelled(), "task was not cancelled as expected"); +} + +#[test] +fn uring_completed_then_dropped() { + if !io_uring_supported() { + return; + } + + let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(1) + .enable_all() + .build() + .unwrap(); + + rt.block_on(async { + let before = fd_count(); + let tmp = NamedTempFile::new().unwrap(); + let path = tmp.path().to_path_buf(); + + for _ in 0..128 { + completed_then_dropped_before_repoll(path.clone()).await; + } + + // Give completions a moment to settle before counting fds. + tokio::time::sleep(Duration::from_millis(250)).await; + + let after = fd_count(); + let leaked = after.saturating_sub(before); + + // Since we are opening 128 files, we expect that the related fds + // related to this operation will be closed. Since some other fds + // can be opened in the meantime, we expect this number to be higher + // than the counter before opening the files. This number could be + // lower, but to avoid test flakiness we check that this is at most + // half the number of the file we opened to check if there's a leak. + assert!(leaked <= 64); + }); +} From 3a241593d310bd5c07fb7f3466b400d960009e22 Mon Sep 17 00:00:00 2001 From: Mahdi Ali-Raihan Date: Sun, 31 May 2026 02:07:02 -0400 Subject: [PATCH 11/11] Remove unnecessary utils function, refactored code in statx and statx fd leak test, and removed cfg_io_uring gates (localized the feature gate to the pertinent part of the code that uses it in read_uring) --- tokio/src/fs/mod.rs | 12 ---- tokio/src/fs/read_uring.rs | 43 ++++++-------- tokio/src/fs/write.rs | 69 ++++++++++++---------- tokio/src/io/uring/statx.rs | 29 +++++---- tokio/src/io/uring/utils.rs | 21 ------- tokio/tests/fs_uring_statx_fd_leak_test.rs | 11 +--- 6 files changed, 77 insertions(+), 108 deletions(-) diff --git a/tokio/src/fs/mod.rs b/tokio/src/fs/mod.rs index 315877bc94f..8701d3a1083 100644 --- a/tokio/src/fs/mod.rs +++ b/tokio/src/fs/mod.rs @@ -297,18 +297,6 @@ cfg_windows! { cfg_io_uring! { pub(crate) mod read_uring; - #[cfg( - // libc::statx is only supported on these platforms - // FIXME: Add musl target env when our minimum supported - // rust version is 1.93. To clarify, statx support is - // introduced to musl in 1.25 as mentioned officially here: - // https://musl.libc.org/releases.html. - // However, rustup target_env building for *-linux-musl - // uses 1.25 musl on all *-linux-musl platforms starting - // in 1.93 stable rust version. - // https://blog.rust-lang.org/2025/12/05/Updating-musl-1.2.5/ - any(target_env = "gnu", target_os = "android") - )] pub(crate) use self::read_uring::read_uring; pub(crate) use self::open_options::UringOpenOptions; diff --git a/tokio/src/fs/read_uring.rs b/tokio/src/fs/read_uring.rs index 3d9351ef518..ef5b2980979 100644 --- a/tokio/src/fs/read_uring.rs +++ b/tokio/src/fs/read_uring.rs @@ -1,11 +1,9 @@ -#[allow(unused)] // FIXME: remove when MSRV is 1.93 (due to statx on 1.25 musl) use crate::fs::OpenOptions; use crate::runtime::driver::op::Op; use std::io; use std::io::ErrorKind; use std::os::fd::OwnedFd; -#[allow(unused)] // FIXME: remove when MSRV is 1.93 (due to statx on 1.25 musl) use std::path::Path; // this algorithm is inspired from rust std lib version 1.90.0 @@ -16,10 +14,14 @@ const PROBE_SIZE_U32: u32 = PROBE_SIZE as u32; // Max bytes we can read using io uring submission at a time // SAFETY: cannot be higher than u32::MAX for safe cast // Set to read max 64 MiB at time -#[allow(unused)] // FIXME: remove when MSRV is 1.93 (due to statx on 1.25 musl) const MAX_READ_SIZE: usize = 64 * 1024 * 1024; -cfg_io_uring! { +pub(crate) async fn read_uring(path: &Path) -> io::Result> { + let file = OpenOptions::new().read(true).open(path).await?; + + #[cfg(not(any(target_env = "gnu", target_os = "android")))] + let size_hint: Option = file.metadata().await.map(|m| m.len() as usize).ok(); + #[cfg( // libc::statx is only supported on these platforms // FIXME: Add musl target env when our minimum supported @@ -32,30 +34,25 @@ cfg_io_uring! { // https://blog.rust-lang.org/2025/12/05/Updating-musl-1.2.5/ any(target_env = "gnu", target_os = "android") )] - pub(crate) async fn read_uring(path: &Path) -> io::Result> { - let file = OpenOptions::new().read(true).open(path).await?; - - let size_hint = Op::file_metadata(&file)? - .await - .map(|m| m.len() as usize) - .ok(); + let size_hint = Op::file_metadata(&file)? + .await + .map(|m| m.len() as usize) + .ok(); - let fd: OwnedFd = file - .try_into_std() - .expect("unexpected in-flight operation detected") - .into(); + let fd: OwnedFd = file + .try_into_std() + .expect("unexpected in-flight operation detected") + .into(); - let mut buf = Vec::new(); + let mut buf = Vec::new(); - if let Some(size_hint) = size_hint { - buf.try_reserve(size_hint)?; - } - - read_to_end_uring(fd, buf).await + if let Some(size_hint) = size_hint { + buf.try_reserve(size_hint)?; } + + read_to_end_uring(fd, buf).await } -#[allow(unused)] // FIXME: remove when MSRV is 1.93 (due to statx on 1.25 musl) async fn read_to_end_uring(mut fd: OwnedFd, mut buf: Vec) -> io::Result> { let mut offset = 0; let start_cap = buf.capacity(); @@ -100,7 +97,6 @@ async fn read_to_end_uring(mut fd: OwnedFd, mut buf: Vec) -> io::Result, @@ -130,7 +126,6 @@ async fn small_probe_read( // Takes a length to read and returns a single read in the buffer // // Returns the file descriptor, buffer and EOF reached or not -#[allow(unused)] // FIXME: remove when MSRV is 1.93 (due to statx on 1.25 musl) async fn op_read( mut fd: OwnedFd, mut buf: Vec, diff --git a/tokio/src/fs/write.rs b/tokio/src/fs/write.rs index 6c9c195016c..a2b22fd8cbf 100644 --- a/tokio/src/fs/write.rs +++ b/tokio/src/fs/write.rs @@ -48,44 +48,49 @@ pub async fn write(path: impl AsRef, contents: impl AsRef<[u8]>) -> io::Re write_spawn_blocking(path, contents).await } -cfg_io_uring! { - async fn write_uring(path: &Path, mut buf: OwnedBuf) -> io::Result<()> { - use crate::{fs::OpenOptions, runtime::driver::op::Op}; - use std::os::fd::OwnedFd; +#[cfg(all( + tokio_unstable, + feature = "io-uring", + feature = "rt", + feature = "fs", + target_os = "linux" +))] +async fn write_uring(path: &Path, mut buf: OwnedBuf) -> io::Result<()> { + use crate::{fs::OpenOptions, runtime::driver::op::Op}; + use std::os::fd::OwnedFd; - let file = OpenOptions::new() - .write(true) - .create(true) - .truncate(true) - .open(path) - .await?; + let file = OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .open(path) + .await?; - let mut fd: OwnedFd = file - .try_into_std() - .expect("unexpected in-flight operation detected") - .into(); + let mut fd: OwnedFd = file + .try_into_std() + .expect("unexpected in-flight operation detected") + .into(); - let total: usize = buf.as_ref().len(); - let mut buf_offset: usize = 0; - let mut file_offset: u64 = 0; - while buf_offset < total { - let (res, _buf, _fd) = Op::write_at(fd, buf, buf_offset, file_offset)?.await; + let total: usize = buf.as_ref().len(); + let mut buf_offset: usize = 0; + let mut file_offset: u64 = 0; + while buf_offset < total { + let (res, _buf, _fd) = Op::write_at(fd, buf, buf_offset, file_offset)?.await; - let n = match res { - Ok(0) => return Err(io::ErrorKind::WriteZero.into()), - Ok(n) => n, - Err(e) if e.kind() == io::ErrorKind::Interrupted => 0, - Err(e) => return Err(e), - }; + let n = match res { + Ok(0) => return Err(io::ErrorKind::WriteZero.into()), + Ok(n) => n, + Err(e) if e.kind() == io::ErrorKind::Interrupted => 0, + Err(e) => return Err(e), + }; - buf = _buf; - fd = _fd; - buf_offset += n as usize; - file_offset += n as u64; - } - - Ok(()) + buf = _buf; + fd = _fd; + buf_offset += n as usize; + file_offset += n as u64; } + + Ok(()) } async fn write_spawn_blocking(path: &Path, contents: OwnedBuf) -> io::Result<()> { diff --git a/tokio/src/io/uring/statx.rs b/tokio/src/io/uring/statx.rs index 22deba36e2a..06a3e7e7bb8 100644 --- a/tokio/src/io/uring/statx.rs +++ b/tokio/src/io/uring/statx.rs @@ -16,10 +16,11 @@ ))] use crate::fs::File; -use crate::io::uring::utils::{box_assume_init, box_new_uninit, cstr}; +use crate::io::uring::utils::cstr; use crate::runtime::driver::op::{CancelData, Cancellable, Completable, CqeResult, Op}; use io_uring::{opcode, types}; use libc::statx; +use std::ffi::{CStr, CString}; use std::fmt::{Debug, Formatter}; use std::io; use std::mem::MaybeUninit; @@ -47,8 +48,7 @@ impl Debug for Metadata { pub(crate) struct Statx { /// This field will be read by the kernel during the operation, so we /// need to ensure it is valid for the entire duration of the operation. - #[allow(dead_code)] - path: std::ffi::CString, + _path: CString, buffer: Box>, } @@ -58,12 +58,11 @@ impl Completable for Statx { fn complete(self, cqe: CqeResult) -> Self::Output { // SAFETY: On success, we always receive 0, which should guarantee // that the information about a file is stored inside the - // statx buffer. On failure, we'll receive an Error value, - // avoiding misuse of `.box_assume_init()`. + // statx buffer. On failure, we'll receive an Error value. // Refer to man page description and return value: // https://man7.org/linux/man-pages/man2/statx.2.html cqe.result - .map(|_| Metadata(*unsafe { box_assume_init(self.buffer) })) + .map(|_| Metadata(unsafe { *self.buffer.as_ptr() })) } fn complete_with_error(self, error: io::Error) -> Self::Output { @@ -82,7 +81,7 @@ impl Op { #[inline] fn statx(path: &Path, flags: i32) -> io::Result> { let path = cstr(path)?; - let mut buffer = box_new_uninit::(); + let mut buffer = Box::new(MaybeUninit::::uninit()); let statx_op = opcode::Statx::new( types::Fd(libc::AT_FDCWD), @@ -94,7 +93,15 @@ impl Op { .build(); // SAFETY: Parameters are valid for the entire duration of the operation - Ok(unsafe { Op::new(statx_op, Statx { path, buffer }) }) + Ok(unsafe { + Op::new( + statx_op, + Statx { + _path: path, + buffer, + }, + ) + }) } /// Retrieves the metadata information of the given path, following symlinks @@ -106,8 +113,8 @@ impl Op { /// Retrieves the metadata information of the given file pub(crate) fn file_metadata(file: &File) -> io::Result> { - let mut buffer = box_new_uninit::(); - let empty_path = cstr(Path::new(""))?; + let mut buffer = Box::new(MaybeUninit::::uninit()); + let empty_path: &'static CStr = c""; // io-uring was introduced in linux 5.1 // pass in an empty path instead of null to target the file descriptor @@ -130,7 +137,7 @@ impl Op { Op::new( statx_op, Statx { - path: empty_path, + _path: empty_path.into(), buffer, }, ) diff --git a/tokio/src/io/uring/utils.rs b/tokio/src/io/uring/utils.rs index cde2442dd49..65185936652 100644 --- a/tokio/src/io/uring/utils.rs +++ b/tokio/src/io/uring/utils.rs @@ -1,4 +1,3 @@ -use std::mem::MaybeUninit; use std::os::fd::{AsRawFd, OwnedFd, RawFd}; use std::os::unix::ffi::OsStrExt; use std::sync::Arc; @@ -30,23 +29,3 @@ impl UringFd for ArcFd { pub(crate) fn cstr(p: &Path) -> io::Result { Ok(CString::new(p.as_os_str().as_bytes())?) } - -// TODO(MSRV 1.82): When bumping MSRV, switch to `Box::::new_uninit()`. -#[allow(unused)] // FIXME: remove when MSRV is 1.93 (due to statx on 1.25 musl) -pub(crate) fn box_new_uninit() -> Box> { - // Box::::new_uninit() - Box::new(MaybeUninit::uninit()) -} - -// TODO(MSRV 1.82): When bumping MSRV, switch to `Box::>::assume_init()`. -/// # Safety -/// -/// It is up to the caller to guarantee that the value really is in an initialized state. -/// Calling this when the content is not yet fully initialized causes immediate undefined behavior. -#[allow(unused)] // FIXME: remove when MSRV is 1.93 (due to statx on 1.25 musl) -pub(crate) unsafe fn box_assume_init(boxed: Box>) -> Box { - let raw = Box::into_raw(boxed); - // SAFETY: If the caller guarantees that the MaybeUninit is initialized, then - // costructing the box from a raw mut ptr of MaybeUninit should be safe. - unsafe { Box::from_raw(raw as *mut T) } -} diff --git a/tokio/tests/fs_uring_statx_fd_leak_test.rs b/tokio/tests/fs_uring_statx_fd_leak_test.rs index c82f9d4dd49..c9d1efad1a1 100644 --- a/tokio/tests/fs_uring_statx_fd_leak_test.rs +++ b/tokio/tests/fs_uring_statx_fd_leak_test.rs @@ -64,19 +64,14 @@ where let poll_count = self.poll_pending_counts; let sender_count = self.poll_senders.len(); if poll_count < sender_count { - // We were polled again after the inner statx completed and woke the task. - if poll_count == sender_count - 1 { - self.poll_senders[poll_count].send(()).unwrap(); - self.poll_pending_counts += 1; - } // The first two poll should return `Poll::Pending`` because it verifies // that io_uring is enabled and then executes the open operation in // `tokio::fs::read` - else { + if poll_count != sender_count - 1 { assert_pending!(self.inner.as_mut().poll(cx)); - self.poll_pending_counts += 1; - self.poll_senders[poll_count].send(()).unwrap(); } + self.poll_pending_counts += 1; + self.poll_senders[poll_count].send(()).unwrap(); } Poll::Pending