@@ -49,14 +49,16 @@ use crate::convert::TryInto;
49
49
use crate :: fs:: { File , Metadata } ;
50
50
use crate :: io:: copy:: generic_copy;
51
51
use crate :: io:: {
52
- BufRead , BufReader , BufWriter , Read , Result , StderrLock , StdinLock , StdoutLock , Take , Write ,
52
+ BufRead , BufReader , BufWriter , Error , Read , Result , StderrLock , StdinLock , StdoutLock , Take ,
53
+ Write ,
53
54
} ;
54
55
use crate :: mem:: ManuallyDrop ;
55
56
use crate :: net:: TcpStream ;
56
57
use crate :: os:: unix:: fs:: FileTypeExt ;
57
58
use crate :: os:: unix:: io:: { AsRawFd , FromRawFd , RawFd } ;
58
59
use crate :: process:: { ChildStderr , ChildStdin , ChildStdout } ;
59
- use crate :: sys:: fs:: { copy_regular_files, sendfile_splice, CopyResult , SpliceMode } ;
60
+ use crate :: ptr;
61
+ use crate :: sys:: cvt;
60
62
61
63
#[ cfg( test) ]
62
64
mod tests;
@@ -423,3 +425,145 @@ fn fd_to_meta<T: AsRawFd>(fd: &T) -> FdMeta {
423
425
Err ( _) => FdMeta :: NoneObtained ,
424
426
}
425
427
}
428
+
429
+ pub ( super ) enum CopyResult {
430
+ Ended ( Result < u64 > ) ,
431
+ Fallback ( u64 ) ,
432
+ }
433
+
434
+ /// linux-specific implementation that will attempt to use copy_file_range for copy offloading
435
+ /// as the name says, it only works on regular files
436
+ ///
437
+ /// Callers must handle fallback to a generic copy loop.
438
+ /// `Fallback` may indicate non-zero number of bytes already written
439
+ /// if one of the files' cursor +`max_len` would exceed u64::MAX (`EOVERFLOW`).
440
+ /// If the initial file offset was 0 then `Fallback` will only contain `0`.
441
+ pub ( super ) fn copy_regular_files ( reader : RawFd , writer : RawFd , max_len : u64 ) -> CopyResult {
442
+ use crate :: cmp;
443
+ use crate :: sync:: atomic:: { AtomicBool , Ordering } ;
444
+
445
+ // Kernel prior to 4.5 don't have copy_file_range
446
+ // We store the availability in a global to avoid unnecessary syscalls
447
+ static HAS_COPY_FILE_RANGE : AtomicBool = AtomicBool :: new ( true ) ;
448
+
449
+ unsafe fn copy_file_range (
450
+ fd_in : libc:: c_int ,
451
+ off_in : * mut libc:: loff_t ,
452
+ fd_out : libc:: c_int ,
453
+ off_out : * mut libc:: loff_t ,
454
+ len : libc:: size_t ,
455
+ flags : libc:: c_uint ,
456
+ ) -> libc:: c_long {
457
+ libc:: syscall ( libc:: SYS_copy_file_range , fd_in, off_in, fd_out, off_out, len, flags)
458
+ }
459
+
460
+ let has_copy_file_range = HAS_COPY_FILE_RANGE . load ( Ordering :: Relaxed ) ;
461
+ let mut written = 0u64 ;
462
+ while written < max_len {
463
+ let copy_result = if has_copy_file_range {
464
+ let bytes_to_copy = cmp:: min ( max_len - written, usize:: MAX as u64 ) ;
465
+ // cap to 2GB chunks in case u64::MAX is passed in as file size and the file has a non-zero offset
466
+ // this allows us to copy large chunks without hitting the limit,
467
+ // unless someone sets a file offset close to u64::MAX - 2GB, in which case the fallback would kick in
468
+ let bytes_to_copy = cmp:: min ( bytes_to_copy as usize , 0x8000_0000usize ) ;
469
+ let copy_result = unsafe {
470
+ // We actually don't have to adjust the offsets,
471
+ // because copy_file_range adjusts the file offset automatically
472
+ cvt ( copy_file_range (
473
+ reader,
474
+ ptr:: null_mut ( ) ,
475
+ writer,
476
+ ptr:: null_mut ( ) ,
477
+ bytes_to_copy,
478
+ 0 ,
479
+ ) )
480
+ } ;
481
+ if let Err ( ref copy_err) = copy_result {
482
+ match copy_err. raw_os_error ( ) {
483
+ Some ( libc:: ENOSYS | libc:: EPERM | libc:: EOPNOTSUPP ) => {
484
+ HAS_COPY_FILE_RANGE . store ( false , Ordering :: Relaxed ) ;
485
+ }
486
+ _ => { }
487
+ }
488
+ }
489
+ copy_result
490
+ } else {
491
+ Err ( Error :: from_raw_os_error ( libc:: ENOSYS ) )
492
+ } ;
493
+ match copy_result {
494
+ Ok ( 0 ) if written == 0 => {
495
+ // fallback to work around several kernel bugs where copy_file_range will fail to
496
+ // copy any bytes and return 0 instead of an error if
497
+ // - reading virtual files from the proc filesystem which appear to have 0 size
498
+ // but are not empty. noted in coreutils to affect kernels at least up to 5.6.19.
499
+ // - copying from an overlay filesystem in docker. reported to occur on fedora 32.
500
+ return CopyResult :: Fallback ( 0 ) ;
501
+ }
502
+ Ok ( 0 ) => return CopyResult :: Ended ( Ok ( written) ) , // reached EOF
503
+ Ok ( ret) => written += ret as u64 ,
504
+ Err ( err) => {
505
+ return match err. raw_os_error ( ) {
506
+ // when file offset + max_length > u64::MAX
507
+ Some ( libc:: EOVERFLOW ) => CopyResult :: Fallback ( written) ,
508
+ Some (
509
+ libc:: ENOSYS | libc:: EXDEV | libc:: EINVAL | libc:: EPERM | libc:: EOPNOTSUPP ,
510
+ ) => {
511
+ // Try fallback io::copy if either:
512
+ // - Kernel version is < 4.5 (ENOSYS)
513
+ // - Files are mounted on different fs (EXDEV)
514
+ // - copy_file_range is broken in various ways on RHEL/CentOS 7 (EOPNOTSUPP)
515
+ // - copy_file_range is disallowed, for example by seccomp (EPERM)
516
+ // - copy_file_range cannot be used with pipes or device nodes (EINVAL)
517
+ assert_eq ! ( written, 0 ) ;
518
+ CopyResult :: Fallback ( 0 )
519
+ }
520
+ _ => CopyResult :: Ended ( Err ( err) ) ,
521
+ } ;
522
+ }
523
+ }
524
+ }
525
+ CopyResult :: Ended ( Ok ( written) )
526
+ }
527
+
528
/// Selects which syscall `sendfile_splice` issues to move the data.
#[derive(PartialEq)]
enum SpliceMode {
    /// Transfer the data with `sendfile`.
    Sendfile,
    /// Transfer the data with `splice`.
    Splice,
}
533
+
534
+ /// performs splice or sendfile between file descriptors
535
+ /// Does _not_ fall back to a generic copy loop.
536
+ fn sendfile_splice ( mode : SpliceMode , reader : RawFd , writer : RawFd , len : u64 ) -> CopyResult {
537
+ let mut written = 0u64 ;
538
+ while written < len {
539
+ let chunk_size = crate :: cmp:: min ( len - written, 0x7ffff000_u64 ) as usize ;
540
+
541
+ let result = match mode {
542
+ SpliceMode :: Sendfile => {
543
+ cvt ( unsafe { libc:: sendfile ( writer, reader, ptr:: null_mut ( ) , chunk_size) } )
544
+ }
545
+ SpliceMode :: Splice => cvt ( unsafe {
546
+ libc:: splice ( reader, ptr:: null_mut ( ) , writer, ptr:: null_mut ( ) , chunk_size, 0 )
547
+ } ) ,
548
+ } ;
549
+
550
+ match result {
551
+ Ok ( 0 ) => break , // EOF
552
+ Ok ( ret) => written += ret as u64 ,
553
+ Err ( err) => {
554
+ return match err. raw_os_error ( ) {
555
+ Some ( os_err) if os_err == libc:: EINVAL => {
556
+ // splice/sendfile do not support this particular file descritor (EINVAL)
557
+ assert_eq ! ( written, 0 ) ;
558
+ CopyResult :: Fallback ( 0 )
559
+ }
560
+ Some ( os_err) if mode == SpliceMode :: Sendfile && os_err == libc:: EOVERFLOW => {
561
+ CopyResult :: Fallback ( written)
562
+ }
563
+ _ => CopyResult :: Ended ( Err ( err) ) ,
564
+ } ;
565
+ }
566
+ }
567
+ }
568
+ CopyResult :: Ended ( Ok ( written) )
569
+ }
0 commit comments