-
Notifications
You must be signed in to change notification settings - Fork 456
pkg: rewrite archive extraction with typed tar capabilities #12917
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Draft
Alizter
wants to merge
1
commit into
ocaml:main
Choose a base branch
from
Alizter:push-tkrvytnyslnk
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+532
−241
Draft
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,362 @@ | ||
| open Stdune | ||
| module Process = Dune_engine.Process | ||
| open Fiber.O | ||
|
|
||
| (** {1 Archive Extraction} | ||
|
|
||
| This module handles extraction of .tar, .tar.gz, .tar.bz2, and .zip archives | ||
| using system tools (tar, bsdtar, unzip). The main complexity comes from | ||
| supporting different tar implementations with varying capabilities: | ||
|
|
||
| - BSD tar (libarchive): Auto-detects compression, supports zip extraction | ||
| - GNU tar: Auto-detects compression, no zip support | ||
| - Other (OpenBSD, busybox): Requires explicit -z/-j flags, no zip support | ||
|
|
||
| We use GADTs with polymorphic variants to encode these capabilities at the | ||
| type level. [Tar.t] has a type parameter indicating which formats it supports, | ||
| and [Tar.args] can only be called with formats valid for that tar. This | ||
| prevents invalid combinations (like extracting zip with GNU tar) at compile | ||
| time rather than runtime. | ||
|
|
||
| For zip extraction, we prefer BSD tar when available, falling back to the | ||
| [unzip] binary if BSD tar is not found. *) | ||
|
|
||
| module Format : sig | ||
| (** Supported archive formats: | ||
| - [`Tar]: uncompressed tar (.tar) | ||
| - [`Tar_gz]: gzip-compressed tar (.tar.gz, .tgz) | ||
| - [`Tar_bz2]: bzip2-compressed tar (.tar.bz2, .tbz) | ||
| - [`Zip]: zip archive (.zip) *) | ||
| type t = | ||
| [ `Tar | ||
| | `Tar_gz | ||
| | `Tar_bz2 | ||
| | `Zip | ||
| ] | ||
|
|
||
| (** Detect archive format from filename extension. Returns the format and | ||
| the matched extension. *) | ||
| val of_filename : Filename.t -> (t * string) option | ||
| end = struct | ||
| type t = | ||
| [ `Tar | ||
| | `Tar_gz | ||
| | `Tar_bz2 | ||
| | `Zip | ||
| ] | ||
|
|
||
| let of_filename = | ||
| let extensions = | ||
| [ ".tar", `Tar | ||
| ; ".tar.gz", `Tar_gz | ||
| ; ".tgz", `Tar_gz | ||
| ; ".tar.bz2", `Tar_bz2 | ||
| ; ".tbz", `Tar_bz2 | ||
| ; ".zip", `Zip | ||
| ] | ||
| in | ||
| fun filename -> | ||
| let check_suffix suffix = Filename.check_suffix filename suffix in | ||
| List.find_map extensions ~f:(fun (ext, format) -> | ||
| Option.some_if (check_suffix ext) (format, ext)) | ||
| ;; | ||
| end | ||
|
|
||
| let is_supported filename = Option.is_some (Format.of_filename filename) | ||
| let which bin_name = Bin.which ~path:(Env_path.path Env.initial) bin_name | ||
|
|
||
| module Error = struct | ||
| type t = | ||
| | No_extractor of | ||
| { ext : string | ||
| ; tried : string list | ||
| } | ||
| | Command_failed of | ||
| { bin : Path.t | ||
| ; archive : Path.t | ||
| ; exit_code : int | ||
| ; stderr : string list | ||
| } | ||
| | Read_dir_failed of | ||
| { archive : Path.t | ||
| ; error : Unix_error.Detailed.t | ||
| } | ||
|
|
||
| let message = function | ||
| | No_extractor { ext; tried } -> | ||
| [ Pp.textf "No program found to extract %s files. Tried:" ext | ||
| ; Pp.enumerate tried ~f:Pp.verbatim | ||
| ] | ||
| | Command_failed { bin; archive; exit_code; stderr } -> | ||
| [ Pp.textf "Failed to extract '%s':" (Path.basename archive) | ||
| ; Pp.hovbox | ||
| (Pp.concat | ||
| ~sep:Pp.space | ||
| [ User_message.command (Path.basename bin) | ||
| ; Pp.textf "failed with non-zero exit code '%d' and output:" exit_code | ||
| ]) | ||
| ; Pp.vbox (Pp.concat_map ~sep:Pp.cut ~f:Pp.paragraph stderr) | ||
| ] | ||
| | Read_dir_failed { archive; error } -> | ||
| [ Pp.textf "Failed to extract '%s':" (Path.to_string_maybe_quoted archive) | ||
| ; Pp.text (Unix_error.Detailed.to_string_hum error) | ||
| ] | ||
| ;; | ||
|
|
||
| let raise t = User_error.raise (message t) | ||
| end | ||
|
|
||
| module Tar : sig | ||
| (** Tar executable for current system. The type parameter encodes which | ||
| formats this tar supports - pattern match on [found] to get the right | ||
| type. | ||
|
|
||
| {v | ||
| | Kind | Implementations | Detects compression | Zip support | | ||
| |-------|--------------------------|---------------------|-------------| | ||
| | Bsd | bsdtar, libarchive | yes | yes | | ||
| | | (macOS, FreeBSD, Win10+) | | | | ||
| | | | | | | ||
| | Gnu | GNU tar (most Linux) | yes | no | | ||
| | | | | | | ||
| | Other | OpenBSD tar, busybox tar | no | no | | ||
| v} | ||
|
|
||
| [Other] is the fallback for unknown implementations. We use explicit | ||
| [-z]/[-j] flags which is the safest approach. *) | ||
| type 'fmt t | ||
|
|
||
| (** Archive formats supported by BSD tar. *) | ||
| type bsd = | ||
| [ `Tar | ||
| | `Tar_gz | ||
| | `Tar_bz2 | ||
| | `Zip | ||
| ] | ||
|
|
||
| (** Result of finding tar. Pattern match to get the appropriately typed tar. *) | ||
| type found = | ||
| | Bsd of bsd t | ||
| | Gnu of [ `Tar | `Tar_gz | `Tar_bz2 ] t | ||
| | Other of [ `Tar | `Tar_gz | `Tar_bz2 ] t | ||
|
|
||
| (** Attempt to find tar executable for current system. *) | ||
| val find : found option Fiber.Lazy.t | ||
|
|
||
| (** Names of binaries checked when attempting to find tar executable. *) | ||
| val bin_names : string list | ||
|
|
||
| (** Path to tar executable. *) | ||
| val path : 'a t -> Path.t | ||
|
|
||
| (** [args t format ~archive ~target] provides the arguments for extraction. | ||
| The [format] must be in the set of formats supported by [t]. *) | ||
| val args : 'fmt t -> format:'fmt -> archive:Path.t -> target:Path.t -> string list | ||
| end = struct | ||
| type _ kind = | ||
| | K_bsd : [ `Tar | `Tar_gz | `Tar_bz2 | `Zip ] kind | ||
| | K_gnu : [ `Tar | `Tar_gz | `Tar_bz2 ] kind | ||
| | K_other : [ `Tar | `Tar_gz | `Tar_bz2 ] kind | ||
|
|
||
| type 'fmt t = | ||
| { path : Path.t | ||
| ; kind : 'fmt kind | ||
| } | ||
|
|
||
| type bsd = | ||
| [ `Tar | ||
| | `Tar_gz | ||
| | `Tar_bz2 | ||
| | `Zip | ||
| ] | ||
|
|
||
| type found = | ||
| | Bsd of bsd t | ||
| | Gnu of [ `Tar | `Tar_gz | `Tar_bz2 ] t | ||
| | Other of [ `Tar | `Tar_gz | `Tar_bz2 ] t | ||
|
|
||
| (** The order of binaries we should try from most capable to least + Windows. *) | ||
| let bin_names = [ "bsdtar"; "tar"; "gtar"; "tar.exe" ] | ||
|
|
||
| let find = | ||
| Fiber.Lazy.create (fun () -> | ||
| match List.find_map bin_names ~f:which with | ||
| | None -> Fiber.return None | ||
| | Some path -> | ||
| let+ output, _ = Process.run_capture ~display:Quiet Return path [ "--version" ] in | ||
| let matches s = Re.execp (Re.compile (Re.str s)) output in | ||
| Some | ||
| (if matches "bsdtar" || matches "libarchive" | ||
| then Bsd { path; kind = K_bsd } | ||
| else if matches "GNU tar" | ||
| then Gnu { path; kind = K_gnu } | ||
| else Other { path; kind = K_other })) | ||
| ;; | ||
|
|
||
| let path t = t.path | ||
|
|
||
| let args (type fmt) (t : fmt t) ~(format : fmt) ~archive ~target = | ||
| let decompress_flag = | ||
| match t.kind, format with | ||
| | K_bsd, (`Tar | `Tar_gz | `Tar_bz2 | `Zip) -> [] | ||
| | K_gnu, (`Tar | `Tar_gz | `Tar_bz2) -> [] | ||
| | K_other, `Tar -> [] | ||
| | K_other, `Tar_gz -> [ "-z" ] | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @shonfeder Here is the |
||
| | K_other, `Tar_bz2 -> [ "-j" ] | ||
| in | ||
| [ "-x" ] | ||
| @ decompress_flag | ||
| @ [ "-f"; Path.to_string archive; "-C"; Path.to_string target ] | ||
| ;; | ||
| end | ||
|
|
||
| module Unzip : sig | ||
| (** A binary that can extract zip files. *) | ||
| type t | ||
|
|
||
| (** Find a binary that can extract zip files. *) | ||
| val find : t option Fiber.Lazy.t | ||
|
|
||
| (** Names of binaries checked when attempting to find binary that can extract | ||
| zip files. *) | ||
| val bin_names : string list | ||
|
|
||
| (** Path to binary that can extract zip files. *) | ||
| val path : t -> Path.t | ||
|
|
||
| (** Arguments given to binary that can extract zip files. *) | ||
| val args : t -> archive:Path.t -> target:Path.t -> string list | ||
| end = struct | ||
| type t = | ||
| | Unzip of Path.t | ||
| | Tar of Tar.bsd Tar.t | ||
|
|
||
| let bin_names = Tar.bin_names @ [ "unzip" ] | ||
|
|
||
| let find = | ||
| (* We first check if we have a tar binary capable of unzipping, if not we | ||
| fallback to unzip. *) | ||
| Fiber.Lazy.create (fun () -> | ||
| Fiber.Lazy.force Tar.find | ||
| >>| function | ||
| | Some (Bsd tar) -> Some (Tar tar) | ||
| | Some (Gnu _ | Other _) | None -> | ||
| (match which "unzip" with | ||
| | Some path -> Some (Unzip path) | ||
| | None -> None)) | ||
| ;; | ||
|
|
||
| let path = function | ||
| | Unzip path -> path | ||
| | Tar tar -> Tar.path tar | ||
| ;; | ||
|
|
||
| let args t ~archive ~target = | ||
| match t with | ||
| | Unzip _ -> [ Path.to_string archive; "-d"; Path.to_string target ] | ||
| | Tar tar -> Tar.args tar ~format:`Zip ~archive ~target | ||
| ;; | ||
| end | ||
|
|
||
| module Extractor : sig | ||
| (** [Extractor.run ~archive ~target] extracts the given [archive] using an | ||
| appropriate tool to do so. *) | ||
| val run : archive:Path.t -> target:Path.t -> (unit, Error.t) result Fiber.t | ||
| end = struct | ||
| let no_extractor_error ~ext ~bin_names = | ||
| Fiber.return @@ Error (Error.No_extractor { ext; tried = bin_names }) | ||
| ;; | ||
|
|
||
| let run_cmd ~bin ~args ~archive = | ||
| let temp_stderr = Temp.create File ~prefix:"extract" ~suffix:"stderr" in | ||
| Fiber.finalize ~finally:(fun () -> | ||
| Temp.destroy File temp_stderr; | ||
| Fiber.return ()) | ||
| @@ fun () -> | ||
| let+ (), exit_code = | ||
| Process.run | ||
| ~display:Quiet | ||
| ~stdout_to: | ||
| (Process.Io.make_stdout ~output_on_success:Swallow ~output_limit:1_000_000) | ||
| ~stderr_to:(Process.Io.file temp_stderr Out) | ||
| Return | ||
| bin | ||
| args | ||
| in | ||
| if exit_code <> 0 | ||
| then | ||
| Io.with_file_in temp_stderr ~f:(fun err_channel -> | ||
| let stderr = Io.input_lines err_channel in | ||
| Error (Error.Command_failed { bin; archive; exit_code; stderr })) | ||
| else Ok () | ||
| ;; | ||
|
|
||
| let run ~archive ~target = | ||
| let format, ext = | ||
| Format.of_filename (Path.to_string archive) |> Option.value ~default:(`Tar, ".tar") | ||
| in | ||
| match (format : Format.t) with | ||
| | `Zip -> | ||
| Fiber.Lazy.force Unzip.find | ||
| >>= (function | ||
| | Some unzip -> | ||
| let bin = Unzip.path unzip in | ||
| let args = Unzip.args unzip ~archive ~target in | ||
| run_cmd ~bin ~args ~archive | ||
| | None -> no_extractor_error ~ext ~bin_names:Unzip.bin_names) | ||
| | (`Tar | `Tar_gz | `Tar_bz2) as format -> | ||
| Fiber.Lazy.force Tar.find | ||
| >>= (function | ||
| | Some (Bsd tar) -> | ||
| let bin = Tar.path tar in | ||
| let args = Tar.args tar ~format ~archive ~target in | ||
| run_cmd ~bin ~args ~archive | ||
| | Some (Gnu tar | Other tar) -> | ||
| let bin = Tar.path tar in | ||
| let args = Tar.args tar ~format ~archive ~target in | ||
| run_cmd ~bin ~args ~archive | ||
| | None -> no_extractor_error ~ext ~bin_names:Tar.bin_names) | ||
| ;; | ||
| end | ||
|
|
||
| (** Make extraction atomic by extracting to a temp directory and renaming. *) | ||
| let extract ~archive ~target = | ||
| let target_in_temp = | ||
| let prefix = Path.basename target in | ||
| let suffix = Path.basename archive in | ||
| Temp_dir.dir_for_target ~target ~prefix ~suffix | ||
| in | ||
| Fiber.finalize ~finally:(fun () -> | ||
| Temp.destroy Dir target_in_temp; | ||
| Fiber.return ()) | ||
| @@ fun () -> | ||
| Path.mkdir_p target_in_temp; | ||
| Extractor.run ~archive ~target:target_in_temp | ||
| >>| function | ||
| | Error _ as e -> e | ||
| | Ok () -> | ||
| (* Opam expects only one top-level directory and then strips it. We employ a | ||
| similar heuristic here. Opam is stricter in the multiple top-level entries | ||
| case since it errors, whereas we use the extraction directory as-is. *) | ||
| let target_in_temp = | ||
| match Path.readdir_unsorted_with_kinds target_in_temp with | ||
| | Error error -> Error (Error.Read_dir_failed { archive; error }) | ||
| | Ok [ (fname, S_DIR) ] -> Ok (Path.relative target_in_temp fname) | ||
| | Ok _ -> Ok target_in_temp | ||
| in | ||
| (match target_in_temp with | ||
| | Error _ as e -> e | ||
| | Ok target_in_temp -> | ||
| (* CR-soon Alizter: this might already exist, don't do pointeless work *) | ||
| Path.mkdir_p (Path.parent_exn target); | ||
| (* CR-someday Alizter: Add fallback to copy if EXDEV raised here *) | ||
| Path.rename target_in_temp target; | ||
| Ok ()) | ||
| ;; | ||
|
|
||
| let extract_exn ~archive ~target = | ||
| extract ~archive ~target | ||
| >>| function | ||
| | Ok () -> () | ||
| | Error e -> Error.raise e | ||
| ;; | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| open Stdune | ||
|
|
||
| (** Returns [true] if the filename has a supported archive extension | ||
| (.tar, .tar.gz, .tgz, .tar.bz2, .tbz, .zip). *) | ||
| val is_supported : Filename.t -> bool | ||
|
|
||
| module Error : sig | ||
| type t = private | ||
| | No_extractor of | ||
| { ext : string | ||
| ; tried : string list | ||
| } | ||
| | Command_failed of | ||
| { bin : Path.t | ||
| ; archive : Path.t | ||
| ; exit_code : int | ||
| ; stderr : string list | ||
| } | ||
| | Read_dir_failed of | ||
| { archive : Path.t | ||
| ; error : Unix_error.Detailed.t | ||
| } | ||
|
|
||
| val message : t -> User_message.Style.t Pp.t list | ||
| val raise : t -> 'a | ||
| end | ||
|
|
||
| (** [extract ~archive ~target] extracts the archive at [archive] into the | ||
| directory at [target], creating the directory if it doesn't already exist. | ||
| The archive format is determined from the file extension. If the extension | ||
| is not recognized, defaults to tar. *) | ||
| val extract : archive:Path.t -> target:Path.t -> (unit, Error.t) result Fiber.t | ||
|
|
||
| (** Same as [extract] but raises [User_error] on failure. *) | ||
| val extract_exn : archive:Path.t -> target:Path.t -> unit Fiber.t |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@shonfeder this is the tar binary on Open BSD
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am not sure why your are pointing these out to me, but thanks :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You mentioned if this solves #12818. The issue there is about openbsd not invoking tar correctly. Here we are doing the suggested fix.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The title of that issue is "If dune tools install fails to install a tool it can break a user's environment". It happened that the failure in that case was caused by the tar issue, but that is covered in #10123, which is a separate issue.
See #12818 (comment)
I think your reply here helps clarify that the answer to my question is 'no', it does not close #12818.