|
| 1 | +open Stdune |
| 2 | +module Process = Dune_engine.Process |
| 3 | +open Fiber.O |
| 4 | + |
| 5 | +(** {1 Archive Extraction} |
| 6 | +
|
| 7 | + This module handles extraction of .tar, .tar.gz, .tar.bz2, and .zip archives |
| 8 | + using system tools (tar, bsdtar, unzip). The main complexity comes from |
| 9 | + supporting different tar implementations with varying capabilities: |
| 10 | +
|
| 11 | + - BSD tar (libarchive): Auto-detects compression, supports zip extraction |
| 12 | + - GNU tar: Auto-detects compression, no zip support |
| 13 | + - Other (OpenBSD, busybox): Requires explicit -z/-j flags, no zip support |
| 14 | +
|
| 15 | + We use GADTs with polymorphic variants to encode these capabilities at the |
| 16 | + type level. [Tar.t] has a type parameter indicating which formats it supports, |
| 17 | + and [Tar.args] can only be called with formats valid for that tar. This |
| 18 | + prevents invalid combinations (like extracting zip with GNU tar) at compile |
| 19 | + time rather than runtime. |
| 20 | +
|
| 21 | + For zip extraction, we prefer BSD tar when available, falling back to the |
| 22 | + [unzip] binary if BSD tar is not found. *) |
| 23 | + |
| 24 | +module Format : sig |
| 25 | + (** Supported archive formats: |
| 26 | + - [`Tar]: uncompressed tar (.tar) |
| 27 | + - [`Tar_gz]: gzip-compressed tar (.tar.gz, .tgz) |
| 28 | + - [`Tar_bz2]: bzip2-compressed tar (.tar.bz2, .tbz) |
| 29 | + - [`Zip]: zip archive (.zip) *) |
| 30 | + type t = |
| 31 | + [ `Tar |
| 32 | + | `Tar_gz |
| 33 | + | `Tar_bz2 |
| 34 | + | `Zip |
| 35 | + ] |
| 36 | + |
| 37 | + (** Detect archive format from filename extension. Returns the format and |
| 38 | + the matched extension. *) |
| 39 | + val of_filename : Filename.t -> (t * string) option |
| 40 | +end = struct |
| 41 | + type t = |
| 42 | + [ `Tar |
| 43 | + | `Tar_gz |
| 44 | + | `Tar_bz2 |
| 45 | + | `Zip |
| 46 | + ] |
| 47 | + |
| 48 | + let of_filename = |
| 49 | + let extensions = |
| 50 | + [ ".tar", `Tar |
| 51 | + ; ".tar.gz", `Tar_gz |
| 52 | + ; ".tgz", `Tar_gz |
| 53 | + ; ".tar.bz2", `Tar_bz2 |
| 54 | + ; ".tbz", `Tar_bz2 |
| 55 | + ; ".zip", `Zip |
| 56 | + ] |
| 57 | + in |
| 58 | + fun filename -> |
| 59 | + let check_suffix suffix = Filename.check_suffix filename suffix in |
| 60 | + List.find_map extensions ~f:(fun (ext, format) -> |
| 61 | + Option.some_if (check_suffix ext) (format, ext)) |
| 62 | + ;; |
| 63 | +end |
| 64 | + |
| 65 | +let is_supported filename = Option.is_some (Format.of_filename filename) |
| 66 | +let which bin_name = Bin.which ~path:(Env_path.path Env.initial) bin_name |
| 67 | + |
| 68 | +module Error = struct |
| 69 | + type t = |
| 70 | + | No_extractor of |
| 71 | + { ext : string |
| 72 | + ; tried : string list |
| 73 | + } |
| 74 | + | Command_failed of |
| 75 | + { bin : Path.t |
| 76 | + ; archive : Path.t |
| 77 | + ; exit_code : int |
| 78 | + ; stderr : string list |
| 79 | + } |
| 80 | + | Read_dir_failed of |
| 81 | + { archive : Path.t |
| 82 | + ; error : Unix_error.Detailed.t |
| 83 | + } |
| 84 | + |
| 85 | + let message = function |
| 86 | + | No_extractor { ext; tried } -> |
| 87 | + [ Pp.textf "No program found to extract %s files. Tried:" ext |
| 88 | + ; Pp.enumerate tried ~f:Pp.verbatim |
| 89 | + ] |
| 90 | + | Command_failed { bin; archive; exit_code; stderr } -> |
| 91 | + [ Pp.textf "Failed to extract '%s':" (Path.basename archive) |
| 92 | + ; Pp.hovbox |
| 93 | + (Pp.concat |
| 94 | + ~sep:Pp.space |
| 95 | + [ User_message.command (Path.basename bin) |
| 96 | + ; Pp.textf "failed with non-zero exit code '%d' and output:" exit_code |
| 97 | + ]) |
| 98 | + ; Pp.vbox (Pp.concat_map ~sep:Pp.cut ~f:Pp.paragraph stderr) |
| 99 | + ] |
| 100 | + | Read_dir_failed { archive; error } -> |
| 101 | + [ Pp.textf "Failed to extract '%s':" (Path.to_string_maybe_quoted archive) |
| 102 | + ; Pp.text (Unix_error.Detailed.to_string_hum error) |
| 103 | + ] |
| 104 | + ;; |
| 105 | + |
| 106 | + let raise t = User_error.raise (message t) |
| 107 | +end |
| 108 | + |
| 109 | +module Tar : sig |
| 110 | + (** Tar executable for current system. The type parameter encodes which |
| 111 | + formats this tar supports - pattern match on [found] to get the right |
| 112 | + type. |
| 113 | +
|
| 114 | + {v |
| 115 | + | Kind | Implementations | Detects compression | Zip support | |
| 116 | + |-------|--------------------------|---------------------|-------------| |
| 117 | + | Bsd | bsdtar, libarchive | yes | yes | |
| 118 | + | | (macOS, FreeBSD, Win10+) | | | |
| 119 | + | | | | | |
| 120 | + | Gnu | GNU tar (most Linux) | yes | no | |
| 121 | + | | | | | |
| 122 | + | Other | OpenBSD tar, busybox tar | no | no | |
| 123 | + v} |
| 124 | +
|
| 125 | + [Other] is the fallback for unknown implementations. We use explicit |
| 126 | + [-z]/[-j] flags which is the safest approach. *) |
| 127 | + type 'fmt t |
| 128 | + |
| 129 | + (** Archive formats supported by BSD tar. *) |
| 130 | + type bsd = |
| 131 | + [ `Tar |
| 132 | + | `Tar_gz |
| 133 | + | `Tar_bz2 |
| 134 | + | `Zip |
| 135 | + ] |
| 136 | + |
| 137 | + (** Result of finding tar. Pattern match to get the appropriately typed tar. *) |
| 138 | + type found = |
| 139 | + | Bsd of bsd t |
| 140 | + | Gnu of [ `Tar | `Tar_gz | `Tar_bz2 ] t |
| 141 | + | Other of [ `Tar | `Tar_gz | `Tar_bz2 ] t |
| 142 | + |
| 143 | + (** Attempt to find tar executable for current system. *) |
| 144 | + val find : found option Fiber.Lazy.t |
| 145 | + |
| 146 | + (** Names of binaries checked when attempting to find tar executable. *) |
| 147 | + val bin_names : string list |
| 148 | + |
| 149 | + (** Path to tar executable. *) |
| 150 | + val path : 'a t -> Path.t |
| 151 | + |
| 152 | + (** [args t format ~archive ~target] provides the arguments for extraction. |
| 153 | + The [format] must be in the set of formats supported by [t]. *) |
| 154 | + val args : 'fmt t -> format:'fmt -> archive:Path.t -> target:Path.t -> string list |
| 155 | +end = struct |
| 156 | + type _ kind = |
| 157 | + | K_bsd : [ `Tar | `Tar_gz | `Tar_bz2 | `Zip ] kind |
| 158 | + | K_gnu : [ `Tar | `Tar_gz | `Tar_bz2 ] kind |
| 159 | + | K_other : [ `Tar | `Tar_gz | `Tar_bz2 ] kind |
| 160 | + |
| 161 | + type 'fmt t = |
| 162 | + { path : Path.t |
| 163 | + ; kind : 'fmt kind |
| 164 | + } |
| 165 | + |
| 166 | + type bsd = |
| 167 | + [ `Tar |
| 168 | + | `Tar_gz |
| 169 | + | `Tar_bz2 |
| 170 | + | `Zip |
| 171 | + ] |
| 172 | + |
| 173 | + type found = |
| 174 | + | Bsd of bsd t |
| 175 | + | Gnu of [ `Tar | `Tar_gz | `Tar_bz2 ] t |
| 176 | + | Other of [ `Tar | `Tar_gz | `Tar_bz2 ] t |
| 177 | + |
| 178 | + (** The order of binaries we should try from most capable to least + Windows. *) |
| 179 | + let bin_names = [ "bsdtar"; "tar"; "gtar"; "tar.exe" ] |
| 180 | + |
| 181 | + let find = |
| 182 | + Fiber.Lazy.create (fun () -> |
| 183 | + match List.find_map bin_names ~f:which with |
| 184 | + | None -> Fiber.return None |
| 185 | + | Some path -> |
| 186 | + let+ output, _ = Process.run_capture ~display:Quiet Return path [ "--version" ] in |
| 187 | + let matches s = Re.execp (Re.compile (Re.str s)) output in |
| 188 | + Some |
| 189 | + (if matches "bsdtar" || matches "libarchive" |
| 190 | + then Bsd { path; kind = K_bsd } |
| 191 | + else if matches "GNU tar" |
| 192 | + then Gnu { path; kind = K_gnu } |
| 193 | + else Other { path; kind = K_other })) |
| 194 | + ;; |
| 195 | + |
| 196 | + let path t = t.path |
| 197 | + |
| 198 | + let args (type fmt) (t : fmt t) ~(format : fmt) ~archive ~target = |
| 199 | + let decompress_flag = |
| 200 | + match t.kind, format with |
| 201 | + | K_bsd, (`Tar | `Tar_gz | `Tar_bz2 | `Zip) -> [] |
| 202 | + | K_gnu, (`Tar | `Tar_gz | `Tar_bz2) -> [] |
| 203 | + | K_other, `Tar -> [] |
| 204 | + | K_other, `Tar_gz -> [ "-z" ] |
| 205 | + | K_other, `Tar_bz2 -> [ "-j" ] |
| 206 | + in |
| 207 | + [ "-x" ] |
| 208 | + @ decompress_flag |
| 209 | + @ [ "-f"; Path.to_string archive; "-C"; Path.to_string target ] |
| 210 | + ;; |
| 211 | +end |
| 212 | + |
| 213 | +module Unzip : sig |
| 214 | + (** A binary that can extract zip files. *) |
| 215 | + type t |
| 216 | + |
| 217 | + (** Find a binary that can extract zip files. *) |
| 218 | + val find : t option Fiber.Lazy.t |
| 219 | + |
| 220 | + (** Names of binaries checked when attempting to find binary that can extract |
| 221 | + zip files. *) |
| 222 | + val bin_names : string list |
| 223 | + |
| 224 | + (** Path to binary that can extract zip files. *) |
| 225 | + val path : t -> Path.t |
| 226 | + |
| 227 | + (** Arguments given to binary that can extract zip files. *) |
| 228 | + val args : t -> archive:Path.t -> target:Path.t -> string list |
| 229 | +end = struct |
| 230 | + type t = |
| 231 | + | Unzip of Path.t |
| 232 | + | Tar of Tar.bsd Tar.t |
| 233 | + |
| 234 | + let bin_names = Tar.bin_names @ [ "unzip" ] |
| 235 | + |
| 236 | + let find = |
| 237 | + (* We first check if we have a tar binary capable of unzipping, if not we |
| 238 | + fallback to unzip. *) |
| 239 | + Fiber.Lazy.create (fun () -> |
| 240 | + Fiber.Lazy.force Tar.find |
| 241 | + >>| function |
| 242 | + | Some (Bsd tar) -> Some (Tar tar) |
| 243 | + | Some (Gnu _ | Other _) | None -> |
| 244 | + (match which "unzip" with |
| 245 | + | Some path -> Some (Unzip path) |
| 246 | + | None -> None)) |
| 247 | + ;; |
| 248 | + |
| 249 | + let path = function |
| 250 | + | Unzip path -> path |
| 251 | + | Tar tar -> Tar.path tar |
| 252 | + ;; |
| 253 | + |
| 254 | + let args t ~archive ~target = |
| 255 | + match t with |
| 256 | + | Unzip _ -> [ Path.to_string archive; "-d"; Path.to_string target ] |
| 257 | + | Tar tar -> Tar.args tar ~format:`Zip ~archive ~target |
| 258 | + ;; |
| 259 | +end |
| 260 | + |
| 261 | +module Extractor : sig |
| 262 | + (** [Extractor.run ~archive ~target] extracts the given [archive] using an |
| 263 | + appropriate tool to do so. *) |
| 264 | + val run : archive:Path.t -> target:Path.t -> (unit, Error.t) result Fiber.t |
| 265 | +end = struct |
| 266 | + let no_extractor_error ~ext ~bin_names = |
| 267 | + Fiber.return @@ Error (Error.No_extractor { ext; tried = bin_names }) |
| 268 | + ;; |
| 269 | + |
| 270 | + let run_cmd ~bin ~args ~archive = |
| 271 | + let temp_stderr = Temp.create File ~prefix:"extract" ~suffix:"stderr" in |
| 272 | + Fiber.finalize ~finally:(fun () -> |
| 273 | + Temp.destroy File temp_stderr; |
| 274 | + Fiber.return ()) |
| 275 | + @@ fun () -> |
| 276 | + let+ (), exit_code = |
| 277 | + Process.run |
| 278 | + ~display:Quiet |
| 279 | + ~stdout_to: |
| 280 | + (Process.Io.make_stdout ~output_on_success:Swallow ~output_limit:1_000_000) |
| 281 | + ~stderr_to:(Process.Io.file temp_stderr Out) |
| 282 | + Return |
| 283 | + bin |
| 284 | + args |
| 285 | + in |
| 286 | + if exit_code <> 0 |
| 287 | + then |
| 288 | + Io.with_file_in temp_stderr ~f:(fun err_channel -> |
| 289 | + let stderr = Io.input_lines err_channel in |
| 290 | + Error (Error.Command_failed { bin; archive; exit_code; stderr })) |
| 291 | + else Ok () |
| 292 | + ;; |
| 293 | + |
| 294 | + let run ~archive ~target = |
| 295 | + let format, ext = |
| 296 | + Format.of_filename (Path.to_string archive) |> Option.value ~default:(`Tar, ".tar") |
| 297 | + in |
| 298 | + match (format : Format.t) with |
| 299 | + | `Zip -> |
| 300 | + Fiber.Lazy.force Unzip.find |
| 301 | + >>= (function |
| 302 | + | Some unzip -> |
| 303 | + let bin = Unzip.path unzip in |
| 304 | + let args = Unzip.args unzip ~archive ~target in |
| 305 | + run_cmd ~bin ~args ~archive |
| 306 | + | None -> no_extractor_error ~ext ~bin_names:Unzip.bin_names) |
| 307 | + | (`Tar | `Tar_gz | `Tar_bz2) as format -> |
| 308 | + Fiber.Lazy.force Tar.find |
| 309 | + >>= (function |
| 310 | + | Some (Bsd tar) -> |
| 311 | + let bin = Tar.path tar in |
| 312 | + let args = Tar.args tar ~format ~archive ~target in |
| 313 | + run_cmd ~bin ~args ~archive |
| 314 | + | Some (Gnu tar | Other tar) -> |
| 315 | + let bin = Tar.path tar in |
| 316 | + let args = Tar.args tar ~format ~archive ~target in |
| 317 | + run_cmd ~bin ~args ~archive |
| 318 | + | None -> no_extractor_error ~ext ~bin_names:Tar.bin_names) |
| 319 | + ;; |
| 320 | +end |
| 321 | + |
| 322 | +(** Make extraction atomic by extracting to a temp directory and renaming. *) |
| 323 | +let extract ~archive ~target = |
| 324 | + let target_in_temp = |
| 325 | + let prefix = Path.basename target in |
| 326 | + let suffix = Path.basename archive in |
| 327 | + Temp_dir.dir_for_target ~target ~prefix ~suffix |
| 328 | + in |
| 329 | + Fiber.finalize ~finally:(fun () -> |
| 330 | + Temp.destroy Dir target_in_temp; |
| 331 | + Fiber.return ()) |
| 332 | + @@ fun () -> |
| 333 | + Path.mkdir_p target_in_temp; |
| 334 | + Extractor.run ~archive ~target:target_in_temp |
| 335 | + >>| function |
| 336 | + | Error _ as e -> e |
| 337 | + | Ok () -> |
| 338 | + (* Opam expects only one top-level directory and then strips it. We employ a |
| 339 | + similar heuristic here. Opam is stricter in the multiple top-level entries |
| 340 | + case since it errors, whereas we use the extraction directory as-is. *) |
| 341 | + let target_in_temp = |
| 342 | + match Path.readdir_unsorted_with_kinds target_in_temp with |
| 343 | + | Error error -> Error (Error.Read_dir_failed { archive; error }) |
| 344 | + | Ok [ (fname, S_DIR) ] -> Ok (Path.relative target_in_temp fname) |
| 345 | + | Ok _ -> Ok target_in_temp |
| 346 | + in |
| 347 | + (match target_in_temp with |
| 348 | + | Error _ as e -> e |
| 349 | + | Ok target_in_temp -> |
| 350 | + (* CR-soon Alizter: this might already exist, don't do pointeless work *) |
| 351 | + Path.mkdir_p (Path.parent_exn target); |
| 352 | + (* CR-someday Alizter: Add fallback to copy if EXDEV raised here *) |
| 353 | + Path.rename target_in_temp target; |
| 354 | + Ok ()) |
| 355 | +;; |
| 356 | + |
| 357 | +let extract_exn ~archive ~target = |
| 358 | + extract ~archive ~target |
| 359 | + >>| function |
| 360 | + | Ok () -> () |
| 361 | + | Error e -> Error.raise e |
| 362 | +;; |
0 commit comments