-
-
Notifications
You must be signed in to change notification settings - Fork 297
Addition of granular bcachefs formatting and multi-disk support #961
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,93 @@ | ||
| { | ||
| disko.devices = { | ||
| disk = { | ||
| bcachefsmain = { | ||
| device = "/dev/disk/by-path/virtio-pci-0000:00:08.0"; | ||
| type = "disk"; | ||
| content = { | ||
| type = "gpt"; | ||
| partitions = { | ||
| ESP = { | ||
| end = "500M"; | ||
| type = "EF00"; | ||
| content = { | ||
| type = "filesystem"; | ||
| format = "vfat"; | ||
| mountpoint = "/boot"; | ||
| mountOptions = [ "umask=0077" ]; | ||
| }; | ||
| }; | ||
| root = { | ||
| name = "root"; | ||
| end = "-0"; | ||
| content = { | ||
| type = "filesystem"; | ||
| format = "ext4"; | ||
| mountpoint = "/"; | ||
| }; | ||
| }; | ||
| }; | ||
| }; | ||
| }; | ||
|
|
||
| bcachefsdisk1 = { | ||
| type = "disk"; | ||
| device = "/dev/disk/by-path/virtio-pci-0000:00:0a.0"; | ||
| content = { | ||
| type = "gpt"; | ||
| partitions = { | ||
| bcachefs = { | ||
| size = "100%"; | ||
| content = { | ||
| type = "bcachefs_member"; | ||
| pool = "pool1"; | ||
| label = "fast"; | ||
| discard = true; | ||
| dataAllowed = [ "journal" "btree" ]; | ||
| }; | ||
| }; | ||
| }; | ||
| }; | ||
| }; | ||
| bcachefsdisk2 = { | ||
| type = "disk"; | ||
| device = "/dev/disk/by-path/virtio-pci-0000:00:0b.0"; | ||
| content = { | ||
| type = "gpt"; | ||
| partitions = { | ||
| bcachefs = { | ||
| size = "100%"; | ||
| content = { | ||
| type = "bcachefs_member"; | ||
| pool = "pool1"; | ||
| label = "slow"; | ||
| durability = 2; | ||
| dataAllowed = [ "user" ]; | ||
| }; | ||
| }; | ||
| }; | ||
| }; | ||
| }; | ||
| # use whole disk, ignore partitioning | ||
| # disk3 = { | ||
| # type = "disk"; | ||
| # device = "/dev/vde"; | ||
| # content = { | ||
| # type = "bcachefs_member"; | ||
| # pool = "pool1"; | ||
| # label = "main"; | ||
| # }; | ||
| # }; | ||
| }; | ||
|
|
||
| bcachefs = { | ||
| pool1 = { | ||
| type = "bcachefs"; | ||
|
|
||
| mountpoint = "/mnt/pool"; | ||
| formatOptions = [ "--compression=zstd" ]; | ||
| mountOptions = [ "verbose" "degraded" ]; | ||
| }; | ||
| }; | ||
| }; | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,223 @@ | ||
| # lib/types/bcachefs.nix | ||
| { config, options, lib, diskoLib, ... }: | ||
| { | ||
| options = { | ||
| name = lib.mkOption { | ||
| type = lib.types.str; | ||
| default = config._module.args.name; | ||
| description = "Name of the bcachefs pool"; | ||
| }; | ||
|
|
||
| type = lib.mkOption { | ||
| type = lib.types.enum [ "bcachefs" ]; | ||
| default = "bcachefs"; | ||
| internal = true; | ||
| description = "Type"; | ||
| }; | ||
|
|
||
| formatOptions = lib.mkOption { | ||
| type = lib.types.listOf lib.types.str; | ||
| default = []; | ||
| description = "Additional options for bcachefs format"; | ||
| }; | ||
|
|
||
| mountpoint = lib.mkOption { | ||
| type = lib.types.str; | ||
| description = "Mount point for the bcachefs pool"; | ||
| }; | ||
|
|
||
| mountOptions = lib.mkOption { | ||
| type = lib.types.listOf lib.types.str; | ||
| default = [ "defaults" ]; | ||
| description = "Options to pass to mount"; | ||
| apply = opts: lib.lists.unique (opts ++ [ "nofail" ]); | ||
| }; | ||
|
|
||
| uuid = lib.mkOption { | ||
| type = lib.types.strMatching "[[:xdigit:]]{8}-[[:xdigit:]]{4}-[[:xdigit:]]{4}-[[:xdigit:]]{4}-[[:xdigit:]]{12}"; | ||
| default = let | ||
| # Derive a stable, UUID-shaped value from the pool name alone, so that | ||
| # evaluation stays pure (no nixpkgs access is needed at evaluation time). | ||
| digest = builtins.hashString "sha256" config.name; | ||
| # Cut `len` hex characters out of the digest starting at offset `start`. | ||
| slice = start: len: builtins.substring start len digest; | ||
| in | ||
| # 8-4-4-4-12 grouping taken from the first 32 hex characters of the digest. | ||
| lib.concatStringsSep "-" [ (slice 0 8) (slice 8 4) (slice 12 4) (slice 16 4) (slice 20 12) ]; | ||
| defaultText = "generated deterministically based on pool name"; | ||
| example = "809b3a2b-828a-4730-95e1-75b6343e415a"; | ||
| description = '' | ||
| The UUID of the bcachefs filesystem. | ||
| If not provided, a deterministic UUID will be generated based on the pool name. | ||
| ''; | ||
| }; | ||
|
|
||
| content = diskoLib.deviceType { parent = config; device = "/dev/bcachefs/${config.name}"; }; | ||
|
|
||
| _meta = lib.mkOption { | ||
| internal = true; | ||
| readOnly = true; | ||
| type = diskoLib.jsonType; | ||
| default = lib.optionalAttrs (config.content != null) (config.content._meta ["bcachefs" config.name ]); | ||
|
|
||
| description = "Metadata"; | ||
| }; | ||
|
|
||
| # Create script for the pool: reads the member device list and the per-member | ||
| # format arguments from files under $disko_devices_dir, formats the pool when | ||
| # the first member carries no valid bcachefs superblock (retrying up to three | ||
| # times), and finally runs the create script of any nested content. | ||
| _create = diskoLib.mkCreateOption { | ||
| inherit config options; | ||
| default = '' | ||
| echo BCACHEFS POSITION | ||
| # Read member info from runtime dir - one argument per line | ||
| readarray -t members < <(cat "$disko_devices_dir/bcachefs-${config.name}-members" || true) | ||
| readarray -t member_args < <(cat "$disko_devices_dir/bcachefs-${config.name}-args" || true) | ||
|
|
||
| # Without recorded members there is nothing to probe or format. Fail with a | ||
| # clear message instead of dereferencing an empty array further down. | ||
| if [ "''${#members[@]}" -eq 0 ] || [ "''${#member_args[@]}" -eq 0 ]; then | ||
| echo "No member devices recorded for bcachefs pool ${config.name}" >&2 | ||
| exit 1 | ||
| fi | ||
|
|
||
| # Format if needed | ||
| if bcachefs show-super "''${members[0]}" >/dev/null 2>&1 && ! (bcachefs show-super "''${members[0]}" 2>&1 | grep -qi "Not a bcachefs superblock"); then | ||
| # Superblock exists and is valid, no reformat needed | ||
| echo "Found existing bcachefs filesystem, skipping format." | ||
| else | ||
| # Need to format - either show-super failed with non-zero exit code | ||
| # or it returned "Not a bcachefs superblock" message | ||
| echo "No valid bcachefs filesystem found, formatting..." | ||
|
|
||
| # Flush pending writes and pause briefly so earlier operations on the | ||
| # member devices are complete before formatting. | ||
| sync | ||
| sleep 1 | ||
|
|
||
| # Try formatting with additional error handling | ||
| format_attempts=0 | ||
| max_attempts=3 | ||
| format_success=false | ||
|
|
||
| while [ $format_attempts -lt $max_attempts ] && [ "$format_success" = "false" ]; do | ||
| format_attempts=$((format_attempts + 1)) | ||
| echo "Format attempt $format_attempts of $max_attempts..." | ||
|
|
||
| # formatOptions are shell-escaped so an option containing spaces or | ||
| # metacharacters is passed to bcachefs as a single argument. | ||
| if bcachefs format --force --uuid=${config.uuid} "''${member_args[@]}" ${lib.escapeShellArgs config.formatOptions}; then | ||
| format_success=true | ||
| echo "Format successful" | ||
| else | ||
| format_exit=$? | ||
| echo "Format failed with exit code $format_exit, waiting before retry..." | ||
| sync | ||
| sleep 2 | ||
| fi | ||
| done | ||
|
|
||
| if [ "$format_success" = "false" ]; then | ||
| echo "Failed to format bcachefs filesystem after $max_attempts attempts" | ||
| exit 1 | ||
| fi | ||
|
|
||
| # Re-trigger block device events and wait for udev to finish processing them. | ||
| udevadm trigger --subsystem-match=block | ||
| udevadm settle | ||
| fi | ||
|
|
||
| ${lib.optionalString (config.content != null) config.content._create} | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not sure if this makes sense? I don't think we can have other filesystems on top of bcachefs?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think |
||
| ''; | ||
| }; | ||
|
|
||
| # Mount script for the pool, keyed by mountpoint. Skips the work when the | ||
| # mountpoint is already mounted; otherwise mounts the filesystem by UUID. | ||
| # A "No such device" failure is tolerated with a notice; any other failure | ||
| # aborts with exit code 1. | ||
| _mount = diskoLib.mkMountOption { | ||
| inherit config options; | ||
| default = { | ||
| fs.${config.mountpoint} = '' | ||
| if ! findmnt "${config.mountpoint}" > /dev/null 2>&1; then | ||
| ### Hacky work around since bcachefs is broken on earlier kernels | ||
| mkdir -p "${config.mountpoint}" | ||
|
|
||
| # Capture the output AND the real exit status of the mount command. | ||
| # The status must be kept: a successful mount prints no "no such device" | ||
| # text and would otherwise be mistaken for an unexpected failure. | ||
| mount_status=0 | ||
| output=$(bcachefs mount ${lib.optionalString (config.mountOptions != []) "-o ${lib.concatStringsSep "," config.mountOptions}"} UUID="${config.uuid}" "${config.mountpoint}" 2>&1) || mount_status=$? | ||
|
|
||
| if [ "$mount_status" -eq 0 ]; then | ||
| # Mount succeeded; nothing else to do. | ||
| : | ||
| elif echo "$output" | grep -iq "no such device"; then | ||
| # The error output contains "No such device" | ||
| echo "Notice: bcachefs mount failed with 'No such device'. This is expected on kernels < 6.13." | ||
| echo "Current kernel version: $(uname -r)" | ||
| echo "The mount will succeed when you boot into your final system with a newer kernel." | ||
| else | ||
|
Comment on lines
+136
to
+140
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. wouldn't it be better to fail here instead of silently skipping? since we will end up with a wrong mounted filesystems afterwards
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, that is the downfall of the current implementation. I wrote a little more in the comments of this PR. It is essentially dead, because of some weird upstream bugs in bcachefs and systemd that prevent multi-disk UUIDs from mounting on boot, so does and will fail in almost every test I have completed using the disko testing framework. I worked around this by ignoring this error during provisioning and adding a custom systemd mount on my servers. This issue makes this PR DOA for the time being :( |
||
| # The mount failed for a reason other than the tolerated one: show the output and abort | ||
| echo "$output" | ||
| exit 1 | ||
| fi | ||
| fi | ||
| ''; | ||
| }; | ||
| }; | ||
| }; | ||
|
|
||
| # Unmount script for the pool, keyed by mountpoint: unmounts only when | ||
| # findmnt reports the mountpoint as currently mounted. | ||
| _unmount = diskoLib.mkUnmountOption { | ||
| inherit config options; | ||
| default = let | ||
| target = config.mountpoint; | ||
| in { | ||
| fs.${target} = '' | ||
| if findmnt "${target}" > /dev/null 2>&1; then | ||
| umount "${target}" | ||
| fi | ||
| ''; | ||
| }; | ||
| }; | ||
| _config = lib.mkOption { | ||
| internal = true; | ||
| readOnly = true; | ||
| default =[ | ||
| { | ||
| # Basic bcachefs support | ||
| boot.supportedFilesystems = [ "bcachefs" ]; | ||
| boot.kernelModules = [ "bcachefs" ]; | ||
| # use latest kernel | ||
| # boot.kernelPackages = config._pkgs.linuxPackages_latest; | ||
|
|
||
| # environment.systemPackages = with lib.pkgs; [ | ||
| # bcachefs-tools | ||
| # util-linux | ||
| # ]; | ||
|
|
||
| } | ||
| { | ||
| fileSystems.${config.mountpoint} = { | ||
| device = "UUID=${config.uuid}"; | ||
| fsType = "bcachefs"; | ||
| options = config.mountOptions; | ||
| }; | ||
| # Add systemd environment variable for the mount unit | ||
| systemd.services."mount-${lib.escapeSystemdPath config.mountpoint}".serviceConfig.Environment = "BCACHEFS_BLOCK_SCAN=1"; | ||
| systemd.services."unlock-bcachefs-${lib.escapeSystemdPath config.mountpoint}".serviceConfig.Environment = "BCACHEFS_BLOCK_SCAN=1"; | ||
|
|
||
| ############################################################################## | ||
| # WORKAROUND: Until the following can be addressed. This means using | ||
| # multi-disk bcachefs as a boot/root partition is not possible in its current | ||
| # form with disko | ||
| # https://github.com/koverstreet/bcachefs-tools/issues/308 | ||
| # https://github.com/systemd/systemd/issues/8234#issuecomment-1868238750 | ||
| ############################################################################## | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. NixOS has some special workaround for this, though I am not sure what exactly it is. I do have a working multi-device bcachefs rootfs server that has been running for months, though currently I am struggling to get a working bootloader on an Arch install for a friend.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If you are able to point me to that workaround, please post it here. I also worked around this by having an external systemd mount for my non-root pools with a no-fail attribute and retry counter. However, due to this weird boot-dependency issue this PR is blocked, as it can't pass the VM tests which test root and reboots. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'll try to figure out what it is, probably something with the initramfs, though I don't know much about those, but it probably will help with the Arch install. My config is here: https://github.com/Silverdev2482/Router-Server. bcachefs systems just work out of the box. I can also get you any other info about that server, though I am currently on my phone. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Well, I got it working on the Arch install with systemd-boot. I still get some mounting errors on boot, but they appear to be mostly ignorable, besides the delay waiting for it to pass. I think what did it was removing fsck from some config file for the initramfs. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not too good with VMs, but I could learn. What VM tests are failing? Maybe if I try to get the VMs to boot I could understand why it doesn't work. Though I'm not familiar with disko, just manual formatting, I would like to help.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. One of the problems is found here. In theory, we could skip these tests, but I am opposed because it would require users to explicitly provide their own post-boot mounting strategy, and that is not maintainable, reproducible or easily debuggable. Furthermore, it means a multi-disk setup can't be used as a boot FS (at least at this present time). See the testing guide for more on how the disko test VM works. It is derived from a NixOS testing framework. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I seem to be a bit in over my head, but I haven't given up hope. Right now I'm just trying to spin up the VM, and then I will try to set up a multi-device root VM. |
||
| # Oneshot unit that mounts the pool by UUID with BCACHEFS_BLOCK_SCAN=1 set, | ||
| # ordered after local-fs-pre.target and before local-fs.target. | ||
| # NOTE(review): this unit name is built with lib.replaceStrings, while the | ||
| # serviceConfig.Environment override earlier in this list builds its name with | ||
| # lib.escapeSystemdPath; the two may yield different unit names for the same | ||
| # mountpoint - confirm which one is intended. | ||
| systemd.services."mount-${lib.replaceStrings ["/"] ["-"] config.mountpoint}" = { | ||
| description = "Mount bcachefs filesystem at ${config.mountpoint}"; | ||
| before = [ "local-fs.target" ]; | ||
| requires = [ "local-fs-pre.target" ]; | ||
| after = [ "local-fs-pre.target" ]; | ||
| environment = { | ||
| BCACHEFS_BLOCK_SCAN = "1"; | ||
| }; | ||
| # The mountpoint and the joined option list are shell-escaped so values | ||
| # containing spaces or shell metacharacters cannot split or alter the command. | ||
| script = '' | ||
| mkdir -p ${lib.escapeShellArg config.mountpoint} | ||
| mount -t bcachefs UUID=${config.uuid} ${lib.escapeShellArg config.mountpoint} -o ${lib.escapeShellArg (lib.concatStringsSep "," config.mountOptions)} -o X-mount.mkdir | ||
| ''; | ||
| serviceConfig = { | ||
| Type = "oneshot"; | ||
| RemainAfterExit = true; | ||
| }; | ||
| }; | ||
| ############################################################################## | ||
| } | ||
| ]; | ||
| }; | ||
|
|
||
| _pkgs = lib.mkOption { | ||
| internal = true; | ||
| readOnly = true; | ||
| type = lib.types.functionTo (lib.types.listOf lib.types.package); | ||
| default = pkgs: [ pkgs.bcachefs-tools ]; | ||
| }; | ||
| }; | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess this code is not needed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess not if I would be changing the default behavior to just hash the name. Good catch