From 2ce1db4eff91312504218e6be73ea4ccba1f94a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=A5nsson?= Date: Thu, 23 Sep 2021 18:13:03 +0200 Subject: [PATCH 1/4] Adding tool mergerfs.consolidate-dirs --- README.md | 20 +++ src/mergerfs.consolidate-dirs | 254 ++++++++++++++++++++++++++++++++++ 2 files changed, 274 insertions(+) create mode 100755 src/mergerfs.consolidate-dirs diff --git a/README.md b/README.md index 74f9199..65e3125 100644 --- a/README.md +++ b/README.md @@ -232,6 +232,26 @@ optional arguments: -h, --help Print this help. ``` + +### mergerfs.consolidate-dirs + +Consolidate directories so that each of them only exists on a single drive, recursively. The approach is that the tool loops through the given directories, looks up the source drives, checks the space used per source directory, and moves the data from the smaller ones into the largest one, ending with a single directory. + +Requires `rsync` to be installed. + +``` +usage: mergerfs.consolidate-dirs [<options>] <dir> ... + +positional arguments: + dir directory to consolidate, can be repeated + +optional arguments: + -v, --verbose Verbose printing + -e, --execute Execute `rsync` commands as well as print them. + -h, --help Print this help. +``` + + ## SUPPORT #### Contact / Issue submission diff --git a/src/mergerfs.consolidate-dirs b/src/mergerfs.consolidate-dirs new file mode 100755 index 0000000..ae917f2 --- /dev/null +++ b/src/mergerfs.consolidate-dirs @@ -0,0 +1,254 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2016, Antonio SJ Musumeci +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +import argparse +import ctypes +import errno +import fnmatch +import io +import os +import shlex +import stat +import subprocess +import sys + + +_libc = ctypes.CDLL("libc.so.6",use_errno=True) +_lgetxattr = _libc.lgetxattr +_lgetxattr.argtypes = [ctypes.c_char_p,ctypes.c_char_p,ctypes.c_void_p,ctypes.c_size_t] +def lgetxattr(path,name): + if type(path) == str: + path = path.encode(errors='backslashreplace') + if type(name) == str: + name = name.encode(errors='backslashreplace') + length = 64 + while True: + buf = ctypes.create_string_buffer(length) + res = _lgetxattr(path,name,buf,ctypes.c_size_t(length)) + if res >= 0: + return buf.raw[0:res] + else: + err = ctypes.get_errno() + if err == errno.ERANGE: + length *= 2 + elif err == errno.ENODATA: + return None + else: + raise IOError(err,os.strerror(err),path) + + +def ismergerfs(path): + try: + lgetxattr(path,'user.mergerfs.version') + return True + except IOError as e: + return False + + +def mergerfs_control_file(basedir): + if basedir == '/': + return None + ctrlfile = os.path.join(basedir,'.mergerfs') + if os.path.exists(ctrlfile): + return ctrlfile + else: + dirname = os.path.dirname(basedir) + return mergerfs_control_file(dirname) + + +def mergerfs_srcmounts(ctrlfile): + srcmounts = lgetxattr(ctrlfile,'user.mergerfs.srcmounts') + srcmounts = srcmounts.decode(errors='backslashreplace').split(':') + return srcmounts + + +def execute_cmd(args): + return subprocess.call(args) + + +def execute_cmds(args): + if isinstance(args[0], list): + for args2 in args: + execute_cmd(args2) + else: + return execute_cmd(args) + + +def print_margs(margs): + for args in margs[:-1]: + quoted = 
[shlex.quote(arg) for arg in args] + print(' '.join(quoted), end = ' && ') + + args = margs[-1] + quoted = [shlex.quote(arg) for arg in args] + print(' '.join(quoted)) + + +def build_move_file(src,tgt): + return ['rsync', + '-avHAXWES', + '--numeric-ids', + '--progress', + '--remove-source-files', + src, + tgt] + + +def print_help(): + help = \ +''' +usage: mergerfs.consolidate-dirs [] + +Consolidate listed directories into a single drive by moving the smaller ones into the largest. +Automatically skips directories that are not spread between multiple source drives. + +Usage example: +user@host:/srv/mergerfs/movies$ mergerfs.consolidate-dirs * + +positional arguments: + dirs Directories to consolidate + +optional arguments: + -v, --verbose Verbose + -e, --execute Execute `rsync` commands as well as print them. + -h, --help Print this help. +''' + print(help) + + +def buildargparser(): + parser = argparse.ArgumentParser(add_help=False) + parser.add_argument('dirs', + type=str, + nargs='*', + default=None) + parser.add_argument('-v','--verbose', + dest='verbose', + action='store_true') + parser.add_argument('-e','--execute', + dest='execute', + action='store_true') + parser.add_argument('-h','--help', + action='store_true') + + return parser + + +def mergerfs_sourcepaths(fullpath): + attr = xattr_allpaths(fullpath) + if not attr: + dirname = os.path.dirname(fullpath) + basename = os.path.basename(fullpath) + attr = xattr_allpaths(dirname) + attr = attr.split('\0') + attr = [os.path.join(path,basename) + for path in attr + if os.path.lexists(os.path.join(path,basename))] + else: + attr = attr.decode('utf-8').split('\0') + return [x.rstrip('/') for x in attr] + +def xattr_allpaths(fullpath): + return lgetxattr(fullpath,'user.mergerfs.allpaths') + + + +def main(): + sys.stdout = io.TextIOWrapper(sys.stdout.buffer, + encoding='utf8', + errors='backslashreplace', + line_buffering=True) + sys.stderr = io.TextIOWrapper(sys.stderr.buffer, + encoding='utf8', + 
errors='backslashreplace', + line_buffering=True) + + parser = buildargparser() + args = parser.parse_args() + + if args.help or not args.dirs: + print_help() + sys.exit(0) + + execute = args.execute + verbose = args.verbose + + + real_dirs = [] + for dir in args.dirs: + real_dir = os.path.realpath(dir) + + if not os.path.isdir(real_dir): + if(verbose): + print("%s is not a directory, skipping" % real_dir) + continue + + ctrlfile = mergerfs_control_file(real_dir) + if not ismergerfs(ctrlfile): + print("%s is not a mergerfs mount" % real_dir) + sys.exit(1) + + real_dirs.append( real_dir ) + + + try: + for dir in real_dirs: + if(verbose): + print("Considering %s" % dir) + sourcepaths = mergerfs_sourcepaths(dir) + if(len(sourcepaths) == 1): + if(verbose): + print(" Already consolidated, skipping") + continue + + sizes = {} + for sourcepath in sourcepaths: + for (root,dirs,files) in os.walk(sourcepath): + if len(files) <= 1: + continue + + total_size = 0 + for file in files: + fullpath = os.path.join(root,file) + st = os.lstat(fullpath) + if not stat.S_ISREG(st.st_mode): + continue + total_size += st.st_size + sizes[sourcepath] = total_size + + sizes = sorted(sizes.items(), key=lambda item: item[1]) + + if(verbose): + for path,size in sizes: + print(" %s : %d bytes" % (path, size)) + + tgt = os.path.dirname(sizes[-1][0]).rstrip('/') + '/' + for src,size in sizes[:-1]: + margs = [ build_move_file(src,tgt), [ 'rmdir', src ] ] + print_margs(margs) + if execute: + if(verbose): + print("Consolidating %s" % dir) + execute_cmds(margs) + + except (KeyboardInterrupt,BrokenPipeError): + pass + + sys.exit(0) + + +if __name__ == "__main__": + main() From a4c12d740a8656afc1a7d0c35d352e3f53c0106c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=A5nsson?= Date: Thu, 23 Sep 2021 18:35:26 +0200 Subject: [PATCH 2/4] Fixing delete of directories with directories in them. Fixes consolidation of empty directories. 
--- src/mergerfs.consolidate-dirs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/mergerfs.consolidate-dirs b/src/mergerfs.consolidate-dirs index ae917f2..6052d33 100755 --- a/src/mergerfs.consolidate-dirs +++ b/src/mergerfs.consolidate-dirs @@ -217,8 +217,6 @@ def main(): sizes = {} for sourcepath in sourcepaths: for (root,dirs,files) in os.walk(sourcepath): - if len(files) <= 1: - continue total_size = 0 for file in files: @@ -237,7 +235,7 @@ def main(): tgt = os.path.dirname(sizes[-1][0]).rstrip('/') + '/' for src,size in sizes[:-1]: - margs = [ build_move_file(src,tgt), [ 'rmdir', src ] ] + margs = [ build_move_file(src,tgt), [ 'find', src, '-type', 'd', '-empty', '-delete' ] ] print_margs(margs) if execute: if(verbose): From 61b6c217eb182ebd83b8a2aac208f0ebfc72c994 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=A5nsson?= Date: Thu, 23 Sep 2021 20:09:48 +0200 Subject: [PATCH 3/4] Use the recursive size of the source directories. Adding formatting to the verbose size prints. 
--- src/mergerfs.consolidate-dirs | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/src/mergerfs.consolidate-dirs b/src/mergerfs.consolidate-dirs index 6052d33..a24fbc4 100755 --- a/src/mergerfs.consolidate-dirs +++ b/src/mergerfs.consolidate-dirs @@ -164,6 +164,25 @@ def mergerfs_sourcepaths(fullpath): def xattr_allpaths(fullpath): return lgetxattr(fullpath,'user.mergerfs.allpaths') +def dir_size(path): + total_size = 0 + for dirpath, dirnames, filenames in os.walk(path): + for f in filenames: + fp = os.path.join(dirpath, f) + # skip if it is symbolic link + if not os.path.islink(fp): + total_size += os.path.getsize(fp) + + return total_size + +def bytes_to_human(num): + for unit in ["", "K", "M", "G", "T"]: + if abs(num) < 1024.0 or unit == 'T': + return "%3.1f%s" % (num, unit) + num /= 1024.0 + return None + + def main(): @@ -216,22 +235,13 @@ def main(): sizes = {} for sourcepath in sourcepaths: - for (root,dirs,files) in os.walk(sourcepath): - - total_size = 0 - for file in files: - fullpath = os.path.join(root,file) - st = os.lstat(fullpath) - if not stat.S_ISREG(st.st_mode): - continue - total_size += st.st_size - sizes[sourcepath] = total_size + sizes[sourcepath] = dir_size(sourcepath) sizes = sorted(sizes.items(), key=lambda item: item[1]) if(verbose): for path,size in sizes: - print(" %s : %d bytes" % (path, size)) + print(" %s %s" % (bytes_to_human(size), path)) tgt = os.path.dirname(sizes[-1][0]).rstrip('/') + '/' for src,size in sizes[:-1]: From 8ae2e7bad470131821cbe3ac245433200aa3875a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=A5nsson?= Date: Thu, 23 Sep 2021 21:53:11 +0200 Subject: [PATCH 4/4] Added consolidate-dirs to Makefile --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9f54783..63f4904 100644 --- a/Makefile +++ b/Makefile @@ -33,7 +33,8 @@ APPS = mergerfs.fsck \ mergerfs.dedup \ mergerfs.ctl \ mergerfs.balance \ - 
mergerfs.consolidate + mergerfs.consolidate \ + mergerfs.consolidate-dirs install: @for APP in $(APPS); \