#!/usr/bin/python3
import argparse
import shutil
import subprocess
import sys
import syslog
import time
from pathlib import Path

"""
Source: https://github.com/notthebee/infra/blob/29aacdb50ee28d3728b0fbcd542f2fa4d5396219/roles/filesystems/mergerfs/templates/mergerfs-uncache.j2
Youtube video that sent me here: https://www.youtube.com/watch?v=MucGkPUMjNo
"""

if __name__ == "__main__":
    """
    Uncaching utility. This script assumes that you have a cache-like
    mount point for which you want to preserve a certain amount of free
    space by moving heavy/rarely-accessed files to a slower mount point.
    The script, in its simplest form, can be run as:
    ::
        $ ./mergerfs-uncache.py -s /mnt/cache -d /mnt/slow -t 75
    In this way the least accessed files are moved one after the other
    until the percentage of used capacity falls below the target.
    Other options are also available. Please consider this a work in
    progress.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-s",
        "--source",
        dest="source",
        type=Path,
        required=True,
        help="Source path (i.e. cache pool root path).",
    )
    parser.add_argument(
        "-d",
        "--destination",
        dest="destination",
        type=Path,
        required=True,
        help="Destination path (i.e. slow pool root path).",
    )
    parser.add_argument(
        "--num-files",
        dest="num_files",
        default=-1,
        type=int,
        help="Maximum number of files moved away from cache.",
    )
    parser.add_argument(
        "--time-limit",
        dest="time_limit",
        default=-1,
        type=int,
        help="Time limit for the whole process (in seconds). Once reached the program exits.",
    )
    parser.add_argument(
        "-t",
        "--target",
        dest="target",
        type=float,
        required=True,
        help="Desired max cache usage, in percentage (e.g. 70).",
    )
    parser.add_argument(
        "-v", "--verbose", help="Increase output verbosity.", action="store_true"
    )
    args = parser.parse_args()

    # Some general checks
    cache_path: Path = args.source
    if not cache_path.is_dir():
        raise NotADirectoryError(f"{cache_path} is not a valid directory.")
    slow_path: Path = args.destination
    if not slow_path.is_dir():
        raise NotADirectoryError(f"{slow_path} is not a valid directory.")
    last_id = args.num_files
    time_limit = args.time_limit
    target = float(args.target)
    if target <= 0 or target >= 100:
        raise ValueError(
            f"Target value is a percentage, i.e. in the range (0, 100). Found {target} instead."
        )

    cache_stats = shutil.disk_usage(cache_path)
    usage_percentage = 100 * cache_stats.used / cache_stats.total
    syslog.syslog(
        syslog.LOG_INFO,
        f"Uncaching from {cache_path} ({usage_percentage:.2f}% used) to {slow_path}.",
    )
    if usage_percentage <= target:
        syslog.syslog(
            syslog.LOG_INFO,
            f"Target of {target}% of used capacity already reached. Exiting.",
        )
        sys.exit(0)

    syslog.syslog(syslog.LOG_INFO, "Computing candidates...")
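    # Walk the whole cache pool and sort files by last access time
    # (st_atime), oldest first, so the least recently used files are
    # moved off the cache first. Note: atime is not updated on
    # filesystems mounted with noatime, which degrades this ordering.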
    candidates = sorted(
        [(c, c.stat()) for c in cache_path.glob("**/*") if c.is_file()],
        key=lambda p: p[1].st_atime,
    )

    t_start = time.monotonic()
    syslog.syslog(syslog.LOG_INFO, "Processing candidates...")
    cache_used = cache_stats.used
    for c_id, (c_path, c_stat) in enumerate(candidates):
        syslog.syslog(syslog.LOG_DEBUG, f"{c_path}")
        if not c_path.exists():
            # Since rsync also moves other hard links, some files may no
            # longer exist by the time we reach them. However, invoking
            # rsync for each file (instead of directories) does not
            # preserve hard links.
            syslog.syslog(syslog.LOG_WARNING, f"{c_path} does not exist.")
            continue
        # Rsync options
        # -a, --archive              archive mode; equals -rlptgoD (no -H,-A,-X)
        # -x, --one-file-system      don't cross filesystem boundaries
        # -q, --quiet                suppress non-error messages
        # -H, --hard-links           preserve hard links
        # -A, --acls                 preserve ACLs (implies --perms)
        # -X, --xattrs               preserve extended attributes
        # -W, --whole-file           copy files whole (without delta-xfer algorithm)
        # -E, --executability        preserve the file's executability
        # -S, --sparse               turn sequences of nulls into sparse blocks
        # -R, --relative             use relative path names
        # --preallocate              allocate dest files before writing them
        # --remove-source-files      sender removes synchronized files (non-dirs)
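        # The "/./" marker in the source path tells rsync (via -R/--relative)
        # where the relative path begins: everything after the dot is
        # recreated under the destination, preserving the directory layout.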
        rc = subprocess.call(
            [
                "rsync",
                "-axqHAXWESR",
                "--preallocate",
                "--remove-source-files",
                f"{cache_path}/./{c_path.relative_to(cache_path)}",
                f"{slow_path}/",
            ]
        )
        if rc != 0:
            # Skip the bookkeeping below if the transfer failed, so the
            # free-space estimate is not thrown off.
            syslog.syslog(syslog.LOG_ERR, f"rsync failed for {c_path} (exit code {rc}).")
            continue
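        # Keep a running estimate of the remaining cache usage instead of
        # calling shutil.disk_usage() on every iteration; the real figure
        # is re-read once after the loop.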
        cache_used -= c_stat.st_size
        # Evaluate early breaking conditions
        if last_id >= 0 and c_id >= last_id - 1:
            syslog.syslog(
                syslog.LOG_INFO, f"Maximum number of moved files reached ({last_id})."
            )
            break
        if time_limit >= 0 and time.monotonic() - t_start > time_limit:
            syslog.syslog(
                syslog.LOG_INFO, f"Time limit reached ({time_limit} seconds)."
            )
            break
        if (100 * cache_used / cache_stats.total) <= target:
            syslog.syslog(
                syslog.LOG_INFO, f"Target of maximum used capacity reached ({target}%)."
            )
            break

    cache_stats = shutil.disk_usage(cache_path)
    usage_percentage = 100 * cache_stats.used / cache_stats.total
    syslog.syslog(
        syslog.LOG_INFO,
        f"Process completed in {round(time.monotonic() - t_start)} seconds. Current usage percentage is {usage_percentage:.2f}%.",
    )