-
Notifications
You must be signed in to change notification settings - Fork 100
Prune orphaned plugin instance files
Jennings Zhang edited this page Sep 1, 2023
·
4 revisions
Files created by plugin instances are not removed from Swift when the plugin instance is deleted. The script below, which you run inside `manage.py shell`,
prunes these orphaned plugin instance files.
Warning: this could take a while and use a lot of memory, depending on the number of files in swift.
from plugininstances.models import PluginInstanceFile
from core.storage import connect_storage
from django.conf import settings
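# Only needed when running outside `manage.py shell` (which already sets Django up);
# in that case call django.setup() before importing any models.
# https://stackoverflow.com/a/27194927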
import django
django.setup()
# Username of the user who owns the feeds of the plugin instances to prune.
USER = 'rudolph'

# Storage client built from the Django settings.
swift_manager = connect_storage(settings)

# Every path still referenced by a PluginInstanceFile row. A frozenset keeps
# the membership test in the filter below cheap.
known_files = frozenset(
    db_file.fname.name for db_file in PluginInstanceFile.objects.all()
)

# Everything storage lists for USER (presumably full object paths; the prefix
# check below relies on them starting with '<USER>/').
swift_files = swift_manager.ls(USER)

# An orphan is one of USER's feed files that no database row refers to.
orphans = [
    path
    for path in swift_files
    if path not in known_files and path.startswith(f'{USER}/feed_')
]
# Optional: save the orphan list, one path per line, for review before
# anything is deleted. writelines() returns None, so nothing is echoed back
# when this is pasted into an interactive shell.
with open('/tmp/orphans.txt', 'w') as review_file:
    review_file.writelines(orphan_path + '\n' for orphan_path in orphans)
# Optional: measure how much data is about to be deleted.
connection = swift_manager.get_connection()

# One head_object() call per orphan, so this takes a while.
infos = []
for orphan_name in orphans:
    infos.append(
        connection.head_object(swift_manager.container_name, orphan_name)
    )

# Each info carries the object's size under 'content-length'; convert it to
# an int before adding up.
total_size = 0
for info in infos:
    total_size += int(info['content-length'])

print(f'Found {total_size / 1e9:.3f}GB of orphaned data.')
# Destructive step: removes every orphan from storage.
# Are you sure you want to do this?
for i, orphaned_file in enumerate(orphans, start=1):
    swift_manager.delete_obj(orphaned_file)
    # Count from 1 so the number shown is how many files have been deleted
    # and the final line reads "N / N" rather than "N-1 / N".
    # flush=True is needed because the '\r' progress line never ends with a
    # newline, so a line-buffered stdout would otherwise hold it back.
    print(f'\rDeleting {i} / {len(orphans)}', end='', flush=True)
print(' done. ')
The code below affects all users, has progress bars, and is more tightly coupled to Swift and a deprecated version of CUBE.
Before running this code, install tqdm:
pip install tqdm
Code:
from typing import FrozenSet
from plugininstances.models import PluginInstanceFile
from core.swiftmanager import SwiftManager
from django.conf import settings
from tqdm import tqdm
# Swift client for the container named in the Django settings.
swift_manager = SwiftManager(settings.SWIFT_CONTAINER_NAME, settings.SWIFT_CONNECTION_PARAMS)
conn = swift_manager.get_connection()

# Takes a while and a lot of RAM: the complete listing is held in memory.
print('Listing all files in Swift container...')
# List the configured container rather than a hard-coded 'users', so the
# listing always comes from the same container the manager was built for
# (deletions later go through swift_manager).
swift_container = conn.get_container(settings.SWIFT_CONTAINER_NAME, full_listing=True)
# Index 1 of the result is the list of object records; each record is read
# further down through its 'name' and 'bytes' keys.
swift_files = swift_container[1]
# Keep only feed files, mapping object name -> size in bytes.
with tqdm(swift_files, desc='Checking which files were from a feed...') as pbar:
    swift_feed_files = {
        file_info['name']: file_info['bytes']
        for file_info in pbar
        # Test the part of the name after the first '/' (the leading
        # component is presumably the owner's username). partition() yields
        # '' when the name contains no '/', so such objects are skipped
        # instead of raising IndexError as split(...)[1] would.
        if file_info['name'].partition('/')[2].startswith('feed_')
    }
# Collect every file path the database still references.
# values_list('fname', flat=True) yields the stored path strings directly,
# avoiding the cost of building a full model instance per row just to read
# one field. The progress bar total comes from a separate COUNT query.
with tqdm(
    PluginInstanceFile.objects.values_list('fname', flat=True),
    total=PluginInstanceFile.objects.count(),
    desc='Listing files in DB...',
) as pbar:
    db_feed_files: FrozenSet[str] = frozenset(pbar)
# Orphans are feed files present in Swift that the database does not know.
# This tqdm has no iterable; it only wraps the set difference with a
# labelled progress display.
with tqdm(desc='Getting orphans...'):
    # A dict keys view supports set difference and yields a plain set.
    orphans = swift_feed_files.keys() - db_feed_files

# Report how many objects, and how many gigabytes, would be deleted.
orphan_bytes = sum(swift_feed_files[name] for name in orphans)
orphan_gb = orphan_bytes / 1e9
print(f'Found {len(orphans)} orphans, {orphan_gb:.2f}GB data')
# Destructive step: delete every orphaned object from Swift, showing
# progress as the deletions proceed.
deletion_progress = tqdm(orphans, desc='Deleting orphans from Swift...')
with deletion_progress as pbar:
    for orphan_name in pbar:
        swift_manager.delete_obj(orphan_name)