-
Notifications
You must be signed in to change notification settings - Fork 3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
TAG MOP Workflows #55
Merged
Merged
Changes from 21 commits
Commits
Show all changes
23 commits
Select commit
Hold shift + click to select a range
53dee5b
first Mop WDL commit
yueyaog b4c2b03
added mop to dockstore
yueyaog c23058a
report number of sys files to remove in the output
yueyaog bc3a2b4
Let the tasks to be executed in order
yueyaog 503997a
updated TAG mop command
yueyaog ced2d00
output mop sys files
yueyaog a2819a7
separate failed removal and mop WDL
yueyaog 9cae31b
only delete sys files when there is any
yueyaog 88328d6
Only mop when there is files to mop
yueyaog d03c094
make the mop output optional
yueyaog e418294
output the file that were mopped
yueyaog fa5818f
update output file names
yueyaog c3bc236
update meta info
yueyaog 5fabad7
print sys files
yueyaog 03a9cc0
added list of sys files that will be deleted
yueyaog 33474ec
updated output name
yueyaog c9bce71
output the number of failed submission to clean
yueyaog 7db0e62
remove files even they are in the old Terra workspaces
yueyaog caa8202
remove inrelevant input in json
yueyaog e759ff3
print out comment if this workspace has been cleaned up
yueyaog 1f755d8
update input json again
yueyaog 6381ad6
updated based on PR comments
yueyaog 9e5f954
Merge branch 'master' into tag-mop-yg
yueyaog File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
{ | ||
"TAG_Mop.mopDocker": "String (optional, default = \"us.gcr.io/tag-team-160914/neovax-parsley:2.2.1.0\")", | ||
"TAG_Mop.workspaceName": "String", | ||
"TAG_Mop.namespace": "String (optional, default = \"broadtagteam\")", | ||
"TAG_Mop.runMop": "Boolean" | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
version 1.0 | ||
|
||
# TAG_Mop: cleans up a Terra workspace bucket.
#   1. rmSysfiles always runs and removes execution system files
#      (stdout/stderr/rc/script/... objects) from the workspace bucket.
#   2. mop runs only when runMop is true and invokes `fissfc mop` on the workspace.
workflow TAG_Mop{
    input{
        # Terra namespace that owns the workspace (forwarded to FISS).
        String namespace = "broadtagteam"
        # Name of the Terra workspace to clean.
        String workspaceName
        # Image used by both tasks; the tasks activate the NeoVax-Input-Parser env in it.
        String mopDocker = "us.gcr.io/tag-team-160914/neovax-parsley:2.2.1.0"
        # When false, only the system-file removal is performed.
        Boolean runMop
    }

    call rmSysfiles {
        input:
            namespace = namespace,
            workspaceName = workspaceName,
            mopDocker = mopDocker
    }

    if (runMop){
        call mop {
            input:
                namespace = namespace,
                workspaceName = workspaceName,
                mopDocker = mopDocker,
                # Passing the count creates a data dependency, so mop only
                # starts after rmSysfiles has finished.
                sysfiles = rmSysfiles.deleted_sys_files
        }
    }

    output{
        # Number of system files rmSysfiles selected for deletion.
        Int num_deleted_sys_files = rmSysfiles.deleted_sys_files
        # One gs:// path per line for every selected system file.
        File deleted_sys_files = rmSysfiles.sys_files_to_delete
        # The mop outputs are optional because the call sits inside `if (runMop)`.
        Int? num_mopped_files = mop.num_of_files_to_mop
        File? mopped_files = mop.mopped_files
    }

    meta {
        author: "Yueyao Gao"
        email: "[email protected]"
        description: "TAG Mop contains two tasks: rmSysfiles and mop. rmSysfiles removes system files that were generated from submissions from a Terra workspace. mop runs the FISS Mop function. Suggest to run after cleanupFailedSubmission.wdl"
    }
}
|
||
# rmSysfiles: finds and deletes execution system files in a Terra workspace bucket.
#
# Inputs:
#   namespace, workspaceName - identify the workspace; its bucket name is looked up via FISS.
#   mopDocker                - image providing the NeoVax-Input-Parser env, FISS,
#                              google-cloud-storage and gsutil.
#   memory, cpu              - runtime resources (optional; defaults keep the
#                              previously hard-coded 32 GiB / 8 CPUs).
# Outputs:
#   deleted_sys_files   - number of objects whose name ends with one of the system-file suffixes.
#   sys_files_to_delete - text file with one gs:// path per matched object.
task rmSysfiles {
    input{
        String namespace
        String workspaceName
        String mopDocker
        # Review request on this PR: "make mem and cpus optional".
        String memory = "32 GiB"
        Int cpu = 8
    }
    command <<<
        source activate NeoVax-Input-Parser
        python <<CODE
        import subprocess
        import sys
        from google.cloud import storage
        import firecloud.api as fapi
        #
        namespace = "~{namespace}"
        workspaceName = "~{workspaceName}"
        bucket_name = fapi.get_workspace(namespace, workspaceName).json()['workspace']['bucketName']
        #
        # Collect the system files to delete.
        # str.endswith accepts a tuple of suffixes, so every blob is tested once
        # and can never be listed twice even if suffixes overlap in the future.
        storage_client = storage.Client()
        blobs = storage_client.list_blobs(bucket_name, projection='full')
        patterns_to_remove = ("stdout.log", "stderr.log", "localization.sh", "gcs_transfer.sh", "/stdout", "/stderr", "/rc", "-rc.txt", "/memory_retry_rc", "/output", "/script", "/exec.sh")
        sys_files_to_delete = [
            f"gs://{bucket_name}/{blob.name}"
            for blob in blobs
            if blob.name.endswith(patterns_to_remove)
        ]
        #
        # Output the number of system files to delete and the list itself
        with open('num_of_sys_files_to_delete.txt', 'w') as f:
            f.write(str(len(sys_files_to_delete)))
        print(f"System Files to Delete in {namespace}/{workspaceName}: ", len(sys_files_to_delete))
        with open('sys_files_to_delete.txt', 'w') as f:
            for file in sys_files_to_delete:
                f.write(file + '\n')
        #
        if len(sys_files_to_delete) == 0:
            print("No system files to delete")
        else:
            # Deletion is issued once per distinct file name (last path component)
            # through a bucket-wide gsutil wildcard, not once per object.
            for pattern in sorted({path.split('/')[-1] for path in sys_files_to_delete}):
                result = subprocess.run(['gsutil', '-m', 'rm', f'gs://{bucket_name}/**/{pattern}'])
                # Keep the cleanup best-effort, but do not hide a failed removal.
                if result.returncode != 0:
                    print(f"WARNING: gsutil rm exited with code {result.returncode} for pattern {pattern}", file=sys.stderr)
        CODE
    >>>
    output{
        Int deleted_sys_files = read_int("num_of_sys_files_to_delete.txt")
        File sys_files_to_delete = "sys_files_to_delete.txt"
    }
    runtime {
        docker: mopDocker
        memory: memory
        cpu: cpu
    }
}
|
||
# mop: runs `fissfc mop` on a Terra workspace.
#
# A dry run is executed first and its output saved to mop_dry_run.txt; the real
# mop is only invoked when that file is non-empty.
#
# Inputs:
#   namespace, workspaceName - identify the workspace (passed to fissfc as -p / -w).
#   mopDocker                - image providing the NeoVax-Input-Parser env and fissfc.
#   sysfiles                 - count reported by rmSysfiles; only echoed to the log here.
#   memory, cpu              - runtime resources (optional; defaults keep the
#                              previously hard-coded 32 GiB / 8 CPUs).
# Outputs:
#   num_of_files_to_mop - number of lines printed by the dry run.
#   mopped_files        - the dry-run output file.
task mop {
    input{
        String namespace
        String workspaceName
        String mopDocker
        Int sysfiles
        String memory = "32 GiB"
        Int cpu = 8
    }
    command <<<
        source activate NeoVax-Input-Parser
        # The number of system files that were deleted
        echo "System Files Deleted: ~{sysfiles}"

        # Dry run Mop (arguments quoted so names containing spaces stay one word)
        fissfc mop -w "~{workspaceName}" -p "~{namespace}" --dry-run > mop_dry_run.txt

        # Count the dry-run lines once and reuse the value below
        num_to_mop=$(wc -l < mop_dry_run.txt)
        echo "Files to mop: $num_to_mop"
        echo "$num_to_mop" > num_of_files_to_mop.txt

        # Mop
        if [ "$num_to_mop" -eq 0 ]; then
            echo "No files to mop"
        else
            fissfc mop -w "~{workspaceName}" -p "~{namespace}"
        fi
    >>>
    output{
        Int num_of_files_to_mop = read_int("num_of_files_to_mop.txt")
        File mopped_files = "mop_dry_run.txt"
    }
    runtime {
        docker: mopDocker
        memory: memory
        cpu: cpu
    }
}
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
add namespace broadtagteam in the client call