-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathingest_sample_to_old_db.py
executable file
·70 lines (56 loc) · 2.05 KB
/
ingest_sample_to_old_db.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env python3
import re
import argparse
import csv
from DB_OPS import create_connection, Update_Samples_Table
# Sample-name pattern. Group 2 is used as the cohort and group 3 (the
# two-letter code inside it) as the institution.
_SAMPLE_NAME_RE = re.compile(
    r"^((MoHQ-(JG|CM|GC|MU|MR|XX|HM|CQ)-\w+)-\w+)-\w+-\w+(D|R)(T|N)"
)


def _parse_cohort_institution(sample):
    """Return ``(cohort, institution)`` parsed from a sample name.

    Raises:
        ValueError: if *sample* does not match the expected naming pattern.
    """
    match = _SAMPLE_NAME_RE.search(sample)
    if match is None:
        raise ValueError(
            f"sample name {sample!r} does not match the expected MoHQ pattern"
        )
    return match.group(2), match.group(3)


def main():
    """Ingest the samples listed in a CSV file into the old analysis DB.

    The input file (``-i/--input``) has one record per line with the four
    columns ``patient,sample_dn,sample_dt,sample_rt``. Empty sample fields are
    stored as ``'NA'``. The cohort and institution are parsed from the first
    sample name that is not ``'NA'`` (checked in the order dn, dt, rt).

    All rows are written through ``Update_Samples_Table`` and committed once
    at the end; the connection is closed even if a row fails.

    Raises:
        ValueError: if a non-blank row does not have exactly four columns, has
            no usable sample name, or has a sample name that does not match
            the expected pattern. The message includes the CSV line number.
    """
    # No explicit ``prog``: argparse then derives it from the script actually
    # being run, so the usage text cannot name the wrong file.
    parser = argparse.ArgumentParser(description="Ingest listed samples to old DB.")
    parser.add_argument(
        '-i',
        '--input',
        required=True,
        help="Input file is a csv formatted 'patient,sample_dn,sample_dt,sample_rt'",
    )
    args = parser.parse_args()

    beluga_db = "/lustre03/project/6007512/C3G/projects/MOH_PROCESSING/DATABASE/MOH_analysis.db"

    # Connect to the db
    connection = create_connection(beluga_db)
    try:
        # newline='' lets the csv module handle line endings itself.
        with open(args.input, mode='r', newline='') as file:
            csv_reader = csv.reader(file)
            for row in csv_reader:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                if not row:
                    continue
                if len(row) != 4:
                    raise ValueError(
                        f"{args.input}, line {csv_reader.line_num}: expected 4 "
                        f"columns (patient,sample_dn,sample_dt,sample_rt), "
                        f"got {len(row)}"
                    )

                patient = row[0]
                sample_dn, sample_dt, sample_rt = (name or 'NA' for name in row[1:])

                # Determine which sample field to use for parsing: the first
                # one that is actually set.
                sample = next(
                    (s for s in (sample_dn, sample_dt, sample_rt) if s != 'NA'),
                    None,
                )
                if sample is None:
                    raise ValueError(
                        f"{args.input}, line {csv_reader.line_num}: patient "
                        f"{patient!r} has no sample name to parse"
                    )

                # Parse cohort and institution
                try:
                    cohort, institution = _parse_cohort_institution(sample)
                except ValueError as err:
                    raise ValueError(
                        f"{args.input}, line {csv_reader.line_num}: {err}"
                    ) from err

                Update_Samples_Table(
                    connection,
                    patient,
                    patient,
                    institution,
                    cohort,
                    sample_dn,
                    sample_dn,
                    sample_dt,
                    sample_dt,
                    sample_rt,
                    sample_rt
                )
        # Commit only after every row was processed successfully.
        connection.commit()
    finally:
        connection.close()
# Run the ingestion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()