forked from avikaprasad22/genescope_backend
-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathmutations.py
More file actions
39 lines (31 loc) · 1.04 KB
/
mutations.py
File metadata and controls
39 lines (31 loc) · 1.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import csv
import json
import random
# Build a small JSON dataset of gene mutations from a tab-separated variant
# summary file.
#
# The input is parsed as TSV with a header row; only the "GeneSymbol",
# "PhenotypeList" and "Type" columns are read. Rows whose type appears in
# VALID_TYPES and that name a condition are collected, shuffled, capped at
# MAX_RECORDS, and written to `output_file` as a JSON array of
# {"gene", "mutation", "condition"} objects.
#
# Raises (at run time): FileNotFoundError if `input_file` is missing, and
# KeyError if one of the three columns is absent from the header.
#
# NOTE(review): the file name and column names look like NCBI ClinVar's
# variant_summary.txt export -- confirm the data source.

# Relative path: resolved against the current working directory.
input_file = "variant_summary.txt"
output_file = "mutation_dataset.json"

# Upper bound on the number of records written to the output file.
MAX_RECORDS = 1000

# Define valid mutation types: lower-cased value of the "Type" column ->
# label stored under "mutation" in the output.
VALID_TYPES = {
    "single nucleotide variant": "substitution",
    "deletion": "deletion",
    "insertion": "insertion",
}

filtered_data = []

# newline="" is what the csv module asks for so that it can handle line
# endings (including newlines embedded in quoted fields) itself.
with open(input_file, encoding="utf-8", newline="") as f:
    reader = csv.DictReader(f, delimiter="\t")
    for row in reader:
        # Read all three columns up front so a missing column fails fast
        # with KeyError on the first row.
        gene = row["GeneSymbol"]
        raw_condition = row["PhenotypeList"]
        raw_type = row["Type"]

        # DictReader fills fields missing from a short row with None, hence
        # the `or ""`. Matching is case-insensitive and whitespace-tolerant
        # so that labels such as "Deletion" are not silently dropped.
        # NOTE(review): ClinVar exports appear to capitalize "Deletion" and
        # "Insertion" -- confirm against the actual data file.
        mutation_type = (raw_type or "").strip().lower()
        if mutation_type not in VALID_TYPES:
            continue

        # Keep only the text before the first comma.
        # NOTE(review): if the list is actually delimited by "|" or ";"
        # and names may contain commas, this truncates names instead of
        # selecting the first condition -- verify the delimiter.
        condition = (raw_condition or "").split(",")[0].strip()
        if not condition:
            # Skip rows that would yield an empty condition name.
            continue

        filtered_data.append({
            "gene": gene,
            "mutation": VALID_TYPES[mutation_type],
            "condition": condition,
        })

# Randomize and limit
random.shuffle(filtered_data)
filtered_data = filtered_data[:MAX_RECORDS]

# Save to JSON (json.dump escapes non-ASCII by default; the explicit
# encoding just removes any dependence on the platform default).
with open(output_file, "w", encoding="utf-8") as out:
    json.dump(filtered_data, out, indent=2)

print(f"✅ Saved {len(filtered_data)} mutations to {output_file}")