-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path01_fetch_projects.py
48 lines (43 loc) · 1.22 KB
/
01_fetch_projects.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import argparse
from gdcutil import GDCClient
# Query configuration for the GDC "projects" endpoint.
# Field mapping reference: https://api.gdc.cancer.gov/projects/_mapping
ENDPOINT = "/projects"

# Field names handed to GDCClient by the script entry point.
fields = [
    # Project-level attributes.
    "project_id",
    "name",
    # "dbgap_accession_number",  # project-level accession, currently disabled
    # Attributes of the owning program.
    "program.program_id",
    "program.name",
    "program.dbgap_accession_number",
    # Descriptive / status attributes.
    "disease_type",
    "primary_site",
    "releasable",
    "released",
    "state",
    # Overall summary counters.
    "summary.case_count",
    "summary.file_count",
    "summary.file_size",
    # Summary broken down by data category.
    "summary.data_categories.case_count",
    "summary.data_categories.data_category",
    "summary.data_categories.file_count",
    # Summary broken down by experimental strategy.
    "summary.experimental_strategies.case_count",
    "summary.experimental_strategies.experimental_strategy",
    "summary.experimental_strategies.file_count",
]

# Dotted prefixes in `fields` that have several sub-fields each.
# NOTE(review): not referenced anywhere else in the visible part of this
# script -- confirm whether GDCClient (or another script) is meant to use it.
nested = [
    "summary.data_categories",
    "summary.experimental_strategies",
]

# Filter expression handed to GDCClient: program.name = "TCGA".
filters = {
    "op": "=",
    "content": {
        "field": "program.name",
        "value": ["TCGA"],
    },
}
def main():
    """Fetch project metadata from the GDC API and hand it to ``to_json``.

    Reads the required ``-o/--output`` command-line option, builds a
    ``GDCClient`` from the module-level ``ENDPOINT``, ``fields`` and
    ``filters``, and calls its ``to_json`` method with the output path
    (presumably writing the fetched records there as JSON -- the client
    implementation lives in ``gdcutil`` and is not shown here).
    """
    cli = argparse.ArgumentParser(description='Fetch project metadata from GDC API')
    cli.add_argument('-o', '--output', type=str, required=True, help='Output file path')
    options = cli.parse_args()

    GDCClient(ENDPOINT, fields, filters).to_json(options.output)


if __name__ == '__main__':
    main()