-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
5497c12
commit 79527dc
Showing
22 changed files
with
752 additions
and
24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
[{
    "operation": "summarize_column_values",
    "description": "Summarize the column values in an excerpt.",
    "parameters": {
        "summary_name": "column_values",
        "summary_filename": "column_values",
        "skip_columns": ["onset", "duration", "sample", "HED"],
        "value_columns": ["stim_file", "response_time"],
        "max_categorical": 100
    }
}]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
[{
    "operation": "summarize_hed_tags",
    "description": "Summarize the HED tags in the dataset.",
    "parameters": {
        "summary_name": "summarize_hed_tags",
        "summary_filename": "summarize_hed_tags",
        "tags": {
            "Sensory events": ["Sensory-event", "Sensory-presentation", "Sensory-attribute",
                               "Experimental-stimulus", "Task-stimulus-role",
                               "Task-attentional-demand", "Incidental", "Instructional", "Warning"],
            "Agent actions": ["Agent-action", "Agent", "Action", "Agent-task-role",
                              "Task-action-type", "Participant-response"],
            "Objects": ["Item"],
            "Other events": ["Event", "Task-event-role", "Mishap"],
            "Exclude tags": ["Def", "Definition", "Event-context", "Def-expand", "Label", "Description"]
        },
        "include_context": true,
        "replace_defs": true,
        "remove_types": ["Condition-variable", "Task"]
    }
}]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#!/bin/bash
# SLURM batch script: runs the HED summary pipeline (run_hed_summary.py).
# Submit with: sbatch <this file>
#SBATCH -J hed-summary
#SBATCH --partition=shared
#SBATCH --nodes=1
# 128 GB requested — presumably sized for the largest datasets; TODO confirm
#SBATCH --mem=128G
#SBATCH --account=csd403
# Do not restart the job automatically if the node fails / is preempted.
#SBATCH --no-requeue
#SBATCH -t 48:00:00
#SBATCH --ntasks-per-node=1
#SBATCH --output="/home/dtyoung/NEMAR-pipeline/HED/summary/hed_summary.out"
#SBATCH -e /home/dtyoung/NEMAR-pipeline/HED/summary/hed_summary.err
# Run from the summary directory so the script's relative paths
# (./hed_summary_cmd.json, ../hed_python, ...) resolve correctly.
cd /home/dtyoung/NEMAR-pipeline/HED/summary
python run_hed_summary.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
[{
    "operation": "summarize_hed_type",
    "description": "Summarize conditional variable",
    "parameters": {
        "summary_name": "hed_type_summary",
        "summary_filename": "hed_type_summary",
        "type_tag": "condition-variable"
    }
}]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Bookkeeping for a batch run over OpenNeuro datasets (ds-number accessions).
# Datasets whose processing failed but which do contain event files.
failed_and_has_events = ['ds004043', 'ds002691', 'ds004278', 'ds004011', 'ds004033', 'ds004603', 'ds000117', 'ds004368', 'ds003987', 'ds004019', 'ds003885', 'ds004315', 'ds003602', 'ds003844', 'ds002723', 'ds004252', 'ds004577', 'ds004078', 'ds004561', 'ds004256', 'ds002034', 'ds004317', 'ds002725', 'ds004080', 'ds003638', 'ds004357', 'ds003352', 'ds003710', 'ds004330', 'ds003848', 'ds003766', 'ds002761', 'ds004346', 'ds004212', 'ds004505', 'ds003195', 'ds004477', 'ds004152', 'ds004515', 'ds004264', 'ds004196', 'ds004395', 'ds002721', 'ds001787', 'ds001810', 'ds002893', 'ds004018', 'ds003816', 'ds004519', 'ds004554', 'ds004574', 'ds004107', 'ds004446', 'ds004572', 'ds003505', 'ds003801', 'ds004532', 'ds003570', 'ds004262', 'ds003800', 'ds004100', 'ds004147', 'ds004295', 'ds004306', 'ds004580', 'ds004444', 'ds004511', 'ds004197', 'ds002720', 'ds004473', 'ds002158', 'ds003194', 'ds004215', 'ds002833', 'ds004367', 'ds003670', 'ds004369', 'ds004151', 'ds004194', 'ds003039', 'ds004579', 'ds002778', 'ds002718', 'ds004460', 'ds003374', 'ds000248', 'ds003753', 'ds004229', 'ds004575', 'ds004457', 'ds004347', 'ds002791', 'ds001971', 'ds004017', 'ds003751', 'ds003876', 'ds003688', 'ds003754', 'ds003694', 'ds004502', 'ds003822', 'ds004356', 'ds003922', 'ds004148', 'ds002799', 'ds002722', 'ds002094', 'ds004024', 'ds003838', 'ds004584', 'ds003739', 'ds004040', 'ds004521', 'ds004276', 'ds004015', 'ds003190', 'ds004448', 'ds002680', 'ds004010', 'ds004588', 'ds002578', 'ds004520', 'ds004284', 'ds002218', 'ds004348', 'ds003846', 'ds002724', 'ds003887', 'ds004447', 'ds003774']
# All datasets whose processing failed (superset of the list above; also
# contains the stray entry 'metadata' — presumably a non-dataset directory
# swept up by a listing; verify against the run that produced it).
failed = ['ds004043', 'ds002691', 'ds004278', 'ds004033', 'ds004011', 'ds004603', 'ds000117', 'ds004368', 'ds003775', 'ds003987', 'ds004019', 'ds004551', 'ds004315', 'ds003602', 'ds003885', 'ds003844', 'ds002723', 'ds003947', 'ds002885', 'ds004252', 'ds004577', 'ds002001', 'ds004078', 'ds004166', 'ds004561', 'ds004256', 'ds002034', 'ds003702', 'ds004317', 'ds002725', 'ds004080', 'ds004200', 'ds003638', 'ds004357', 'ds003352', 'ds003710', 'ds004330', 'ds003848', 'ds002336', 'ds003766', 'ds002761', 'ds004346', 'ds004212', 'ds004447', 'ds003195', 'ds004477', 'ds004152', 'ds003810', 'ds004515', 'ds004264', 'ds004196', 'ds004395', 'ds002721', 'ds001787', 'ds001810', 'ds002893', 'ds004018', 'ds003816', 'ds004519', 'ds004554', 'ds004574', 'ds003555', 'ds004381', 'ds004107', 'ds004446', 'ds004572', 'ds003505', 'ds003801', 'ds004532', 'ds003570', 'ds004262', 'ds004398', 'ds004127', 'ds003800', 'ds004100', 'ds004147', 'ds004295', 'ds004306', 'ds004580', 'ds004444', 'ds004511', 'ds004197', 'ds004000', 'ds002720', 'ds004473', 'ds002158', 'ds003194', 'ds004215', 'ds003944', 'ds002833', 'ds004367', 'ds003670', 'ds004369', 'ds003078', 'ds004151', 'ds003969', 'ds004075', 'ds004408', 'ds004194', 'ds003039', 'ds004579', 'ds003626', 'ds002718', 'ds002778', 'ds004460', 'ds003374', 'ds000248', 'ds001784', 'ds000246', 'ds003753', 'ds003768', 'ds004229', 'ds002908', 'ds004575', 'ds004457', 'ds004347', 'ds002791', 'ds001971', 'ds004017', 'ds003751', 'ds002338', 'ds003876', 'ds003688', 'ds003754', 'ds003694', 'ds004502', 'ds003822', 'ds004356', 'ds003922', 'metadata', 'ds001849', 'ds004148', 'ds002799', 'ds002722', 'ds002094', 'ds004024', 'ds003838', 'ds003805', 'ds004022', 'ds004584', 'ds003739', 'ds004040', 'ds004521', 'ds004276', 'ds003190', 'ds000247', 'ds004015', 'ds004448', 'ds002680', 'ds004067', 'ds004010', 'ds004588', 'ds002578', 'ds004520', 'ds004284', 'ds004186', 'ds002218', 'ds004504', 'ds004348', 'ds003846', 'ds002724', 'ds003887', 'ds003380', 'ds004505', 'ds003774']
# Datasets whose processing completed successfully.
finished = ['ds001785', 'ds002550', 'ds002712', 'ds002814', 'ds003004', 'ds003029', 'ds003061', 'ds003082', 'ds003104', 'ds003343', 'ds003392', 'ds003420', 'ds003421', 'ds003458', 'ds003474', 'ds003478', 'ds003483', 'ds003490', 'ds003498', 'ds003506', 'ds003509', 'ds003516', 'ds003517', 'ds003518', 'ds003519', 'ds003522', 'ds003523', 'ds003568', 'ds003574', 'ds003620', 'ds003633', 'ds003645', 'ds003655', 'ds003682', 'ds003690', 'ds003703', 'ds003708', 'ds003825', 'ds004105', 'ds004106', 'ds004117', 'ds004118', 'ds004119', 'ds004120', 'ds004121', 'ds004122', 'ds004123', 'ds004350', 'ds004362']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
|
||
import sys
sys.path.insert(0, "../hed_python")
from hed.tools.remodeling.cli.run_remodel import main
import os
import re
import shutil

# Run the 'summarize_column_values' remodeling operation over selected
# OpenNeuro datasets and copy the newest summary JSON produced by the
# remodeler into each dataset's work directory as events_report.json.
raw_dir = '/expanse/projects/nemar/openneuro'
model_path = './column_values_summary_cmd.json'
outputdir = '/expanse/projects/nemar/openneuro/processed/event_summaries'
# Precompile: dot escaped and end anchored so only real '*.json' summary
# files match (the unescaped '.' previously matched any character).
summary_pattern = re.compile(r'column_values.*\.json$')
dsnumbers = ['ds003645']
for f in dsnumbers:
    print(f'processing {f}')
    data_root = os.path.join(raw_dir, f)
    work_dir = os.path.join(outputdir, f)
    # makedirs(exist_ok=True) also creates missing parents and does not
    # crash when the directory already exists (os.mkdir did both).
    os.makedirs(work_dir, exist_ok=True)
    if os.path.isdir(data_root):
        arg_list1 = [data_root, model_path, '-x', 'derivatives', 'code', 'stimuli',
                     '-nb', '-nu', '-w', work_dir, '-b', '-i', 'none', '-v']
        try:
            main(arg_list1)
            summary_outputdir = os.path.join(work_dir, 'remodel', 'summaries', 'column_values')
            summaries = [file for file in os.listdir(summary_outputdir)
                         if summary_pattern.match(file)]
            if summaries:
                # Lexicographic sort puts the newest timestamped summary last.
                summaries.sort()
                summary_outputfile = os.path.join(summary_outputdir, summaries[-1])
                shutil.copyfile(summary_outputfile, os.path.join(work_dir, 'events_report.json'))
        except Exception as e:
            # Best-effort batch: report the failure and continue with the
            # next dataset rather than aborting the whole run.
            print(f"Error for {f}")
            print(e)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
|
||
import sys | ||
sys.path.insert(0, "../hed_python") | ||
from hed.tools.remodeling.cli.run_remodel import main | ||
from hed.tools.visualization import summary_to_dict, create_wordcloud, word_cloud_to_svg | ||
import os | ||
import re | ||
import json | ||
|
||
def generate_json_report(hed_summary, output):
    """Condense a 'summarize_hed_tags' summary file into a compact JSON report.

    Parameters
    ----------
    hed_summary : str
        Path to the summary JSON produced by the HED remodeler.
    output : str
        Path where the condensed report is written.

    Returns
    -------
    dict
        The condensed report, keyed by 'event files', 'events',
        'events/file', 'Main tags', 'Other tags', and 'Condition variables'
        (the last is currently always empty).
    """
    summary = {'Main tags': {}, 'Other tags': [], 'Condition variables': {}}
    # Load into a new name instead of shadowing the path parameter.
    with open(hed_summary, 'r') as f:
        full_summary = json.load(f)
    overall = full_summary['Overall summary']
    nfiles = overall['Total files']
    nevents = overall['Total events']
    summary['event files'] = nfiles
    summary['events'] = nevents
    # Guard against datasets with no event files (ZeroDivisionError before).
    summary['events/file'] = nevents / nfiles if nfiles else 0
    main_tags_summary_dict = overall['Specifics']['Main tags']
    other_tags_summary_dict = overall['Specifics']['Other tags']
    # Keep only tag name and event count from each per-tag record.
    for main_tag, tag_dicts in main_tags_summary_dict.items():
        summary['Main tags'][main_tag] = [
            {'tag': td['tag'], 'events': td['events']} for td in tag_dicts]
    summary['Other tags'] = [
        {'tag': td['tag'], 'events': td['events']} for td in other_tags_summary_dict]
    with open(output, 'w') as out:
        json.dump(summary, out)
    return summary
|
||
def generate_wordcloud(summary_file, work_dir):
    """Render the HED tag summary in *summary_file* as word_cloud.svg in *work_dir*."""
    with open(summary_file) as fin:
        tag_counts = summary_to_dict(json.load(fin))
    # Mask image shapes the cloud; width is derived from the mask.
    cloud = create_wordcloud(tag_counts, mask_path="./word_mask.png", height=400, width=None)
    with open(work_dir + "/word_cloud.svg", "w") as outfile:
        outfile.writelines(word_cloud_to_svg(cloud))
|
||
|
||
# Batch driver: run the 'summarize_hed_tags' remodeling operation for each
# dataset, then condense the newest summary into hed_report.json (and
# optionally a word cloud). Per-dataset errors are logged and the run
# continues with the next dataset.
raw_dir = '/expanse/projects/nemar/openneuro'
hed_summary_model_path = './hed_summary_cmd.json'
outputdir = '/expanse/projects/nemar/openneuro/processed/event_summaries'
error_logfile = './run_hed_summary.err'
run_wordcloud = False
# Dot escaped and end anchored; '.*' also accepts timestamped summary file
# names, matching the pattern the column-summary script uses.
hed_summary_pattern = re.compile(r'summarize_hed_tags.*\.json$')

# TODO: use NEMAR database
dsnumbers = ['ds004635','ds004588','ds004554','ds004521','ds004520','ds004519','ds004362','ds004350','ds004166','ds004123','ds004122','ds004121','ds004120','ds004119','ds004118','ds004117','ds004106','ds004105','ds003645','ds003061','ds002718']
# Context manager guarantees the error log is flushed and closed even if
# the loop itself raises (the bare open()/close() pair did not).
with open(error_logfile, 'w') as fid_err:
    for f in dsnumbers:
        print(f'processing {f}')
        try:
            data_root = os.path.join(raw_dir, f)
            work_dir = os.path.join(outputdir, f)
            # exist_ok avoids crashing when the work directory already exists.
            os.makedirs(work_dir, exist_ok=True)
            if os.path.isdir(data_root):
                arg_list1 = [data_root, hed_summary_model_path, '-x', 'derivatives', 'code', 'stimuli', 'sourcedata', '.datalad',
                             '-nu', '-nb', '-w', work_dir, '-b', '-i', 'none', "-v"]
                main(arg_list1)
                hed_summary_outputdir = os.path.join(work_dir, 'remodel', 'summaries', 'summarize_hed_tags')
                hed_summaries = [file for file in os.listdir(hed_summary_outputdir)
                                 if hed_summary_pattern.match(file)]
                if hed_summaries:
                    # Lexicographic sort puts the newest summary last.
                    hed_summaries.sort()
                    hed_summary_outputfile = os.path.join(hed_summary_outputdir, hed_summaries[-1])
                    generate_json_report(hed_summary_outputfile, os.path.join(work_dir, 'hed_report.json'))
                    if run_wordcloud:
                        # Generate word cloud
                        generate_wordcloud(hed_summary_outputfile, work_dir)
        except Exception as e:
            fid_err.write(f'Error processing {f}: {e}\n')
|
||
def get_hed_datasets():
    """Placeholder for querying which datasets have HED annotations.

    Currently unimplemented; always returns None.
    """
    # TODO: implement (see the NEMAR-database TODO above the driver loop)
    return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
|
||
import sys
sys.path.insert(0, "../hed_python")
from hed.tools.visualization import summary_to_dict, create_wordcloud, word_cloud_to_svg
import os
import re
import shutil
import json

# Generate an SVG word cloud for each dataset from its previously computed
# 'summarize_hed_tags' summary (produced by the run_hed_summary driver).
raw_dir = '/expanse/projects/nemar/openneuro'
hed_summary_model_path = './hed_summary_cmd.json'
hed_type_summary_model_path = './hed_type_summary_cmd.json'
outputdir = '/expanse/projects/nemar/openneuro/processed/event_summaries'

dsnumbers = ['ds004635','ds004588','ds004554','ds004521','ds004520','ds004519','ds004362','ds004350','ds004166','ds004123','ds004122','ds004121','ds004120','ds004119','ds004118','ds004117','ds004106','ds004105','ds003645','ds003061','ds002718']
for f in dsnumbers:
    print(f'processing {f}')
    data_root = os.path.join(raw_dir, f)
    work_dir = os.path.join(outputdir, f)
    hed_summary_outputfile = os.path.join(
        work_dir, 'remodel', 'summaries', 'summarize_hed_tags', 'summarize_hed_tags.json')
    # makedirs(exist_ok=True) tolerates an existing directory and creates
    # missing parents (os.mkdir crashed on both).
    os.makedirs(work_dir, exist_ok=True)
    if os.path.isdir(data_root):
        try:
            with open(hed_summary_outputfile, 'r') as fin:
                hed_summary = json.load(fin)
            loaded_dict = summary_to_dict(hed_summary)
            # Mask image shapes the cloud; width is derived from the mask.
            word_cloud = create_wordcloud(loaded_dict, mask_path="./word_mask.png", height=400, width=None)
            svg_data = word_cloud_to_svg(word_cloud)
            with open(os.path.join(work_dir, "word_cloud.svg"), "w") as outfile:
                outfile.writelines(svg_data)
        except Exception as e:
            # Best-effort batch: report and continue with the next dataset.
            print(f"Error for {f}")
            print(e)
|
||
|
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Oops, something went wrong.