Skip to content

Commit

Permalink
latest change
Browse files Browse the repository at this point in the history
  • Loading branch information
dungscout96 committed Sep 25, 2023
1 parent 5497c12 commit 79527dc
Show file tree
Hide file tree
Showing 22 changed files with 752 additions and 24 deletions.
11 changes: 11 additions & 0 deletions HED/summary/column_values_summary_cmd.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[{
"operation": "summarize_column_values",
"description": "Summarize the column values in an excerpt.",
"parameters": {
"summary_name": "column_values",
"summary_filename": "column_values",
"skip_columns": ["onset", "duration", "sample", "HED"],
"value_columns": ["stim_file", "response_time"],
"max_categorical": 100
}
}]
21 changes: 21 additions & 0 deletions HED/summary/hed_summary_cmd.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
[{
"operation": "summarize_hed_tags",
"description": "Summarize the HED tags in the dataset.",
"parameters": {
"summary_name": "summarize_hed_tags",
"summary_filename": "summarize_hed_tags",
"tags": {
"Sensory events": ["Sensory-event", "Sensory-presentation", "Sensory-attribute",
"Experimental-stimulus", "Task-stimulus-role",
"Task-attentional-demand", "Incidental", "Instructional", "Warning"],
"Agent actions": ["Agent-action", "Agent", "Action", "Agent-task-role",
"Task-action-type", "Participant-response"],
"Objects": ["Item"],
"Other events": ["Event", "Task-event-role", "Mishap"],
"Exclude tags": ["Def", "Definition", "Event-context", "Def-expand", "Label", "Description"]
},
"include_context": true,
"replace_defs": true,
"remove_types": ["Condition-variable", "Task"]
}
}]
13 changes: 13 additions & 0 deletions HED/summary/hed_summary_sbatch
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash
# Slurm batch script: run the HED tag summary pipeline (run_hed_summary.py)
# on the Expanse cluster. Submit with: sbatch hed_summary_sbatch
#SBATCH -J hed-summary
#SBATCH --partition=shared
#SBATCH --nodes=1
#SBATCH --mem=128G
#SBATCH --account=csd403
#SBATCH --no-requeue
#SBATCH -t 48:00:00
#SBATCH --ntasks-per-node=1
#SBATCH --output="/home/dtyoung/NEMAR-pipeline/HED/summary/hed_summary.out"
#SBATCH -e /home/dtyoung/NEMAR-pipeline/HED/summary/hed_summary.err
# Run from the summary directory so the script's relative paths
# (./hed_summary_cmd.json, ./word_mask.png, ./run_hed_summary.err) resolve.
cd /home/dtyoung/NEMAR-pipeline/HED/summary
python run_hed_summary.py
10 changes: 10 additions & 0 deletions HED/summary/hed_type_summary_cmd.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[{
"operation": "summarize_hed_type",
"description": "Summarize conditional variable",
"parameters": {
"summary_name": "hed_type_summary",
"summary_filename": "hed_type_summary",
"type_tag": "condition-variable"
}
}]

3 changes: 3 additions & 0 deletions HED/summary/notes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Bookkeeping from HED summary batch runs over OpenNeuro datasets
# (see run_hed_summary.py / run_event_summary.py in this directory).
# NOTE(review): the lists are hand-maintained; names suggest "failed_and_has_events"
# are failed runs whose datasets do contain events files — confirm with the run logs.
# 'metadata' in `failed` looks like a stray directory name, not a dataset id.
failed_and_has_events = ['ds004043', 'ds002691', 'ds004278', 'ds004011', 'ds004033', 'ds004603', 'ds000117', 'ds004368', 'ds003987', 'ds004019', 'ds003885', 'ds004315', 'ds003602', 'ds003844', 'ds002723', 'ds004252', 'ds004577', 'ds004078', 'ds004561', 'ds004256', 'ds002034', 'ds004317', 'ds002725', 'ds004080', 'ds003638', 'ds004357', 'ds003352', 'ds003710', 'ds004330', 'ds003848', 'ds003766', 'ds002761', 'ds004346', 'ds004212', 'ds004505', 'ds003195', 'ds004477', 'ds004152', 'ds004515', 'ds004264', 'ds004196', 'ds004395', 'ds002721', 'ds001787', 'ds001810', 'ds002893', 'ds004018', 'ds003816', 'ds004519', 'ds004554', 'ds004574', 'ds004107', 'ds004446', 'ds004572', 'ds003505', 'ds003801', 'ds004532', 'ds003570', 'ds004262', 'ds003800', 'ds004100', 'ds004147', 'ds004295', 'ds004306', 'ds004580', 'ds004444', 'ds004511', 'ds004197', 'ds002720', 'ds004473', 'ds002158', 'ds003194', 'ds004215', 'ds002833', 'ds004367', 'ds003670', 'ds004369', 'ds004151', 'ds004194', 'ds003039', 'ds004579', 'ds002778', 'ds002718', 'ds004460', 'ds003374', 'ds000248', 'ds003753', 'ds004229', 'ds004575', 'ds004457', 'ds004347', 'ds002791', 'ds001971', 'ds004017', 'ds003751', 'ds003876', 'ds003688', 'ds003754', 'ds003694', 'ds004502', 'ds003822', 'ds004356', 'ds003922', 'ds004148', 'ds002799', 'ds002722', 'ds002094', 'ds004024', 'ds003838', 'ds004584', 'ds003739', 'ds004040', 'ds004521', 'ds004276', 'ds004015', 'ds003190', 'ds004448', 'ds002680', 'ds004010', 'ds004588', 'ds002578', 'ds004520', 'ds004284', 'ds002218', 'ds004348', 'ds003846', 'ds002724', 'ds003887', 'ds004447', 'ds003774']
# All dataset runs that failed (superset of the list above).
failed = ['ds004043', 'ds002691', 'ds004278', 'ds004033', 'ds004011', 'ds004603', 'ds000117', 'ds004368', 'ds003775', 'ds003987', 'ds004019', 'ds004551', 'ds004315', 'ds003602', 'ds003885', 'ds003844', 'ds002723', 'ds003947', 'ds002885', 'ds004252', 'ds004577', 'ds002001', 'ds004078', 'ds004166', 'ds004561', 'ds004256', 'ds002034', 'ds003702', 'ds004317', 'ds002725', 'ds004080', 'ds004200', 'ds003638', 'ds004357', 'ds003352', 'ds003710', 'ds004330', 'ds003848', 'ds002336', 'ds003766', 'ds002761', 'ds004346', 'ds004212', 'ds004447', 'ds003195', 'ds004477', 'ds004152', 'ds003810', 'ds004515', 'ds004264', 'ds004196', 'ds004395', 'ds002721', 'ds001787', 'ds001810', 'ds002893', 'ds004018', 'ds003816', 'ds004519', 'ds004554', 'ds004574', 'ds003555', 'ds004381', 'ds004107', 'ds004446', 'ds004572', 'ds003505', 'ds003801', 'ds004532', 'ds003570', 'ds004262', 'ds004398', 'ds004127', 'ds003800', 'ds004100', 'ds004147', 'ds004295', 'ds004306', 'ds004580', 'ds004444', 'ds004511', 'ds004197', 'ds004000', 'ds002720', 'ds004473', 'ds002158', 'ds003194', 'ds004215', 'ds003944', 'ds002833', 'ds004367', 'ds003670', 'ds004369', 'ds003078', 'ds004151', 'ds003969', 'ds004075', 'ds004408', 'ds004194', 'ds003039', 'ds004579', 'ds003626', 'ds002718', 'ds002778', 'ds004460', 'ds003374', 'ds000248', 'ds001784', 'ds000246', 'ds003753', 'ds003768', 'ds004229', 'ds002908', 'ds004575', 'ds004457', 'ds004347', 'ds002791', 'ds001971', 'ds004017', 'ds003751', 'ds002338', 'ds003876', 'ds003688', 'ds003754', 'ds003694', 'ds004502', 'ds003822', 'ds004356', 'ds003922', 'metadata', 'ds001849', 'ds004148', 'ds002799', 'ds002722', 'ds002094', 'ds004024', 'ds003838', 'ds003805', 'ds004022', 'ds004584', 'ds003739', 'ds004040', 'ds004521', 'ds004276', 'ds003190', 'ds000247', 'ds004015', 'ds004448', 'ds002680', 'ds004067', 'ds004010', 'ds004588', 'ds002578', 'ds004520', 'ds004284', 'ds004186', 'ds002218', 'ds004504', 'ds004348', 'ds003846', 'ds002724', 'ds003887', 'ds003380', 'ds004505', 'ds003774']
# Dataset runs that completed successfully.
finished = ['ds001785', 'ds002550', 'ds002712', 'ds002814', 'ds003004', 'ds003029', 'ds003061', 'ds003082', 'ds003104', 'ds003343', 'ds003392', 'ds003420', 'ds003421', 'ds003458', 'ds003474', 'ds003478', 'ds003483', 'ds003490', 'ds003498', 'ds003506', 'ds003509', 'ds003516', 'ds003517', 'ds003518', 'ds003519', 'ds003522', 'ds003523', 'ds003568', 'ds003574', 'ds003620', 'ds003633', 'ds003645', 'ds003655', 'ds003682', 'ds003690', 'ds003703', 'ds003708', 'ds003825', 'ds004105', 'ds004106', 'ds004117', 'ds004118', 'ds004119', 'ds004120', 'ds004121', 'ds004122', 'ds004123', 'ds004350', 'ds004362']
36 changes: 36 additions & 0 deletions HED/summary/run_event_summary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@

import sys
sys.path.insert(0, "../hed_python")
from hed.tools.remodeling.cli.run_remodel import main
import os
import re
import shutil

# Run the column-values event summary remodeler over OpenNeuro datasets and
# copy the newest summary into each dataset's work dir as events_report.json.
raw_dir = '/expanse/projects/nemar/openneuro'
model_path = './column_values_summary_cmd.json'
outputdir = '/expanse/projects/nemar/openneuro/processed/event_summaries'

# Datasets to process. TODO: pull this from the NEMAR database; earlier
# batches are recorded in notes.py.
dsnumbers = ['ds003645']
for f in dsnumbers:
    print(f'processing {f}')
    data_root = os.path.join(raw_dir, f)      # raw BIDS dataset root
    work_dir = os.path.join(outputdir, f)     # per-dataset output directory
    # makedirs(exist_ok=True) avoids the exists()/mkdir() race of the original.
    os.makedirs(work_dir, exist_ok=True)
    if os.path.isdir(data_root):
        # CLI flags for hed.tools.remodeling.cli.run_remodel — see its docs
        # for exact semantics (presumably -x excludes dirs, -w is work dir).
        arg_list1 = [data_root, model_path, '-x', 'derivatives', 'code', 'stimuli',
                     '-nb', '-nu', '-w', work_dir, '-b', '-i', 'none', '-v']
        try:
            main(arg_list1)
            summary_outputdir = os.path.join(work_dir, 'remodel', 'summaries', 'column_values')
            # The remodeler writes timestamped files; escape the dot so the
            # pattern really requires a .json suffix, then keep the newest.
            summaries = [file for file in os.listdir(summary_outputdir)
                         if re.match(r'column_values.*\.json', file)]
            if summaries:
                summaries.sort()
                summary_outputfile = os.path.join(summary_outputdir, summaries[-1])
                shutil.copyfile(summary_outputfile,
                                os.path.join(work_dir, 'events_report.json'))
        except Exception as e:
            print(f"Error for {f}")
            print(e)

109 changes: 109 additions & 0 deletions HED/summary/run_hed_summary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@

import sys
sys.path.insert(0, "../hed_python")
from hed.tools.remodeling.cli.run_remodel import main
from hed.tools.visualization import summary_to_dict, create_wordcloud, word_cloud_to_svg
import os
import re
import json

def generate_json_report(hed_summary, output):
    """Condense a remodeler ``summarize_hed_tags`` JSON summary into a report.

    Parameters
    ----------
    hed_summary : str
        Path to the summarize_hed_tags JSON file produced by the HED remodeler.
    output : str
        Path where the condensed JSON report is written.

    Returns
    -------
    dict
        The condensed summary that was also written to ``output``. Contains
        'event files', 'events', 'events/file', 'Main tags', 'Other tags' and
        a (currently unpopulated) 'Condition variables' entry.
    """
    summary = {'Main tags': {}, 'Other tags': [],
               # TODO: populate from the hed_type_summary output once the
               # condition-variable summary is re-enabled in the pipeline.
               'Condition variables': {}}
    with open(hed_summary, 'r') as f:
        data = json.load(f)

    overall = data['Overall summary']
    nfiles = overall['Total files']
    nevents = overall['Total events']
    summary['event files'] = nfiles
    summary['events'] = nevents
    # Guard against an empty dataset (original raised ZeroDivisionError).
    summary['events/file'] = nevents / nfiles if nfiles else 0

    # Keep only the tag name and event count for each reported tag.
    for main_tag, tag_list in overall['Specifics']['Main tags'].items():
        summary['Main tags'][main_tag] = [
            {'tag': t['tag'], 'events': t['events']} for t in tag_list]
    summary['Other tags'] = [
        {'tag': t['tag'], 'events': t['events']}
        for t in overall['Specifics']['Other tags']]

    with open(output, 'w') as out:
        json.dump(summary, out)

    return summary

def generate_wordcloud(summary_file, work_dir):
    """Render a word-cloud SVG from a summarize_hed_tags JSON summary.

    Reads ``summary_file``, converts it to word frequencies via
    ``summary_to_dict``, and writes ``word_cloud.svg`` into ``work_dir``.
    """
    with open(summary_file) as fin:
        tag_summary = json.load(fin)

    frequencies = summary_to_dict(tag_summary)
    # word_mask.png shapes the cloud; width=None lets the library derive it.
    cloud = create_wordcloud(frequencies, mask_path="./word_mask.png",
                             height=400, width=None)
    svg_text = word_cloud_to_svg(cloud)
    with open(work_dir + "/word_cloud.svg", "w") as outfile:
        outfile.writelines(svg_text)


# Run the HED tag summary remodeler over each dataset and condense the newest
# summary into hed_report.json; failures are appended to an error log.
raw_dir = '/expanse/projects/nemar/openneuro'
hed_summary_model_path = './hed_summary_cmd.json'
outputdir = '/expanse/projects/nemar/openneuro/processed/event_summaries'
error_logfile = './run_hed_summary.err'
run_wordcloud = False

# TODO: use NEMAR database instead of a hard-coded list.
dsnumbers = ['ds004635','ds004588','ds004554','ds004521','ds004520','ds004519','ds004362','ds004350','ds004166','ds004123','ds004122','ds004121','ds004120','ds004119','ds004118','ds004117','ds004106','ds004105','ds003645','ds003061','ds002718']

# `with` guarantees the log is flushed/closed even if the loop raises
# (the original left fid_err open on an unexpected error).
with open(error_logfile, 'w') as fid_err:
    for f in dsnumbers:
        print(f'processing {f}')
        try:
            data_root = os.path.join(raw_dir, f)
            work_dir = os.path.join(outputdir, f)
            # makedirs(exist_ok=True) avoids the exists()/mkdir() race.
            os.makedirs(work_dir, exist_ok=True)
            if os.path.isdir(data_root):
                # CLI flags for hed.tools.remodeling.cli.run_remodel — see its
                # docs for exact semantics.
                arg_list1 = [data_root, hed_summary_model_path, '-x', 'derivatives',
                             'code', 'stimuli', 'sourcedata', '.datalad',
                             '-nu', '-nb', '-w', work_dir, '-b', '-i', 'none', '-v']
                main(arg_list1)
                hed_summary_outputdir = os.path.join(
                    work_dir, 'remodel', 'summaries', 'summarize_hed_tags')
                # Bug fix: the original pattern 'summarize_hed_tags.json' never
                # matched the timestamped files the sort/[-1] logic expects;
                # also escape the dot so .json is matched literally.
                hed_summaries = [file for file in os.listdir(hed_summary_outputdir)
                                 if re.match(r'summarize_hed_tags.*\.json', file)]
                if hed_summaries:
                    hed_summaries.sort()
                    hed_summary_outputfile = os.path.join(
                        hed_summary_outputdir, hed_summaries[-1])
                    generate_json_report(hed_summary_outputfile,
                                         os.path.join(work_dir, 'hed_report.json'))
                    # Inside the if-branch so a missing summary can no longer
                    # trigger a NameError when run_wordcloud is enabled.
                    if run_wordcloud:
                        generate_wordcloud(hed_summary_outputfile, work_dir)
        except Exception as e:
            fid_err.write(f'Error processing {f}: {e}\n')

def get_hed_datasets():
    """Placeholder: will query the NEMAR database for HED-annotated datasets."""
    # TODO: implement the lookup and replace the hard-coded dsnumbers above.
    return None
39 changes: 39 additions & 0 deletions HED/summary/run_word_cloud.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@

import sys
sys.path.insert(0, "../hed_python")
from hed.tools.visualization import summary_to_dict, create_wordcloud, word_cloud_to_svg
import os
import re
import shutil
import json

# Re-render word-cloud SVGs from previously generated summarize_hed_tags
# summaries, without re-running the remodeler.
raw_dir = '/expanse/projects/nemar/openneuro'
outputdir = '/expanse/projects/nemar/openneuro/processed/event_summaries'

# TODO: use NEMAR database instead of a hard-coded list.
dsnumbers = ['ds004635','ds004588','ds004554','ds004521','ds004520','ds004519','ds004362','ds004350','ds004166','ds004123','ds004122','ds004121','ds004120','ds004119','ds004118','ds004117','ds004106','ds004105','ds003645','ds003061','ds002718']
for f in dsnumbers:
    print(f'processing {f}')
    data_root = os.path.join(raw_dir, f)
    work_dir = os.path.join(outputdir, f)
    hed_summary_outputfile = os.path.join(
        work_dir, 'remodel', 'summaries', 'summarize_hed_tags',
        'summarize_hed_tags.json')
    # makedirs(exist_ok=True) avoids the exists()/mkdir() race of the original.
    os.makedirs(work_dir, exist_ok=True)
    if os.path.isdir(data_root):
        try:
            with open(hed_summary_outputfile, 'r') as fin:
                hed_summary = json.load(fin)
            loaded_dict = summary_to_dict(hed_summary)

            # word_mask.png shapes the cloud; width=None lets the library
            # derive it from the mask.
            word_cloud = create_wordcloud(loaded_dict, mask_path="./word_mask.png",
                                          height=400, width=None)
            svg_data = word_cloud_to_svg(word_cloud)
            # write() the whole string instead of writelines() iterating chars.
            with open(os.path.join(work_dir, "word_cloud.svg"), "w") as outfile:
                outfile.write(svg_data)
        except Exception as e:
            print(f"Error for {f}")
            print(e)


Binary file added HED/summary/word_mask.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit 79527dc

Please sign in to comment.