Skip to content

Commit

Permalink
[ENH] add extra csvs with bond-group (#51)
Browse files Browse the repository at this point in the history
Closes #48
  • Loading branch information
mattcieslak authored Dec 4, 2020
1 parent adea0b7 commit 31ad146
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 2 deletions.
9 changes: 7 additions & 2 deletions bond/bond.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
# import ipdb
from tqdm import tqdm
from .constants import ID_VARS, NON_KEY_ENTITIES, IMAGING_PARAMS
from .metadata_merge import check_merging_operations
from .metadata_merge import (
check_merging_operations, group_by_acquisition_sets)
bids.config.set_option('extension_initial_dot', True)


Expand Down Expand Up @@ -380,7 +381,7 @@ def get_param_groups_dataframes(self):

return (big_df, summary)

def get_CSVs(self, path_prefix):
def get_CSVs(self, path_prefix, split_by_session=True):
"""Creates the _summary and _files CSVs for the bids dataset.
Parameters:
Expand All @@ -398,6 +399,10 @@ def get_CSVs(self, path_prefix):
big_df.to_csv(path_prefix + "_files.csv", index=False)
summary.to_csv(path_prefix + "_summary.csv", index=False)

# Calculate the acq groups
group_by_acquisition_sets(path_prefix + "_files.csv", path_prefix,
split_session=split_by_session)

def get_key_groups(self):
'''Identifies the key groups for the bids dataset'''

Expand Down
59 changes: 59 additions & 0 deletions bond/metadata_merge.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Main module."""
import json
from collections import defaultdict
import numpy as np
import pandas as pd
from copy import deepcopy
Expand Down Expand Up @@ -138,3 +139,61 @@ def merge_json_into_json(from_file, to_file,
json.dump(merged_metadata, tofw, indent=4)

return 0


def group_by_acquisition_sets(files_csv, output_prefix, split_session=True):
    '''Finds unique sets of Key/Param groups across subjects.

    Reads the ``_files`` CSV produced earlier, gathers the
    (KeyGroup, ParamGroup) contents of each subject (and, when
    ``split_session`` is True, each subject/session pair), then numbers the
    distinct content sets in descending order of how many acquisitions share
    them. Writes two outputs: ``<output_prefix>_AcqGrouping.csv`` mapping
    each subject/session to its acquisition-group number, and
    ``<output_prefix>_AcqGroupInfo.txt`` with one summary line per group.
    '''
    from bids.layout import parse_file_entities
    from bids import config
    config.set_option('extension_initial_dot', True)

    file_rows = pd.read_csv(files_csv)

    # Collect the group contents observed for each acquisition id.
    contents_by_acq = defaultdict(list)
    for _, file_row in file_rows.iterrows():
        entities = parse_file_entities(file_row.FilePath)
        subject = entities.get("subject")
        if split_session:
            acq_id = (subject, entities.get("session"))
            contents_by_acq[acq_id].append(
                (file_row.KeyGroup, file_row.ParamGroup))
        else:
            # Sessions collapse into one acquisition per subject; keep the
            # session as part of the contents instead of the key.
            contents_by_acq[(subject, None)].append(
                (file_row.KeyGroup, file_row.ParamGroup,
                 entities.get("session")))

    # Invert the mapping: identical (sorted) contents -> every
    # subject/session that produced them.
    acqs_by_contents = defaultdict(list)
    for acq_id, contents in contents_by_acq.items():
        acqs_by_contents[tuple(sorted(contents))].append(acq_id)

    # Rank the distinct content sets by membership size, largest first.
    unique_contents = list(acqs_by_contents)
    member_counts = [len(acqs_by_contents[c]) for c in unique_contents]
    descending_order = np.argsort(member_counts)[::-1]

    # Assign group numbers (1 = most common) and record which
    # subject/session pairs belong to each group.
    grouped_sub_sess = []
    acq_group_info = []
    for groupnum, rank_idx in enumerate(descending_order, start=1):
        contents = unique_contents[rank_idx]
        acq_group_info.append(
            (groupnum, member_counts[rank_idx]) + contents)
        for subject, session in acqs_by_contents[contents]:
            grouped_sub_sess.append(
                {"subject": subject,
                 "session": session,
                 "AcqGroup": groupnum})

    # Write the mapping of subject/session to acquisition group number.
    acq_group_df = pd.DataFrame(grouped_sub_sess)
    acq_group_df.to_csv(output_prefix + "_AcqGrouping.csv", index=False)

    # Write the summary of acq groups to a text file, one group per line.
    with open(output_prefix + "_AcqGroupInfo.txt", "w") as infotxt:
        infotxt.write(
            "\n".join(" ".join(map(str, line)) for line in acq_group_info))
Binary file removed notebooks/CCNP_KeyGroups.zip
Binary file not shown.

0 comments on commit 31ad146

Please sign in to comment.