Skip to content

Commit

Permalink
[ENH] add extra csvs with bond-group (#51)
Browse files Browse the repository at this point in the history
Closes #48
  • Loading branch information
mattcieslak authored Dec 4, 2020
1 parent adea0b7 commit 31ad146
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 2 deletions.
9 changes: 7 additions & 2 deletions bond/bond.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
# import ipdb
from tqdm import tqdm
from .constants import ID_VARS, NON_KEY_ENTITIES, IMAGING_PARAMS
from .metadata_merge import check_merging_operations
from .metadata_merge import (
check_merging_operations, group_by_acquisition_sets)
bids.config.set_option('extension_initial_dot', True)


Expand Down Expand Up @@ -380,7 +381,7 @@ def get_param_groups_dataframes(self):

return (big_df, summary)

def get_CSVs(self, path_prefix):
def get_CSVs(self, path_prefix, split_by_session=True):
"""Creates the _summary and _files CSVs for the bids dataset.
Parameters:
Expand All @@ -398,6 +399,10 @@ def get_CSVs(self, path_prefix):
big_df.to_csv(path_prefix + "_files.csv", index=False)
summary.to_csv(path_prefix + "_summary.csv", index=False)

# Calculate the acq groups
group_by_acquisition_sets(path_prefix + "_files.csv", path_prefix,
split_session=split_by_session)

def get_key_groups(self):
'''Identifies the key groups for the bids dataset'''

Expand Down
59 changes: 59 additions & 0 deletions bond/metadata_merge.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Main module."""
import json
from collections import defaultdict
import numpy as np
import pandas as pd
from copy import deepcopy
Expand Down Expand Up @@ -138,3 +139,61 @@ def merge_json_into_json(from_file, to_file,
json.dump(merged_metadata, tofw, indent=4)

return 0


def group_by_acquisition_sets(files_csv, output_prefix, split_session=True):
    '''Finds unique sets of Key/Param groups across subjects.

    Reads the ``_files`` CSV produced earlier, gathers the
    (KeyGroup, ParamGroup) contents of each subject (and, when
    ``split_session`` is True, each subject/session pair), then numbers the
    distinct content sets in descending order of how many acquisitions share
    them. Writes two outputs: ``<output_prefix>_AcqGrouping.csv`` mapping
    each subject/session to its acquisition-group number, and
    ``<output_prefix>_AcqGroupInfo.txt`` with one summary line per group.
    '''
    from bids.layout import parse_file_entities
    from bids import config
    config.set_option('extension_initial_dot', True)

    file_rows = pd.read_csv(files_csv)

    # Collect the group contents observed for each acquisition id.
    contents_by_acq = defaultdict(list)
    for _, file_row in file_rows.iterrows():
        entities = parse_file_entities(file_row.FilePath)
        subject = entities.get("subject")
        if split_session:
            acq_id = (subject, entities.get("session"))
            contents_by_acq[acq_id].append(
                (file_row.KeyGroup, file_row.ParamGroup))
        else:
            # Sessions collapse into one acquisition per subject; keep the
            # session as part of the contents instead of the key.
            contents_by_acq[(subject, None)].append(
                (file_row.KeyGroup, file_row.ParamGroup,
                 entities.get("session")))

    # Invert the mapping: identical (sorted) contents -> every
    # subject/session that produced them.
    acqs_by_contents = defaultdict(list)
    for acq_id, contents in contents_by_acq.items():
        acqs_by_contents[tuple(sorted(contents))].append(acq_id)

    # Rank the distinct content sets by membership size, largest first.
    unique_contents = list(acqs_by_contents)
    member_counts = [len(acqs_by_contents[c]) for c in unique_contents]
    descending_order = np.argsort(member_counts)[::-1]

    # Assign group numbers (1 = most common) and record which
    # subject/session pairs belong to each group.
    grouped_sub_sess = []
    acq_group_info = []
    for groupnum, rank_idx in enumerate(descending_order, start=1):
        contents = unique_contents[rank_idx]
        acq_group_info.append(
            (groupnum, member_counts[rank_idx]) + contents)
        for subject, session in acqs_by_contents[contents]:
            grouped_sub_sess.append(
                {"subject": subject,
                 "session": session,
                 "AcqGroup": groupnum})

    # Write the mapping of subject/session to acquisition group number.
    acq_group_df = pd.DataFrame(grouped_sub_sess)
    acq_group_df.to_csv(output_prefix + "_AcqGrouping.csv", index=False)

    # Write the summary of acq groups to a text file, one group per line.
    with open(output_prefix + "_AcqGroupInfo.txt", "w") as infotxt:
        infotxt.write(
            "\n".join(" ".join(map(str, line)) for line in acq_group_info))
Binary file removed notebooks/CCNP_KeyGroups.zip
Binary file not shown.

0 comments on commit 31ad146

Please sign in to comment.