"""Group the entries of an HDF5 file by their ``sex``, ``iso`` and ``spid`` attributes.

For every top-level entry of the file, one row is recorded holding the entry
name (as ``experiment_id``) and the three attribute values.  Rows that share
the same attribute combination are gathered into one pandas DataFrame, and
every DataFrame is printed at the end.
"""

from collections import defaultdict

import h5py
import pandas as pd

file = "ltcc_current.h5"


def _load_frames_by_sex_iso_spid(h5_path):
    """Return a dict mapping ``"{sex}_{iso}_{spid}"`` keys to DataFrames.

    Args:
        h5_path: Path of the HDF5 file to open read-only.

    Returns:
        A dict, ordered by first appearance of each key, whose values are
        DataFrames with the columns ``experiment_id``, ``sex``, ``iso`` and
        ``spid`` and one row per matching top-level entry.  An attribute that
        is missing on an entry is stored as ``None`` (and therefore shows up
        as ``"None"`` inside the key).
    """
    # Gather plain row dicts first and build each DataFrame exactly once.
    # Growing a DataFrame with pd.concat inside the loop copies all previous
    # rows on every iteration and concatenates with an empty frame, which
    # recent pandas versions warn about.
    rows_by_key = defaultdict(list)

    with h5py.File(h5_path, "r") as h5_file:
        for eid in h5_file:
            attributes = h5_file[eid].attrs
            sex = attributes.get("sex")
            iso = attributes.get("iso")
            spid = attributes.get("spid")

            # Unique key for the dict based on sex, iso & spid
            key = f"{sex}_{iso}_{spid}"
            rows_by_key[key].append(
                {"experiment_id": eid, "sex": sex, "iso": iso, "spid": spid}
            )

    return {key: pd.DataFrame(rows) for key, rows in rows_by_key.items()}


# Dict holding one DataFrame per combination of 'sex', 'iso' & 'spid'
dfs_by_sex_iso_spid = _load_frames_by_sex_iso_spid(file)

for key, df in dfs_by_sex_iso_spid.items():
    print(f"DataFrame for {key}:")
    print(df)
    print()