diff --git a/statistiline_filter.py b/statistiline_filter.py index 7b7e9ee..a68cbd7 100644 --- a/statistiline_filter.py +++ b/statistiline_filter.py @@ -1,15 +1,6 @@ -# -*- coding: utf-8 -*- - -#%% import - import h5py import os import pandas as pd -import statsmodels.api as sm - -from statsmodels.formula.api import ols -from scipy.stats import f -from scipy.stats import shapiro h5_file = 'ltcc_current.h5' @@ -23,9 +14,8 @@ with h5py.File(h5_file, 'r') as h5_file: if 'tag' in h5_file[eid].attrs: tag_val = h5_file[eid].attrs['tag'] - # pean tõdema, et siin aitas chatgpt oma soovitusega :/ vaga halb debug oli - if isinstance(tag_val, bytes): - tag_val = tag_val.decode('utf-8') + if isinstance(tag_val, bytes): + tag_val = tag_val.decode('utf-8') puhas_eid = eid.replace(" ", "_").replace(":", "-") fit_result_eid = "fit_results_" + puhas_eid @@ -36,30 +26,25 @@ with h5py.File(h5_file, 'r') as h5_file: ttx_eid_list.append(fit_result_eid) else: teised_eid_list.append(fit_result_eid) - #for attr_key, attr_value in h5_file[eid].attrs.items(): - #print(f"Atribute: {attr_key}, Value: {attr_value}") - -#%% file = 'ltcc_current.h5' with h5py.File(file, 'r') as h5_file: for eid in h5_file.keys(): puhastatud_eid = eid.replace(" ", "_").replace(":", "-") - + atribuudid = h5_file[eid].attrs sex = atribuudid.get('sex') spid = atribuudid.get('spid') - + csv_file_name = f"fit_results_{puhastatud_eid}.csv" - + if os.path.exists(csv_file_name): df = pd.read_csv(csv_file_name) - + df['sex'] = sex - df['spid'] = spid.replace("Mouse AGAT","") + df['spid'] = spid.replace("Mouse AGAT", "") df['eid'] = eid - df.to_csv(csv_file_name, index=False) for fail in os.listdir(): @@ -68,25 +53,23 @@ for fail in os.listdir(): eksperiment_id = fail.replace(".csv", "") if eksperiment_id in sobiv_eid_list: - + df = pd.read_csv(fail) df['tag'] = 'iso' df.to_csv(fail, index=False) elif eksperiment_id in ttx_eid_list: - + df = pd.read_csv(fail) df['tag'] = 'ttx' df.to_csv(fail, index=False) else: - + df = pd.read_csv(fail) df['tag'] = 'teised' df.to_csv(fail, index=False) -#%% - comb_df = pd.DataFrame() for filename in os.listdir(): @@ -95,98 +78,7 @@ for filename in os.listdir(): df = pd.read_csv(filename) if 'tag' in df.columns and df['tag'].isin(['iso', 'ttx']).all(): - + comb_df = pd.concat([comb_df, df], ignore_index=True) print(comb_df) - -#%% lugemine, et teha kindlaks mis tüüpi ANOVA teha kasutades statsmodels packetit, - -sex_counts = comb_df['sex'].value_counts() - -spid_counts = comb_df['spid'].value_counts() - -tag_counts = comb_df['tag'].value_counts() - - -print(sex_counts,spid_counts,tag_counts) - -#%% normaalsuse kontroll -tau_xfer = comb_df['tau_xfer'] - -stat, p = shapiro(tau_xfer) - -alpha = 0.05 -if p > alpha: - print("Andmed on normaalselt jaotunud (ei lükka tagasi nullhüpoteesi)") -else: - print("Andmed ei ole normaalselt jaotunud (lükata tagasi nullhüpotees)") - - - -#%% ANOVA 2 WAY - -comb_df['spid'] = comb_df['spid'].astype('category') -comb_df['sex'] = comb_df['sex'].astype('category') -comb_df['tag'] = comb_df['tag'].astype('category') - -model = ols('tau_xfer ~ C(sex) + C(spid)+ C(tag)', data=comb_df).fit() - -anova_table = sm.stats.anova_lm(model, typ=2) -print(anova_table) - -#%% kriitiline vaartus - -df_between_groups = 2 # Vabadusastmed gruppide vahel -df_within_groups = 67 # Vabadusastmed rühmades (Residual) - -alpha = 0.05 - -critical_f = f.ppf(1 - alpha, df_between_groups, df_within_groups) - -print("Kriitiline F-väärtus:", critical_f) - - -#%% kriitiline vaartus - - -df_between_groups = 2 # Vabadusastmed gruppide vahel (sex ja spid) -df_within_groups = 36 # Vabadusastmed rühmades (Residual) - -alpha = 0.05 - -critical_f = f.ppf(1 - alpha, df_between_groups, df_within_groups) - -print("Kriitiline F-väärtus:", critical_f) - -""" -#%%grupeeringud - -groups = comb_df.groupby('tag') - -iso_group = groups.get_group('iso') -ttx_group = groups.get_group('ttx') - -print("ISO grupp:", iso_group) -print("nTTX grupp:", ttx_group) - -#%% ANOVA jaoks on grupid piisavalt suured, -#allikas https://support.minitab.com/en-us/minitab/help-and-how-to/statistical-modeling/anova/how-to/one-way-anova/before-you-start/data-considerations/ - -f_statistic, p_value = f_oneway(iso_group['tau_xfer'], ttx_group['tau_xfer']) - -#%% f_crit control - -dfn = 2-1 #2 gruppi, miinus 1, hetkel ei lahendanud seda vaga automatiseeritult -dfd = len(iso_group) + len(ttx_group) - 2 #ka ei lahendanud automatiseeritult - -alpha = 0.05 - -f_crit = f.ppf(1 - alpha, dfn, dfd) - -#%% Väljastame tulemused - -print("F-statistika:", f_statistic) -print("P-väärtus:", p_value) -print("F-kriitiline:", f_crit) -"""