formatted statistiline_filter to standard

This commit is contained in:
ralf 2024-07-11 11:15:30 +03:00
parent 645d908000
commit 6e7a727b1a

View File

@ -1,15 +1,6 @@
# -*- coding: utf-8 -*-
#%% import
import h5py import h5py
import os import os
import pandas as pd import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from scipy.stats import f
from scipy.stats import shapiro
h5_file = 'ltcc_current.h5' h5_file = 'ltcc_current.h5'
@ -23,7 +14,6 @@ with h5py.File(h5_file, 'r') as h5_file:
if 'tag' in h5_file[eid].attrs: if 'tag' in h5_file[eid].attrs:
tag_val = h5_file[eid].attrs['tag'] tag_val = h5_file[eid].attrs['tag']
# pean tõdema, et siin aitas ChatGPT oma soovitusega :/ väga halb debug oli
if isinstance(tag_val, bytes): if isinstance(tag_val, bytes):
tag_val = tag_val.decode('utf-8') tag_val = tag_val.decode('utf-8')
@ -36,10 +26,6 @@ with h5py.File(h5_file, 'r') as h5_file:
ttx_eid_list.append(fit_result_eid) ttx_eid_list.append(fit_result_eid)
else: else:
teised_eid_list.append(fit_result_eid) teised_eid_list.append(fit_result_eid)
#for attr_key, attr_value in h5_file[eid].attrs.items():
#print(f"Atribute: {attr_key}, Value: {attr_value}")
#%%
file = 'ltcc_current.h5' file = 'ltcc_current.h5'
@ -59,7 +45,6 @@ with h5py.File(file, 'r') as h5_file:
df['sex'] = sex df['sex'] = sex
df['spid'] = spid.replace("Mouse AGAT", "") df['spid'] = spid.replace("Mouse AGAT", "")
df['eid'] = eid df['eid'] = eid
df.to_csv(csv_file_name, index=False) df.to_csv(csv_file_name, index=False)
for fail in os.listdir(): for fail in os.listdir():
@ -85,8 +70,6 @@ for fail in os.listdir():
df['tag'] = 'teised' df['tag'] = 'teised'
df.to_csv(fail, index=False) df.to_csv(fail, index=False)
#%%
comb_df = pd.DataFrame() comb_df = pd.DataFrame()
for filename in os.listdir(): for filename in os.listdir():
@ -99,94 +82,3 @@ for filename in os.listdir():
comb_df = pd.concat([comb_df, df], ignore_index=True) comb_df = pd.concat([comb_df, df], ignore_index=True)
print(comb_df) print(comb_df)
#%% lugemine, et teha kindlaks, mis tüüpi ANOVA teha, kasutades statsmodels paketti
sex_counts = comb_df['sex'].value_counts()
spid_counts = comb_df['spid'].value_counts()
tag_counts = comb_df['tag'].value_counts()
print(sex_counts,spid_counts,tag_counts)
#%% normaalsuse kontroll
tau_xfer = comb_df['tau_xfer']
stat, p = shapiro(tau_xfer)
alpha = 0.05
if p > alpha:
print("Andmed on normaalselt jaotunud (ei lükka tagasi nullhüpoteesi)")
else:
print("Andmed ei ole normaalselt jaotunud (lükata tagasi nullhüpotees)")
#%% ANOVA 2 WAY
comb_df['spid'] = comb_df['spid'].astype('category')
comb_df['sex'] = comb_df['sex'].astype('category')
comb_df['tag'] = comb_df['tag'].astype('category')
model = ols('tau_xfer ~ C(sex) + C(spid)+ C(tag)', data=comb_df).fit()
anova_table = sm.stats.anova_lm(model, typ=2)
print(anova_table)
#%% kriitiline väärtus
df_between_groups = 2 # Vabadusastmed gruppide vahel
df_within_groups = 67 # Vabadusastmed rühmades (Residual)
alpha = 0.05
critical_f = f.ppf(1 - alpha, df_between_groups, df_within_groups)
print("Kriitiline F-väärtus:", critical_f)
#%% kriitiline väärtus
df_between_groups = 2 # Vabadusastmed gruppide vahel (sex ja spid)
df_within_groups = 36 # Vabadusastmed rühmades (Residual)
alpha = 0.05
critical_f = f.ppf(1 - alpha, df_between_groups, df_within_groups)
print("Kriitiline F-väärtus:", critical_f)
"""
#%%grupeeringud
groups = comb_df.groupby('tag')
iso_group = groups.get_group('iso')
ttx_group = groups.get_group('ttx')
print("ISO grupp:", iso_group)
print("nTTX grupp:", ttx_group)
#%% ANOVA jaoks on grupid piisavalt suured,
#allikas https://support.minitab.com/en-us/minitab/help-and-how-to/statistical-modeling/anova/how-to/one-way-anova/before-you-start/data-considerations/
f_statistic, p_value = f_oneway(iso_group['tau_xfer'], ttx_group['tau_xfer'])
#%% f_crit control
dfn = 2-1 #2 gruppi, miinus 1, hetkel ei lahendanud seda vaga automatiseeritult
dfd = len(iso_group) + len(ttx_group) - 2 #ka ei lahendanud automatiseeritult
alpha = 0.05
f_crit = f.ppf(1 - alpha, dfn, dfd)
#%% Väljastame tulemused
print("F-statistika:", f_statistic)
print("P-väärtus:", p_value)
print("F-kriitiline:", f_crit)
"""