From f887c94a70a7dd2bdbe9e0303de300f9362f2da8 Mon Sep 17 00:00:00 2001
From: Ralf Magnus Palmiste
Date: Thu, 11 Jul 2024 10:08:48 +0300
Subject: [PATCH] Upload files to "/"

Added the Data, fitter, and statistiline_filter scripts that are
necessary for working with the model.
---
 Data.py                |  25 +++
 fitter.py              | 204 ++++++++++++++++++++++++++++++++++++++++++
 statistiline_filter.py | 192 +++++++++++++++++++++++++++++++++++++
 3 files changed, 421 insertions(+)
 create mode 100644 Data.py
 create mode 100644 fitter.py
 create mode 100644 statistiline_filter.py

diff --git a/Data.py b/Data.py
new file mode 100644
index 0000000..49ec8b9
--- /dev/null
+++ b/Data.py
@@ -0,0 +1,25 @@
+import numpy as np
+import h5py
+from scipy.interpolate import interp1d
+
+
+class Data:
+    def __init__(self, filename, group_key):
+        self.filename = filename
+        self.datasets = []
+
+        with h5py.File(self.filename, "r") as h5:
+            print(f"Reading attributes from group '{group_key}'")
+            grp = h5[group_key]
+
+            # the raw current is normalized by membrane capacitance -> pA/pF
+            self.current = grp["current_raw"][()] / grp.attrs["c_mem"]
+            self.current_t = grp["current_t"][()]
+            self.ECal = grp.attrs["vrev"]
+            self.gGaL = grp.attrs["gmax"]
+            self.eid = group_key
+
+    def get_current_slice(self, times: np.ndarray):
+        interpolator = interp1d(self.current_t, self.current, fill_value="extrapolate")
+        return interpolator(times)
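+
+
+if __name__ == "__main__":
+    # Minimal usage sketch, not part of the fitting pipeline. The file name and
+    # group key below are the ones used in fitter.py and are assumed to exist
+    # in the working directory.
+    data = Data(
+        "ltcc_current.h5",
+        "0033635a51b096dc449eb9964e70443a67fc16b9587ae3ff6564eea1fa0e3437_2018.06.18 14:48:40",
+    )
+    # Resample the measured current onto an arbitrary time grid (in seconds,
+    # since the recording time base is in seconds).
+    t = np.linspace(0, 0.5, 6)
+    print(data.get_current_slice(t))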
diff --git a/fitter.py b/fitter.py
new file mode 100644
index 0000000..7104892
--- /dev/null
+++ b/fitter.py
@@ -0,0 +1,204 @@
+import re
+import time
+
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+
+from scipy.linalg import svd
+from scipy.optimize import least_squares
+
+from model import Model
+from Data import Data
+
+
+class Fitter:
+    def __init__(self, model: Model, data: Data, current_fit_range: tuple = (107, 341)) -> None:
+        """
+        current_fit_range : tuple (t0, t1); t0 and t1 are the start and stop
+        times (in ms) between which the current is fitted
+        """
+        self.model = model
+        self.data = data
+        self.current_fit_range = current_fit_range
+        self.fit_results = {}
+        self.debug_plot = False  # set True to plot the fit inside cost_func
+
+        self.tspan = [0, 1000]
+        self.dt = 1
+        self.time_points = np.arange(*self.tspan, self.dt)
+
+        self.iteration = 0  # least squares iteration counter
+
+        t0, t1 = self.current_fit_range
+        self.current_time_indices = (t0 <= self.time_points) & (self.time_points <= t1)
+        # the model works in ms but the data was recorded in seconds
+        self.measured_current = self.data.get_current_slice(
+            self.time_points[self.current_time_indices] / 1000
+        )
+
+    def convolve_current(self, current: np.ndarray, tau=1.5):
+        """Smooth the current with a normalized exponential (RC) kernel;
+        tau < 0 flips the kernel direction, tau ~ 0 returns the input unchanged."""
+        if np.abs(tau) < 1e-8:
+            return current
+
+        k = np.zeros(current.size)
+        k[k.size // 2 :] = np.exp(-np.arange(k.size // 2) / np.abs(tau))
+        k /= k.sum()
+
+        if tau > 0:
+            return np.convolve(current, k, mode="same")
+        else:
+            return np.convolve(current, k[::-1], mode="same")
+
+    def cost_func(self, parameters: np.ndarray):
+        model = self.model()
+
+        gGaL, ECal, K_pc_half, tau_xfer, tau_RC, offset = parameters
+
+        model.ECaL = ECal
+        model.gCaL = gGaL
+        model.K_pc_half = K_pc_half
+        model.tau_xfer = tau_xfer
+
+        model.solve(times=self.time_points)
+
+        _calc_curr = model.calculated_current()
+        calculated_current = (
+            self.convolve_current(_calc_curr, tau=tau_RC)[self.current_time_indices] + offset
+        )
+
+        res = self.measured_current - calculated_current
+        err = np.mean(res**2)  # mean squared error
+        self.iteration += 1
+        print(self.iteration, parameters.tolist(), "err", err)
+
+        if self.debug_plot:
+            t = self.time_points[self.current_time_indices]
+            plt.plot(t, _calc_curr[self.current_time_indices], label="calculated current")
+            plt.plot(t, self.measured_current, label="measured current")
+            plt.plot(t, calculated_current, label="conv calculated current")
+            plt.plot(t, self.measured_current - calculated_current, label="error")
+            plt.xlabel("time, ms")
+            plt.ylabel("current, pA/pF")
+            plt.legend(frameon=False)
+            plt.show()
+
+        return res
+
+    def optimize(self, init_parameters=None):
+        t0 = time.time()
+        self.iteration = 0
+        if init_parameters is None:
+            m = self.model()
+            K_pc_half = m.K_pc_half
+            tau_xfer = m.tau_xfer
+            tau_RC = 1.5
+            offset = 0
+
+            d = self.data
+            init_parameters = np.array([d.gGaL, d.ECal, K_pc_half, tau_xfer, tau_RC, offset])
+            print(init_parameters.tolist())
+
+        # bounds for [gGaL, ECal, K_pc_half, tau_xfer, tau_RC, offset];
+        # the original listed seven values per bound for six parameters, so the
+        # trailing pair is assumed to be a leftover and has been dropped
+        bounds = (
+            (0.01, 10, 0.1, 0.1, 0.1, -5),
+            (10, 100, 100, 1, 100, 10),
+        )
+
+        res = least_squares(self.cost_func, init_parameters, bounds=bounds, xtol=1e-10)
+        print()
+        print("   Parameters: [gGaL, ECal, K_pc_half, tau_xfer, tau_RC, offset]")
+        print("      Initial:", init_parameters.tolist())
+        print("    Optimized:", res.x.tolist())
+        print(" Optim status:", res.status)
+        print("Optim message:", res.message)
+
+        gGaL, ECal, K_pc_half, tau_xfer, tau_RC, offset = res.x
+
+        self.fit_results.update({
+            'gGaL': gGaL,
+            'ECal': ECal,
+            'K_pc_half': K_pc_half,
+            'tau_xfer': tau_xfer,
+            'tau_RC': tau_RC,
+            'offset': offset,
+            # res.fun is the final residual vector returned by least_squares
+            'mean_squared_error': np.mean(res.fun**2),
+        })
+
+        model = self.model()
+
+        model.ECaL = ECal
+        model.gCaL = gGaL
+        model.K_pc_half = K_pc_half
+        model.tau_xfer = tau_xfer
+
+        model.solve(times=self.time_points)
+
+        _calc_curr = model.calculated_current()
+        calculated_current = self.convolve_current(_calc_curr, tau=tau_RC) + offset
+        print("Elapsed time:", time.time() - t0)
+
+        fig = plt.figure(figsize=(24, 12))
+        ax1 = fig.add_subplot(121)
+        ax2 = fig.add_subplot(122)
+        ax1.plot(1000 * self.data.current_t, self.data.current, label="Measured")
+        ax1.plot(self.time_points, calculated_current, label="Calculated")
+        ax1.set_xlabel("time, ms")
+        ax1.set_ylabel("current, pA/pF")
+        ax1.legend(frameon=False)
+
+        tp = self.time_points[self.current_time_indices]
+        ax2.plot(tp, self.measured_current, label="Measured")
+        ax2.plot(tp, calculated_current[self.current_time_indices], label="Calculated")
+        ax2.set_xlabel("time, ms")
+        ax2.set_ylabel("current, pA/pF")
+        ax2.legend(frameon=False)
+        return res, fig
+
+
+def covcor_from_lsq(res):
+    """Covariance and correlation of the fitted parameters, built from the
+    SVD-based pseudo-inverse of the least-squares Jacobian (the same approach
+    scipy.optimize.curve_fit uses internally)."""
+    _, s, VT = svd(res.jac, full_matrices=False)
+    threshold = np.finfo(float).eps * max(res.jac.shape) * s[0]
+    s = s[s > threshold]
+    VT = VT[: s.size]
+    cov = np.dot(VT.T / s**2, VT)
+
+    std = np.sqrt(np.diag(cov))
+    cor = cov / np.outer(std, std)
+    cor[cov == 0] = 0
+
+    return cov, cor
+
+
+def plot_correlation_matrix(cor):
+    plt.imshow(cor, cmap='viridis', interpolation='nearest')
+    plt.colorbar(label='Correlation')
+    plt.title('Correlation Matrix')
+    plt.xlabel('Variables')
+    plt.ylabel('Variables')
+    plt.show()
+
+
+if __name__ == "__main__":
+
+    filename = "ltcc_current.h5"
+    eid = "0033635a51b096dc449eb9964e70443a67fc16b9587ae3ff6564eea1fa0e3437_2018.06.18 14:48:40"
+
+    data = Data(filename, eid)
+
+    fit = Fitter(Model, data)
+    res, fig = fit.optimize()
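+
+    # Parameter-uncertainty sketch (assumed usage, not in the original script):
+    # covcor_from_lsq() and plot_correlation_matrix() are defined above but were
+    # never called; this is one way they could be wired in after the fit.
+    cov, cor = covcor_from_lsq(res)
+    print("Parameter std estimates:", np.sqrt(np.diag(cov)).tolist())
+    plot_correlation_matrix(cor)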
+
+    fit_hist = pd.DataFrame.from_dict(fit.fit_results, orient='index').T
+    fit_hist.index.name = 'Iterations'
+
+    res_filename = f"fit_results_{eid}.csv"
+    res_filename = res_filename.replace(" ", "_").replace(":", "-")
+    fit_hist.to_csv(res_filename, index=True)
+
+    # remove all special characters, keeping only alphanumerics, hyphens and dots
+    eid_cleaned = re.sub(r'[^\w.-]', '', eid)
+    fig.savefig(f"plot_{eid_cleaned}.png")
+    fig.savefig(f"plot_{eid_cleaned}.pdf")
+    fig.savefig("naidis_fit.pdf")
+
+    plt.show()
diff --git a/statistiline_filter.py b/statistiline_filter.py
new file mode 100644
index 0000000..7b7e9ee
--- /dev/null
+++ b/statistiline_filter.py
@@ -0,0 +1,192 @@
+# -*- coding: utf-8 -*-
+
+#%% imports
+
+import h5py
+import os
+import pandas as pd
+import statsmodels.api as sm
+
+from statsmodels.formula.api import ols
+from scipy.stats import f
+from scipy.stats import shapiro
+
+h5_file = 'ltcc_current.h5'
+
+sobiv_eid_list = []
+ttx_eid_list = []
+teised_eid_list = []
+
+with h5py.File(h5_file, 'r') as h5:
+
+    for eid in h5.keys():
+        if 'tag' in h5[eid].attrs:
+            tag_val = h5[eid].attrs['tag']
+
+            # I have to admit ChatGPT helped with this suggestion :/ the tag can
+            # be stored as bytes, which made this a nasty bug to track down
+            if isinstance(tag_val, bytes):
+                tag_val = tag_val.decode('utf-8')
+
+            puhas_eid = eid.replace(" ", "_").replace(":", "-")
+            fit_result_eid = "fit_results_" + puhas_eid
+
+            if tag_val == 'iso':
+                sobiv_eid_list.append(fit_result_eid)
+            elif tag_val == 'ttx':
+                ttx_eid_list.append(fit_result_eid)
+            else:
+                teised_eid_list.append(fit_result_eid)
+
+#%% attach sex/spid metadata and the tag to each per-experiment CSV
+
+file = 'ltcc_current.h5'
+
+with h5py.File(file, 'r') as h5:
+    for eid in h5.keys():
+        puhastatud_eid = eid.replace(" ", "_").replace(":", "-")
+
+        atribuudid = h5[eid].attrs
+        sex = atribuudid.get('sex')
+        spid = atribuudid.get('spid')
+
+        csv_file_name = f"fit_results_{puhastatud_eid}.csv"
+
+        if os.path.exists(csv_file_name):
+            df = pd.read_csv(csv_file_name)
+
+            df['sex'] = sex
+            df['spid'] = spid.replace("Mouse AGAT", "")
+            df['eid'] = eid
+
+            df.to_csv(csv_file_name, index=False)
+
+for fail in os.listdir():
+    if fail.endswith('.csv'):
+
+        eksperiment_id = fail.replace(".csv", "")
+
+        if eksperiment_id in sobiv_eid_list:
+            df = pd.read_csv(fail)
+            df['tag'] = 'iso'
+            df.to_csv(fail, index=False)
+
+        elif eksperiment_id in ttx_eid_list:
+            df = pd.read_csv(fail)
+            df['tag'] = 'ttx'
+            df.to_csv(fail, index=False)
+
+        else:
+            df = pd.read_csv(fail)
+            df['tag'] = 'teised'
+            df.to_csv(fail, index=False)
+
+#%% combine the iso and ttx experiments into one DataFrame
+
+comb_df = pd.DataFrame()
+
+for filename in os.listdir():
+
+    if filename.endswith('.csv'):
+        df = pd.read_csv(filename)
+
+        if 'tag' in df.columns and df['tag'].isin(['iso', 'ttx']).all():
+            comb_df = pd.concat([comb_df, df], ignore_index=True)
+
+print(comb_df)
+
+#%% group counts, to decide what type of ANOVA to run with the statsmodels package
+
+sex_counts = comb_df['sex'].value_counts()
+spid_counts = comb_df['spid'].value_counts()
+tag_counts = comb_df['tag'].value_counts()
+
+print(sex_counts, spid_counts, tag_counts)
+
+#%% normality check
+
+tau_xfer = comb_df['tau_xfer']
+
+stat, p = shapiro(tau_xfer)
+
+alpha = 0.05
+if p > alpha:
+    print("Data are normally distributed (fail to reject the null hypothesis)")
+else:
+    print("Data are not normally distributed (reject the null hypothesis)")
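+
+#%% normality per group (a sketch beyond the original pooled check)
+
+# ANOVA's normality assumption concerns the within-group distributions
+# (equivalently the model residuals), so testing each tag group separately
+# can be more informative than the pooled Shapiro-Wilk test above.
+for tag_value, group in comb_df.groupby('tag'):
+    stat_g, p_g = shapiro(group['tau_xfer'])
+    print(f"Shapiro-Wilk for tag={tag_value}: W={stat_g:.3f}, p={p_g:.3g}")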
+
+#%% ANOVA with three factors (main effects only)
+
+comb_df['spid'] = comb_df['spid'].astype('category')
+comb_df['sex'] = comb_df['sex'].astype('category')
+comb_df['tag'] = comb_df['tag'].astype('category')
+
+model = ols('tau_xfer ~ C(sex) + C(spid) + C(tag)', data=comb_df).fit()
+
+anova_table = sm.stats.anova_lm(model, typ=2)
+print(anova_table)
+
+#%% critical value
+
+df_between_groups = 2   # degrees of freedom between groups
+df_within_groups = 67   # degrees of freedom within groups (residual)
+
+alpha = 0.05
+
+critical_f = f.ppf(1 - alpha, df_between_groups, df_within_groups)
+
+print("Critical F-value:", critical_f)
+
+#%% critical value
+
+df_between_groups = 2   # degrees of freedom between groups (sex and spid)
+df_within_groups = 36   # degrees of freedom within groups (residual)
+
+alpha = 0.05
+
+critical_f = f.ppf(1 - alpha, df_between_groups, df_within_groups)
+
+print("Critical F-value:", critical_f)
+
+"""
+#%% groupings
+
+groups = comb_df.groupby('tag')
+
+iso_group = groups.get_group('iso')
+ttx_group = groups.get_group('ttx')
+
+print("ISO group:", iso_group)
+print("TTX group:", ttx_group)
+
+#%% the groups are large enough for one-way ANOVA,
+# source: https://support.minitab.com/en-us/minitab/help-and-how-to/statistical-modeling/anova/how-to/one-way-anova/before-you-start/data-considerations/
+
+# NOTE: this would also need: from scipy.stats import f_oneway
+f_statistic, p_value = f_oneway(iso_group['tau_xfer'], ttx_group['tau_xfer'])
+
+#%% f_crit control
+
+dfn = 2 - 1  # two groups minus one; not automated at the moment
+dfd = len(iso_group) + len(ttx_group) - 2  # also not automated
+
+alpha = 0.05
+
+f_crit = f.ppf(1 - alpha, dfn, dfd)
+
+#%% print the results
+
+print("F-statistic:", f_statistic)
+print("P-value:", p_value)
+print("F-critical:", f_crit)
+"""
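+
+#%% degrees of freedom derived from the fitted model (a sketch, not in the
+# original analysis): instead of hard-coding the df values above, they can be
+# read off the OLS fit and the ANOVA table produced earlier in this file.
+df_within_auto = int(model.df_resid)                    # residual df of the OLS fit
+df_between_auto = int(anova_table.loc['C(tag)', 'df'])  # df of the factor of interest
+print("Critical F-value (auto df):", f.ppf(1 - alpha, df_between_auto, df_within_auto))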