Adding Python files

commit 6bd2b100ca (parent c6143cd11a)
2025-12-05 09:50:50 +02:00
24 changed files with 3076 additions and 0 deletions

src/database/__init__.py (new file, 0 lines)

src/database/db_manager.py (new file, 619 lines)

@@ -0,0 +1,619 @@
"""
Database manager for the microscopy object detection application.
Handles all database operations, including CRUD, queries, and exports.
"""
import sqlite3
import json
from datetime import datetime
from typing import List, Dict, Optional, Tuple
from pathlib import Path
import csv
import hashlib
class DatabaseManager:
"""Manages all database operations for the application."""
def __init__(self, db_path: str = "data/detections.db"):
"""
Initialize database manager.
Args:
db_path: Path to SQLite database file
"""
self.db_path = db_path
self._ensure_database_exists()
def _ensure_database_exists(self) -> None:
"""Create database and tables if they don't exist."""
# Create directory if it doesn't exist
Path(self.db_path).parent.mkdir(parents=True, exist_ok=True)
# Read schema file and execute
schema_path = Path(__file__).parent / "schema.sql"
with open(schema_path, "r") as f:
schema_sql = f.read()
conn = self.get_connection()
try:
conn.executescript(schema_sql)
conn.commit()
finally:
conn.close()
def get_connection(self) -> sqlite3.Connection:
"""Get database connection with proper settings."""
conn = sqlite3.connect(self.db_path)
conn.row_factory = sqlite3.Row # Enable column access by name
conn.execute("PRAGMA foreign_keys = ON") # Enable foreign keys
return conn
# ==================== Model Operations ====================
def add_model(
self,
model_name: str,
model_version: str,
model_path: str,
base_model: str = "yolov8s.pt",
training_params: Optional[Dict] = None,
metrics: Optional[Dict] = None,
) -> int:
"""
Add a new model to the database.
Args:
model_name: Name of the model
model_version: Version string
model_path: Path to model weights file
base_model: Base model used for training
training_params: Dictionary of training parameters
metrics: Dictionary of validation metrics
Returns:
ID of the inserted model
"""
conn = self.get_connection()
try:
cursor = conn.cursor()
cursor.execute(
"""
INSERT INTO models (model_name, model_version, model_path, base_model, training_params, metrics)
VALUES (?, ?, ?, ?, ?, ?)
""",
(
model_name,
model_version,
model_path,
base_model,
json.dumps(training_params) if training_params else None,
json.dumps(metrics) if metrics else None,
),
)
conn.commit()
return cursor.lastrowid
finally:
conn.close()
def get_models(self, filters: Optional[Dict] = None) -> List[Dict]:
"""
Retrieve models from database.
Args:
filters: Optional filters (e.g., {'model_name': 'my_model'})
Returns:
List of model dictionaries
"""
conn = self.get_connection()
try:
query = "SELECT * FROM models"
params = []
if filters:
conditions = []
for key, value in filters.items():
conditions.append(f"{key} = ?")
params.append(value)
query += " WHERE " + " AND ".join(conditions)
query += " ORDER BY created_at DESC"
cursor = conn.cursor()
cursor.execute(query, params)
models = []
for row in cursor.fetchall():
model = dict(row)
# Parse JSON fields
if model["training_params"]:
model["training_params"] = json.loads(model["training_params"])
if model["metrics"]:
model["metrics"] = json.loads(model["metrics"])
models.append(model)
return models
finally:
conn.close()
def get_model_by_id(self, model_id: int) -> Optional[Dict]:
"""Get model by ID."""
models = self.get_models({"id": model_id})
return models[0] if models else None
def update_model(self, model_id: int, updates: Dict) -> bool:
"""Update model fields."""
conn = self.get_connection()
try:
# Build update query
set_clauses = []
params = []
for key, value in updates.items():
if key in ["training_params", "metrics"] and isinstance(value, dict):
value = json.dumps(value)
set_clauses.append(f"{key} = ?")
params.append(value)
params.append(model_id)
query = f"UPDATE models SET {', '.join(set_clauses)} WHERE id = ?"
cursor = conn.cursor()
cursor.execute(query, params)
conn.commit()
return cursor.rowcount > 0
finally:
conn.close()
# ==================== Image Operations ====================
def add_image(
self,
relative_path: str,
filename: str,
width: int,
height: int,
captured_at: Optional[datetime] = None,
checksum: Optional[str] = None,
) -> Optional[int]:
"""
Add a new image to the database.
Args:
relative_path: Path relative to image repository
filename: Image filename
width: Image width in pixels
height: Image height in pixels
captured_at: When image was captured (if known)
checksum: MD5 checksum of image file
Returns:
ID of the inserted image, or the existing image's ID if relative_path is already present
"""
conn = self.get_connection()
try:
cursor = conn.cursor()
cursor.execute(
"""
INSERT INTO images (relative_path, filename, width, height, captured_at, checksum)
VALUES (?, ?, ?, ?, ?, ?)
""",
(relative_path, filename, width, height, captured_at, checksum),
)
conn.commit()
return cursor.lastrowid
except sqlite3.IntegrityError:
# Image already exists, return its ID
cursor.execute(
"SELECT id FROM images WHERE relative_path = ?", (relative_path,)
)
row = cursor.fetchone()
return row["id"] if row else None
finally:
conn.close()
def get_image_by_path(self, relative_path: str) -> Optional[Dict]:
"""Get image by relative path."""
conn = self.get_connection()
try:
cursor = conn.cursor()
cursor.execute(
"SELECT * FROM images WHERE relative_path = ?", (relative_path,)
)
row = cursor.fetchone()
return dict(row) if row else None
finally:
conn.close()
def get_or_create_image(
self, relative_path: str, filename: str, width: int, height: int
) -> int:
"""Get existing image or create new one."""
existing = self.get_image_by_path(relative_path)
if existing:
return existing["id"]
return self.add_image(relative_path, filename, width, height)
# ==================== Detection Operations ====================
def add_detection(
self,
image_id: int,
model_id: int,
class_name: str,
bbox: Tuple[float, float, float, float], # (x_min, y_min, x_max, y_max)
confidence: float,
metadata: Optional[Dict] = None,
) -> int:
"""
Add a new detection to the database.
Args:
image_id: ID of the image
model_id: ID of the model used
class_name: Detected object class
bbox: Bounding box coordinates (normalized 0-1)
confidence: Detection confidence score
metadata: Additional metadata
Returns:
ID of the inserted detection
"""
conn = self.get_connection()
try:
cursor = conn.cursor()
x_min, y_min, x_max, y_max = bbox
cursor.execute(
"""
INSERT INTO detections (image_id, model_id, class_name, x_min, y_min, x_max, y_max, confidence, metadata)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
(
image_id,
model_id,
class_name,
x_min,
y_min,
x_max,
y_max,
confidence,
json.dumps(metadata) if metadata else None,
),
)
conn.commit()
return cursor.lastrowid
finally:
conn.close()
def add_detections_batch(self, detections: List[Dict]) -> int:
"""
Add multiple detections in a single transaction.
Args:
detections: List of detection dictionaries
Returns:
Number of detections inserted
"""
conn = self.get_connection()
try:
cursor = conn.cursor()
for det in detections:
bbox = det["bbox"]
cursor.execute(
"""
INSERT INTO detections (image_id, model_id, class_name, x_min, y_min, x_max, y_max, confidence, metadata)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
(
det["image_id"],
det["model_id"],
det["class_name"],
bbox[0],
bbox[1],
bbox[2],
bbox[3],
det["confidence"],
(
json.dumps(det.get("metadata"))
if det.get("metadata")
else None
),
),
)
conn.commit()
return len(detections)
finally:
conn.close()
def get_detections(
self,
filters: Optional[Dict] = None,
limit: Optional[int] = None,
offset: int = 0,
) -> List[Dict]:
"""
Retrieve detections from database.
Args:
filters: Optional filters for querying
limit: Maximum number of results
offset: Number of results to skip
Returns:
List of detection dictionaries with joined data
"""
conn = self.get_connection()
try:
query = """
SELECT
d.*,
i.relative_path as image_path,
i.filename as image_filename,
i.width as image_width,
i.height as image_height,
m.model_name,
m.model_version
FROM detections d
JOIN images i ON d.image_id = i.id
JOIN models m ON d.model_id = m.id
"""
params = []
if filters:
conditions = []
for key, value in filters.items():
if (
key.startswith("d.")
or key.startswith("i.")
or key.startswith("m.")
):
conditions.append(f"{key} = ?")
else:
conditions.append(f"d.{key} = ?")
params.append(value)
query += " WHERE " + " AND ".join(conditions)
query += " ORDER BY d.detected_at DESC"
if limit:
query += f" LIMIT {limit} OFFSET {offset}"
cursor = conn.cursor()
cursor.execute(query, params)
detections = []
for row in cursor.fetchall():
det = dict(row)
# Parse JSON metadata
if det.get("metadata"):
det["metadata"] = json.loads(det["metadata"])
detections.append(det)
return detections
finally:
conn.close()
def get_detections_for_image(
self, image_id: int, model_id: Optional[int] = None
) -> List[Dict]:
"""Get all detections for a specific image."""
filters = {"image_id": image_id}
if model_id:
filters["model_id"] = model_id
return self.get_detections(filters)
def delete_detections_for_model(self, model_id: int) -> int:
"""Delete all detections for a specific model."""
conn = self.get_connection()
try:
cursor = conn.cursor()
cursor.execute("DELETE FROM detections WHERE model_id = ?", (model_id,))
conn.commit()
return cursor.rowcount
finally:
conn.close()
# ==================== Statistics Operations ====================
def get_detection_statistics(
self, start_date: Optional[datetime] = None, end_date: Optional[datetime] = None
) -> Dict:
"""
Get detection statistics for a date range.
Returns:
Dictionary with statistics (count by class, confidence distribution, etc.)
"""
conn = self.get_connection()
try:
cursor = conn.cursor()
# Build date filter
date_filter = ""
params = []
if start_date:
date_filter += " AND detected_at >= ?"
params.append(start_date)
if end_date:
date_filter += " AND detected_at <= ?"
params.append(end_date)
# Total detections
cursor.execute(
f"SELECT COUNT(*) as count FROM detections WHERE 1=1{date_filter}",
params,
)
total_count = cursor.fetchone()["count"]
# Count by class
cursor.execute(
f"""
SELECT class_name, COUNT(*) as count
FROM detections
WHERE 1=1{date_filter}
GROUP BY class_name
ORDER BY count DESC
""",
params,
)
class_counts = {
row["class_name"]: row["count"] for row in cursor.fetchall()
}
# Average confidence
cursor.execute(
f"SELECT AVG(confidence) as avg_conf FROM detections WHERE 1=1{date_filter}",
params,
)
avg_confidence = cursor.fetchone()["avg_conf"] or 0
# Confidence distribution
cursor.execute(
f"""
SELECT
CASE
WHEN confidence < 0.3 THEN 'low'
WHEN confidence < 0.7 THEN 'medium'
ELSE 'high'
END as conf_level,
COUNT(*) as count
FROM detections
WHERE 1=1{date_filter}
GROUP BY conf_level
""",
params,
)
conf_dist = {row["conf_level"]: row["count"] for row in cursor.fetchall()}
return {
"total_detections": total_count,
"class_counts": class_counts,
"average_confidence": avg_confidence,
"confidence_distribution": conf_dist,
}
finally:
conn.close()
def get_class_distribution(self, model_id: Optional[int] = None) -> Dict[str, int]:
"""Get count of detections per class."""
conn = self.get_connection()
try:
cursor = conn.cursor()
query = "SELECT class_name, COUNT(*) as count FROM detections"
params = []
if model_id:
query += " WHERE model_id = ?"
params.append(model_id)
query += " GROUP BY class_name ORDER BY count DESC"
cursor.execute(query, params)
return {row["class_name"]: row["count"] for row in cursor.fetchall()}
finally:
conn.close()
# ==================== Export Operations ====================
def export_detections_to_csv(
self, output_path: str, filters: Optional[Dict] = None
) -> bool:
"""Export detections to CSV file."""
try:
detections = self.get_detections(filters)
with open(output_path, "w", newline="") as csvfile:
# Write the header even when there are no matching rows, so the CSV is well-formed
fieldnames = [
"id",
"image_path",
"model_name",
"model_version",
"class_name",
"x_min",
"y_min",
"x_max",
"y_max",
"confidence",
"detected_at",
]
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writeheader()
for det in detections:
row = {k: det[k] for k in fieldnames if k in det}
writer.writerow(row)
return True
except Exception as e:
print(f"Error exporting to CSV: {e}")
return False
def export_detections_to_json(
self, output_path: str, filters: Optional[Dict] = None
) -> bool:
"""Export detections to JSON file."""
try:
detections = self.get_detections(filters)
# Convert datetime objects to strings
for det in detections:
if isinstance(det.get("detected_at"), datetime):
det["detected_at"] = det["detected_at"].isoformat()
with open(output_path, "w") as jsonfile:
json.dump(detections, jsonfile, indent=2)
return True
except Exception as e:
print(f"Error exporting to JSON: {e}")
return False
# ==================== Annotation Operations ====================
def add_annotation(
self,
image_id: int,
class_name: str,
bbox: Tuple[float, float, float, float],
annotator: str,
verified: bool = False,
) -> int:
"""Add manual annotation."""
conn = self.get_connection()
try:
cursor = conn.cursor()
x_min, y_min, x_max, y_max = bbox
cursor.execute(
"""
INSERT INTO annotations (image_id, class_name, x_min, y_min, x_max, y_max, annotator, verified)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""",
(image_id, class_name, x_min, y_min, x_max, y_max, annotator, verified),
)
conn.commit()
return cursor.lastrowid
finally:
conn.close()
def get_annotations_for_image(self, image_id: int) -> List[Dict]:
"""Get all annotations for an image."""
conn = self.get_connection()
try:
cursor = conn.cursor()
cursor.execute("SELECT * FROM annotations WHERE image_id = ?", (image_id,))
return [dict(row) for row in cursor.fetchall()]
finally:
conn.close()
@staticmethod
def calculate_checksum(file_path: str) -> str:
"""Calculate MD5 checksum of a file."""
md5_hash = hashlib.md5()
with open(file_path, "rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
md5_hash.update(chunk)
return md5_hash.hexdigest()
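
Taken together, db_manager.py supports a simple ingest-then-query workflow: register a model, register images, store detections, then query or export. A minimal usage sketch follows (names, paths, and values are illustrative, and the import path assumes the repository root is on sys.path):

```python
from src.database.db_manager import DatabaseManager

db = DatabaseManager("data/detections.db")

# Register a trained model (names and metrics are illustrative)
model_id = db.add_model(
    model_name="cell_detector",
    model_version="v1",
    model_path="models/cell_detector_v1.pt",
    training_params={"epochs": 100, "imgsz": 640},
    metrics={"mAP50": 0.91},
)

# relative_path is UNIQUE, so re-adding the same image returns the existing row
image_id = db.get_or_create_image("2025/05/sample_001.png", "sample_001.png", 1024, 768)

# Store one detection; bbox corners are normalized to 0-1 per the schema CHECKs
db.add_detection(
    image_id=image_id,
    model_id=model_id,
    class_name="cell",
    bbox=(0.10, 0.20, 0.35, 0.40),
    confidence=0.87,
)

# Query back with the joined image/model columns, then export to CSV
for det in db.get_detections({"class_name": "cell"}, limit=10):
    print(det["image_path"], det["confidence"])
db.export_detections_to_csv("cell_detections.csv", {"class_name": "cell"})
```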

src/database/models.py (new file, 63 lines)

@@ -0,0 +1,63 @@
"""
Data models for the microscopy object detection application.
These dataclasses represent the database entities.
"""
from dataclasses import dataclass
from datetime import datetime
from typing import Optional, Dict, Tuple
@dataclass
class Model:
"""Represents a trained model."""
id: Optional[int]
model_name: str
model_version: str
model_path: str
base_model: str
created_at: datetime
training_params: Optional[Dict]
metrics: Optional[Dict]
@dataclass
class Image:
"""Represents an image in the database."""
id: Optional[int]
relative_path: str
filename: str
width: int
height: int
captured_at: Optional[datetime]
added_at: datetime
checksum: Optional[str]
@dataclass
class Detection:
"""Represents a detection result."""
id: Optional[int]
image_id: int
model_id: int
class_name: str
bbox: Tuple[float, float, float, float] # (x_min, y_min, x_max, y_max)
confidence: float
detected_at: datetime
metadata: Optional[Dict]
@dataclass
class Annotation:
"""Represents a manual annotation."""
id: Optional[int]
image_id: int
class_name: str
bbox: Tuple[float, float, float, float] # (x_min, y_min, x_max, y_max)
annotator: str
created_at: datetime
verified: bool
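
Note that the manager returns plain dicts (sqlite3.Row rows converted via dict()), not these dataclasses, and it stores the four bbox coordinates as separate columns. A small adapter bridges the two; detection_from_row below is a hypothetical helper, not part of this commit:

```python
from src.database.models import Detection

def detection_from_row(row: dict) -> Detection:
    """Build a Detection from a DatabaseManager.get_detections() row dict."""
    return Detection(
        id=row["id"],
        image_id=row["image_id"],
        model_id=row["model_id"],
        class_name=row["class_name"],
        # Recombine the flat x/y columns into the dataclass's bbox tuple
        bbox=(row["x_min"], row["y_min"], row["x_max"], row["y_max"]),
        confidence=row["confidence"],
        # SQLite returns TIMESTAMP columns as strings unless detect_types
        # and converters are configured on the connection
        detected_at=row["detected_at"],
        metadata=row.get("metadata"),
    )
```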

src/database/schema.sql (new file, 70 lines)

@@ -0,0 +1,70 @@
-- Microscopy Object Detection Application - Database Schema
-- SQLite Database Schema for storing models, images, detections, and annotations
-- Models table: stores trained model information
CREATE TABLE IF NOT EXISTS models (
id INTEGER PRIMARY KEY AUTOINCREMENT,
model_name TEXT NOT NULL,
model_version TEXT NOT NULL,
model_path TEXT NOT NULL,
base_model TEXT NOT NULL DEFAULT 'yolov8s.pt',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
training_params TEXT, -- JSON string of training parameters
metrics TEXT, -- JSON string of validation metrics
UNIQUE(model_name, model_version)
);
-- Images table: stores image metadata
CREATE TABLE IF NOT EXISTS images (
id INTEGER PRIMARY KEY AUTOINCREMENT,
relative_path TEXT NOT NULL UNIQUE,
filename TEXT NOT NULL,
width INTEGER,
height INTEGER,
captured_at TIMESTAMP,
added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
checksum TEXT
);
-- Detections table: stores detection results
CREATE TABLE IF NOT EXISTS detections (
id INTEGER PRIMARY KEY AUTOINCREMENT,
image_id INTEGER NOT NULL,
model_id INTEGER NOT NULL,
class_name TEXT NOT NULL,
x_min REAL NOT NULL CHECK(x_min >= 0 AND x_min <= 1),
y_min REAL NOT NULL CHECK(y_min >= 0 AND y_min <= 1),
x_max REAL NOT NULL CHECK(x_max >= 0 AND x_max <= 1),
y_max REAL NOT NULL CHECK(y_max >= 0 AND y_max <= 1),
confidence REAL NOT NULL CHECK(confidence >= 0 AND confidence <= 1),
detected_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
metadata TEXT, -- JSON string for additional metadata
FOREIGN KEY (image_id) REFERENCES images (id) ON DELETE CASCADE,
FOREIGN KEY (model_id) REFERENCES models (id) ON DELETE CASCADE
);
-- Annotations table: stores manual annotations (future feature)
CREATE TABLE IF NOT EXISTS annotations (
id INTEGER PRIMARY KEY AUTOINCREMENT,
image_id INTEGER NOT NULL,
class_name TEXT NOT NULL,
x_min REAL NOT NULL CHECK(x_min >= 0 AND x_min <= 1),
y_min REAL NOT NULL CHECK(y_min >= 0 AND y_min <= 1),
x_max REAL NOT NULL CHECK(x_max >= 0 AND x_max <= 1),
y_max REAL NOT NULL CHECK(y_max >= 0 AND y_max <= 1),
annotator TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
verified BOOLEAN DEFAULT 0,
FOREIGN KEY (image_id) REFERENCES images (id) ON DELETE CASCADE
);
-- Create indexes for performance optimization
CREATE INDEX IF NOT EXISTS idx_detections_image_id ON detections(image_id);
CREATE INDEX IF NOT EXISTS idx_detections_model_id ON detections(model_id);
CREATE INDEX IF NOT EXISTS idx_detections_class_name ON detections(class_name);
CREATE INDEX IF NOT EXISTS idx_detections_detected_at ON detections(detected_at);
CREATE INDEX IF NOT EXISTS idx_detections_confidence ON detections(confidence);
CREATE INDEX IF NOT EXISTS idx_images_relative_path ON images(relative_path);
CREATE INDEX IF NOT EXISTS idx_images_added_at ON images(added_at);
CREATE INDEX IF NOT EXISTS idx_annotations_image_id ON annotations(image_id);
CREATE INDEX IF NOT EXISTS idx_models_created_at ON models(created_at);
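
Two runtime behaviors follow from this schema and are easy to check with a short sketch (illustrative ids, not part of the commit): SQLite leaves foreign keys off by default, so ON DELETE CASCADE only fires on connections that enable the pragma (as get_connection() does), and the CHECK constraints reject out-of-range coordinates or confidences:

```python
import sqlite3

conn = sqlite3.connect("data/detections.db")
conn.execute("PRAGMA foreign_keys = ON")  # required per connection for CASCADE

# Deleting an image removes its detections and annotations via ON DELETE CASCADE
conn.execute("DELETE FROM images WHERE id = ?", (1,))
conn.commit()

# An out-of-range coordinate (x_min = 1.5) violates the CHECK constraints
try:
    conn.execute(
        "INSERT INTO detections "
        "(image_id, model_id, class_name, x_min, y_min, x_max, y_max, confidence) "
        "VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
        (1, 1, "cell", 1.5, 0.0, 0.9, 0.9, 0.8),
    )
except sqlite3.IntegrityError as e:
    # Raised for the CHECK violation (or the FK, if the referenced ids are stale)
    print("rejected:", e)
conn.close()
```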