Update training

2025-12-10 15:46:26 +02:00
parent f719c7ec40
commit 5d196c3a4a
5 changed files with 1534 additions and 18 deletions

View File

@@ -18,6 +18,8 @@ training:
  default_imgsz: 640
  default_patience: 50
  default_lr0: 0.01
  last_dataset_yaml: /home/martin/code/object_detection/data/datasets/data.yaml
  last_dataset_dir: /home/martin/code/object_detection/data/datasets
detection:
  default_confidence: 0.25
  default_iou: 0.45
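
A minimal sketch of how a client might consume the new last_dataset_* keys. The config filename and the fallback behavior here are assumptions, not part of this commit:

    import yaml
    from pathlib import Path

    # Hypothetical reader for the config shown above; "config.yaml" is assumed.
    config = yaml.safe_load(Path("config.yaml").read_text(encoding="utf-8"))
    training_cfg = config.get("training", {})
    last_yaml = training_cfg.get("last_dataset_yaml")  # absent on first run
    if last_yaml and Path(last_yaml).exists():
        print(f"Preselecting dataset config: {last_yaml}")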

View File

@@ -10,6 +10,13 @@ from typing import List, Dict, Optional, Tuple, Any, Union
from pathlib import Path
import csv
import hashlib
import yaml
from src.utils.logger import get_logger
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".tif", ".tiff", ".bmp")
logger = get_logger(__name__)
class DatabaseManager:
@@ -861,6 +868,187 @@ class DatabaseManager:
        finally:
            conn.close()

    # ==================== Dataset Utilities ====================

    def compose_data_yaml(
        self,
        dataset_root: str,
        output_path: Optional[str] = None,
        splits: Optional[Dict[str, str]] = None,
    ) -> str:
        """
        Compose a YOLO data.yaml file based on dataset folders and database metadata.

        Args:
            dataset_root: Base directory containing the dataset structure.
            output_path: Optional output path; defaults to <dataset_root>/data.yaml.
            splits: Optional mapping overriding train/val/test image directories
                (relative to dataset_root or absolute paths).

        Returns:
            Path to the generated YAML file.
        """
        dataset_root_path = Path(dataset_root).expanduser()
        if not dataset_root_path.exists():
            raise ValueError(f"Dataset root does not exist: {dataset_root_path}")
        dataset_root_path = dataset_root_path.resolve()

        split_map: Dict[str, str] = {key: "" for key in ("train", "val", "test")}
        if splits:
            for key, value in splits.items():
                if key in split_map and value:
                    split_map[key] = value

        inferred = self._infer_split_dirs(dataset_root_path)
        for key in split_map:
            if not split_map[key]:
                split_map[key] = inferred.get(key, "")

        for required in ("train", "val"):
            if not split_map[required]:
                raise ValueError(
                    "Unable to determine %s image directory under %s. Provide it "
                    "explicitly via the 'splits' argument."
                    % (required, dataset_root_path)
                )

        yaml_splits: Dict[str, str] = {}
        for key, value in split_map.items():
            if not value:
                continue
            yaml_splits[key] = self._normalize_split_value(value, dataset_root_path)

        class_names = self._fetch_annotation_class_names()
        if not class_names:
            class_names = [cls["class_name"] for cls in self.get_object_classes()]
        if not class_names:
            raise ValueError("No object classes available to populate data.yaml")

        names_map = {idx: name for idx, name in enumerate(class_names)}
        payload: Dict[str, Any] = {
            "path": dataset_root_path.as_posix(),
            "train": yaml_splits["train"],
            "val": yaml_splits["val"],
            "names": names_map,
            "nc": len(class_names),
        }
        if yaml_splits.get("test"):
            payload["test"] = yaml_splits["test"]

        output_path_obj = (
            Path(output_path).expanduser()
            if output_path
            else dataset_root_path / "data.yaml"
        )
        output_path_obj.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path_obj, "w", encoding="utf-8") as handle:
            yaml.safe_dump(payload, handle, sort_keys=False)

        logger.info(f"Generated data.yaml at {output_path_obj}")
        return output_path_obj.as_posix()

    def _fetch_annotation_class_names(self) -> List[str]:
        """Return class names referenced by annotations (ordered by class ID)."""
        conn = self.get_connection()
        try:
            cursor = conn.cursor()
            cursor.execute(
                """
                SELECT DISTINCT c.id, c.class_name
                FROM annotations a
                JOIN object_classes c ON a.class_id = c.id
                ORDER BY c.id
                """
            )
            rows = cursor.fetchall()
            return [row["class_name"] for row in rows]
        finally:
            conn.close()

    def _infer_split_dirs(self, dataset_root: Path) -> Dict[str, str]:
        """Infer train/val/test image directories relative to dataset_root."""
        patterns = {
            "train": [
                "train/images",
                "training/images",
                "images/train",
                "images/training",
                "train",
                "training",
            ],
            "val": [
                "val/images",
                "validation/images",
                "images/val",
                "images/validation",
                "val",
                "validation",
            ],
            "test": [
                "test/images",
                "testing/images",
                "images/test",
                "images/testing",
                "test",
                "testing",
            ],
        }
        inferred: Dict[str, str] = {key: "" for key in patterns}
        for split_name, options in patterns.items():
            for relative in options:
                candidate = (dataset_root / relative).resolve()
                if (
                    candidate.exists()
                    and candidate.is_dir()
                    and self._directory_has_images(candidate)
                ):
                    try:
                        inferred[split_name] = candidate.relative_to(
                            dataset_root
                        ).as_posix()
                    except ValueError:
                        inferred[split_name] = candidate.as_posix()
                    break
        return inferred

    def _normalize_split_value(self, split_value: str, dataset_root: Path) -> str:
        """Validate and normalize a split directory to a YAML-friendly string."""
        split_path = Path(split_value).expanduser()
        if not split_path.is_absolute():
            split_path = (dataset_root / split_path).resolve()
        else:
            split_path = split_path.resolve()
        if not split_path.exists() or not split_path.is_dir():
            raise ValueError(f"Split directory not found: {split_path}")
        if not self._directory_has_images(split_path):
            raise ValueError(f"No images found under {split_path}")
        try:
            return split_path.relative_to(dataset_root).as_posix()
        except ValueError:
            return split_path.as_posix()

    @staticmethod
    def _directory_has_images(directory: Path, max_checks: int = 2000) -> bool:
        """Return True if directory tree contains at least one image file."""
        checked = 0
        try:
            for file_path in directory.rglob("*"):
                if not file_path.is_file():
                    continue
                if file_path.suffix.lower() in IMAGE_EXTENSIONS:
                    return True
                checked += 1
                if checked >= max_checks:
                    break
        except Exception:
            return False
        return False

    @staticmethod
    def calculate_checksum(file_path: str) -> str:
        """Calculate MD5 checksum of a file."""

View File

@@ -297,7 +297,9 @@ class MainWindow(QMainWindow):
        # Save window state before closing
        self._save_window_state()

        # Persist tab state and stop background work before exit
        if hasattr(self, "training_tab"):
            self.training_tab.shutdown()
        if hasattr(self, "annotation_tab"):
            self.annotation_tab.save_state()
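
The shutdown call above implies the training tab owns background work. A plausible shape for it, purely illustrative since the training tab's diff is suppressed below; the _worker attribute and stop protocol are assumptions:

    class TrainingTab(QWidget):  # illustration only, not the committed code
        def shutdown(self) -> None:
            """Persist state and stop any in-flight training worker."""
            self.save_state()  # assumed persistence hook
            if getattr(self, "_worker", None) is not None:
                self._worker.requestInterruption()  # QThread cooperative stop
                self._worker.wait(5000)             # block up to 5 s on exit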

File diff suppressed because it is too large

View File

@@ -55,6 +55,7 @@ class YOLOWrapper:
save_dir: str = "data/models", save_dir: str = "data/models",
name: str = "custom_model", name: str = "custom_model",
resume: bool = False, resume: bool = False,
callbacks: Optional[Dict[str, Callable]] = None,
**kwargs, **kwargs,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
""" """
@@ -69,6 +70,7 @@ class YOLOWrapper:
            save_dir: Directory to save trained model
            name: Name for the training run
            resume: Resume training from last checkpoint
            callbacks: Optional Ultralytics callback dictionary
            **kwargs: Additional training arguments

        Returns:
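
With the new callbacks parameter, a caller can hook training progress. This sketch assumes the wrapper forwards each entry to Ultralytics' model.add_callback(event, func), which is the library's registration API; the wrapper instance name and the idea that name= is the only other argument needed are assumptions:

    def on_epoch_end(trainer):
        # trainer is Ultralytics' BaseTrainer; .epoch is the 0-based epoch index
        print(f"finished epoch {trainer.epoch + 1}")

    results = wrapper.train(  # wrapper: a YOLOWrapper instance
        name="custom_model",
        callbacks={"on_train_epoch_end": on_epoch_end},  # real Ultralytics hook name
    )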