Update training

2025-12-10 15:46:26 +02:00
parent f719c7ec40
commit 5d196c3a4a
5 changed files with 1534 additions and 18 deletions

View File

@@ -18,6 +18,8 @@ training:
  default_imgsz: 640
  default_patience: 50
  default_lr0: 0.01
  last_dataset_yaml: /home/martin/code/object_detection/data/datasets/data.yaml
  last_dataset_dir: /home/martin/code/object_detection/data/datasets
detection:
  default_confidence: 0.25
  default_iou: 0.45
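
A minimal sketch of how a client might consume the new last_dataset_* keys. The config filename and the fallback behavior here are assumptions, not part of this commit:

    import yaml
    from pathlib import Path

    # Hypothetical reader for the config shown above; "config.yaml" is assumed.
    config = yaml.safe_load(Path("config.yaml").read_text(encoding="utf-8"))
    training_cfg = config.get("training", {})
    last_yaml = training_cfg.get("last_dataset_yaml")  # absent on first run
    if last_yaml and Path(last_yaml).exists():
        print(f"Preselecting dataset config: {last_yaml}")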

View File

@@ -10,6 +10,13 @@ from typing import List, Dict, Optional, Tuple, Any, Union
from pathlib import Path
import csv
import hashlib
import yaml
from src.utils.logger import get_logger
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".tif", ".tiff", ".bmp")
logger = get_logger(__name__)
class DatabaseManager:
@@ -861,6 +868,187 @@ class DatabaseManager:
        finally:
            conn.close()

    # ==================== Dataset Utilities ====================

    def compose_data_yaml(
        self,
        dataset_root: str,
        output_path: Optional[str] = None,
        splits: Optional[Dict[str, str]] = None,
    ) -> str:
        """
        Compose a YOLO data.yaml file based on dataset folders and database metadata.

        Args:
            dataset_root: Base directory containing the dataset structure.
            output_path: Optional output path; defaults to <dataset_root>/data.yaml.
            splits: Optional mapping overriding train/val/test image directories
                (relative to dataset_root or absolute paths).

        Returns:
            Path to the generated YAML file.
        """
        dataset_root_path = Path(dataset_root).expanduser()
        if not dataset_root_path.exists():
            raise ValueError(f"Dataset root does not exist: {dataset_root_path}")
        dataset_root_path = dataset_root_path.resolve()

        split_map: Dict[str, str] = {key: "" for key in ("train", "val", "test")}
        if splits:
            for key, value in splits.items():
                if key in split_map and value:
                    split_map[key] = value

        inferred = self._infer_split_dirs(dataset_root_path)
        for key in split_map:
            if not split_map[key]:
                split_map[key] = inferred.get(key, "")

        for required in ("train", "val"):
            if not split_map[required]:
                raise ValueError(
                    "Unable to determine %s image directory under %s. Provide it "
                    "explicitly via the 'splits' argument."
                    % (required, dataset_root_path)
                )

        yaml_splits: Dict[str, str] = {}
        for key, value in split_map.items():
            if not value:
                continue
            yaml_splits[key] = self._normalize_split_value(value, dataset_root_path)

        class_names = self._fetch_annotation_class_names()
        if not class_names:
            class_names = [cls["class_name"] for cls in self.get_object_classes()]
        if not class_names:
            raise ValueError("No object classes available to populate data.yaml")

        names_map = {idx: name for idx, name in enumerate(class_names)}
        payload: Dict[str, Any] = {
            "path": dataset_root_path.as_posix(),
            "train": yaml_splits["train"],
            "val": yaml_splits["val"],
            "names": names_map,
            "nc": len(class_names),
        }
        if yaml_splits.get("test"):
            payload["test"] = yaml_splits["test"]

        output_path_obj = (
            Path(output_path).expanduser()
            if output_path
            else dataset_root_path / "data.yaml"
        )
        output_path_obj.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path_obj, "w", encoding="utf-8") as handle:
            yaml.safe_dump(payload, handle, sort_keys=False)

        logger.info(f"Generated data.yaml at {output_path_obj}")
        return output_path_obj.as_posix()

    def _fetch_annotation_class_names(self) -> List[str]:
        """Return class names referenced by annotations (ordered by class ID)."""
        conn = self.get_connection()
        try:
            cursor = conn.cursor()
            cursor.execute(
                """
                SELECT DISTINCT c.id, c.class_name
                FROM annotations a
                JOIN object_classes c ON a.class_id = c.id
                ORDER BY c.id
                """
            )
            rows = cursor.fetchall()
            return [row["class_name"] for row in rows]
        finally:
            conn.close()

    def _infer_split_dirs(self, dataset_root: Path) -> Dict[str, str]:
        """Infer train/val/test image directories relative to dataset_root."""
        patterns = {
            "train": [
                "train/images",
                "training/images",
                "images/train",
                "images/training",
                "train",
                "training",
            ],
            "val": [
                "val/images",
                "validation/images",
                "images/val",
                "images/validation",
                "val",
                "validation",
            ],
            "test": [
                "test/images",
                "testing/images",
                "images/test",
                "images/testing",
                "test",
                "testing",
            ],
        }
        inferred: Dict[str, str] = {key: "" for key in patterns}
        for split_name, options in patterns.items():
            for relative in options:
                candidate = (dataset_root / relative).resolve()
                if (
                    candidate.exists()
                    and candidate.is_dir()
                    and self._directory_has_images(candidate)
                ):
                    try:
                        inferred[split_name] = candidate.relative_to(
                            dataset_root
                        ).as_posix()
                    except ValueError:
                        inferred[split_name] = candidate.as_posix()
                    break
        return inferred

    def _normalize_split_value(self, split_value: str, dataset_root: Path) -> str:
        """Validate and normalize a split directory to a YAML-friendly string."""
        split_path = Path(split_value).expanduser()
        if not split_path.is_absolute():
            split_path = (dataset_root / split_path).resolve()
        else:
            split_path = split_path.resolve()
        if not split_path.exists() or not split_path.is_dir():
            raise ValueError(f"Split directory not found: {split_path}")
        if not self._directory_has_images(split_path):
            raise ValueError(f"No images found under {split_path}")
        try:
            return split_path.relative_to(dataset_root).as_posix()
        except ValueError:
            return split_path.as_posix()

    @staticmethod
    def _directory_has_images(directory: Path, max_checks: int = 2000) -> bool:
        """Return True if directory tree contains at least one image file."""
        checked = 0
        try:
            for file_path in directory.rglob("*"):
                if not file_path.is_file():
                    continue
                if file_path.suffix.lower() in IMAGE_EXTENSIONS:
                    return True
                checked += 1
                if checked >= max_checks:
                    break
        except Exception:
            return False
        return False

    @staticmethod
    def calculate_checksum(file_path: str) -> str:
        """Calculate MD5 checksum of a file."""

View File

@@ -297,7 +297,9 @@ class MainWindow(QMainWindow):
        # Save window state before closing
        self._save_window_state()

        # Persist tab state and stop background work before exit
        if hasattr(self, "training_tab"):
            self.training_tab.shutdown()
        if hasattr(self, "annotation_tab"):
            self.annotation_tab.save_state()
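
The shutdown call above implies the training tab owns background work. A plausible shape for it, purely illustrative since the training tab's diff is suppressed below; the _worker attribute and stop protocol are assumptions:

    class TrainingTab(QWidget):  # illustration only, not the committed code
        def shutdown(self) -> None:
            """Persist state and stop any in-flight training worker."""
            self.save_state()  # assumed persistence hook
            if getattr(self, "_worker", None) is not None:
                self._worker.requestInterruption()  # QThread cooperative stop
                self._worker.wait(5000)             # block up to 5 s on exit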

File diff suppressed because it is too large

View File

@@ -55,6 +55,7 @@ class YOLOWrapper:
save_dir: str = "data/models", save_dir: str = "data/models",
name: str = "custom_model", name: str = "custom_model",
resume: bool = False, resume: bool = False,
callbacks: Optional[Dict[str, Callable]] = None,
**kwargs, **kwargs,
) -> Dict[str, Any]: ) -> Dict[str, Any]:
""" """
@@ -69,6 +70,7 @@ class YOLOWrapper:
            save_dir: Directory to save trained model
            name: Name for the training run
            resume: Resume training from last checkpoint
            callbacks: Optional Ultralytics callback dictionary
            **kwargs: Additional training arguments

        Returns:
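
With the new callbacks parameter, a caller can hook training progress. This sketch assumes the wrapper forwards each entry to Ultralytics' model.add_callback(event, func), which is the library's registration API; the wrapper instance name and the idea that name= is the only other argument needed are assumptions:

    def on_epoch_end(trainer):
        # trainer is Ultralytics' BaseTrainer; .epoch is the 0-based epoch index
        print(f"finished epoch {trainer.epoch + 1}")

    results = wrapper.train(  # wrapper: a YOLOWrapper instance
        name="custom_model",
        callbacks={"on_train_epoch_end": on_epoch_end},  # real Ultralytics hook name
    )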