Update training #2
@@ -18,6 +18,8 @@ training:
  default_imgsz: 640
  default_patience: 50
  default_lr0: 0.01
  last_dataset_yaml: /home/martin/code/object_detection/data/datasets/data.yaml
  last_dataset_dir: /home/martin/code/object_detection/data/datasets
detection:
  default_confidence: 0.25
  default_iou: 0.45
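These defaults are plain YAML, so any component can read them with PyYAML. A minimal sketch, assuming a hypothetical config file path (the real location is not shown in this diff):

import yaml

# Hypothetical path; the actual config file location is not part of this diff.
with open("config/config.yaml", encoding="utf-8") as fh:
    cfg = yaml.safe_load(fh)

imgsz = cfg["training"]["default_imgsz"]               # 640
last_yaml = cfg["training"].get("last_dataset_yaml")   # most recently used dataset yaml
conf_thres = cfg["detection"]["default_confidence"]    # 0.25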
@@ -10,6 +10,13 @@ from typing import List, Dict, Optional, Tuple, Any, Union
from pathlib import Path
import csv
import hashlib
import yaml

from src.utils.logger import get_logger

IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".tif", ".tiff", ".bmp")

logger = get_logger(__name__)


class DatabaseManager:
@@ -861,6 +868,187 @@ class DatabaseManager:
        finally:
            conn.close()

    # ==================== Dataset Utilities ====================

    def compose_data_yaml(
        self,
        dataset_root: str,
        output_path: Optional[str] = None,
        splits: Optional[Dict[str, str]] = None,
    ) -> str:
        """
        Compose a YOLO data.yaml file based on dataset folders and database metadata.

        Args:
            dataset_root: Base directory containing the dataset structure.
            output_path: Optional output path; defaults to <dataset_root>/data.yaml.
            splits: Optional mapping overriding train/val/test image directories (relative
                to dataset_root or absolute paths).

        Returns:
            Path to the generated YAML file.
        """
        dataset_root_path = Path(dataset_root).expanduser()
        if not dataset_root_path.exists():
            raise ValueError(f"Dataset root does not exist: {dataset_root_path}")
        dataset_root_path = dataset_root_path.resolve()

        split_map: Dict[str, str] = {key: "" for key in ("train", "val", "test")}
        if splits:
            for key, value in splits.items():
                if key in split_map and value:
                    split_map[key] = value

        inferred = self._infer_split_dirs(dataset_root_path)
        for key in split_map:
            if not split_map[key]:
                split_map[key] = inferred.get(key, "")

        for required in ("train", "val"):
            if not split_map[required]:
                raise ValueError(
                    "Unable to determine %s image directory under %s. Provide it "
                    "explicitly via the 'splits' argument."
                    % (required, dataset_root_path)
                )

        yaml_splits: Dict[str, str] = {}
        for key, value in split_map.items():
            if not value:
                continue
            yaml_splits[key] = self._normalize_split_value(value, dataset_root_path)

        class_names = self._fetch_annotation_class_names()
        if not class_names:
            class_names = [cls["class_name"] for cls in self.get_object_classes()]
        if not class_names:
            raise ValueError("No object classes available to populate data.yaml")

        names_map = {idx: name for idx, name in enumerate(class_names)}
        payload: Dict[str, Any] = {
            "path": dataset_root_path.as_posix(),
            "train": yaml_splits["train"],
            "val": yaml_splits["val"],
            "names": names_map,
            "nc": len(class_names),
        }
        if yaml_splits.get("test"):
            payload["test"] = yaml_splits["test"]

        output_path_obj = (
            Path(output_path).expanduser()
            if output_path
            else dataset_root_path / "data.yaml"
        )
        output_path_obj.parent.mkdir(parents=True, exist_ok=True)

        with open(output_path_obj, "w", encoding="utf-8") as handle:
            yaml.safe_dump(payload, handle, sort_keys=False)

        logger.info(f"Generated data.yaml at {output_path_obj}")
        return output_path_obj.as_posix()

    def _fetch_annotation_class_names(self) -> List[str]:
        """Return class names referenced by annotations (ordered by class ID)."""
        conn = self.get_connection()
        try:
            cursor = conn.cursor()
            cursor.execute(
                """
                SELECT DISTINCT c.id, c.class_name
                FROM annotations a
                JOIN object_classes c ON a.class_id = c.id
                ORDER BY c.id
                """
            )
            rows = cursor.fetchall()
            return [row["class_name"] for row in rows]
        finally:
            conn.close()

    def _infer_split_dirs(self, dataset_root: Path) -> Dict[str, str]:
        """Infer train/val/test image directories relative to dataset_root."""
        patterns = {
            "train": [
                "train/images",
                "training/images",
                "images/train",
                "images/training",
                "train",
                "training",
            ],
            "val": [
                "val/images",
                "validation/images",
                "images/val",
                "images/validation",
                "val",
                "validation",
            ],
            "test": [
                "test/images",
                "testing/images",
                "images/test",
                "images/testing",
                "test",
                "testing",
            ],
        }

        inferred: Dict[str, str] = {key: "" for key in patterns}
        for split_name, options in patterns.items():
            for relative in options:
                candidate = (dataset_root / relative).resolve()
                if (
                    candidate.exists()
                    and candidate.is_dir()
                    and self._directory_has_images(candidate)
                ):
                    try:
                        inferred[split_name] = candidate.relative_to(
                            dataset_root
                        ).as_posix()
                    except ValueError:
                        inferred[split_name] = candidate.as_posix()
                    break
        return inferred

    def _normalize_split_value(self, split_value: str, dataset_root: Path) -> str:
        """Validate and normalize a split directory to a YAML-friendly string."""
        split_path = Path(split_value).expanduser()
        if not split_path.is_absolute():
            split_path = (dataset_root / split_path).resolve()
        else:
            split_path = split_path.resolve()

        if not split_path.exists() or not split_path.is_dir():
            raise ValueError(f"Split directory not found: {split_path}")

        if not self._directory_has_images(split_path):
            raise ValueError(f"No images found under {split_path}")

        try:
            return split_path.relative_to(dataset_root).as_posix()
        except ValueError:
            return split_path.as_posix()

    @staticmethod
    def _directory_has_images(directory: Path, max_checks: int = 2000) -> bool:
        """Return True if directory tree contains at least one image file."""
        checked = 0
        try:
            for file_path in directory.rglob("*"):
                if not file_path.is_file():
                    continue
                if file_path.suffix.lower() in IMAGE_EXTENSIONS:
                    return True
                checked += 1
                if checked >= max_checks:
                    break
        except Exception:
            return False
        return False

    @staticmethod
    def calculate_checksum(file_path: str) -> str:
        """Calculate MD5 checksum of a file."""
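A minimal usage sketch for the new helper follows; the import path and the DatabaseManager constructor arguments are assumptions, since neither appears in this diff:

# Sketch only: module path and constructor signature are assumed, not shown here.
from src.database.db_manager import DatabaseManager  # hypothetical import path

db = DatabaseManager()  # construct however the rest of the application does
yaml_path = db.compose_data_yaml(
    "/home/martin/code/object_detection/data/datasets",
    splits={"train": "train/images", "val": "val/images"},  # optional explicit override
)
print(yaml_path)  # <dataset_root>/data.yaml unless output_path is given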
@@ -297,7 +297,9 @@ class MainWindow(QMainWindow):
        # Save window state before closing
        self._save_window_state()

        # Save annotation tab state if it exists
        # Persist tab state and stop background work before exit
        if hasattr(self, "training_tab"):
            self.training_tab.shutdown()
        if hasattr(self, "annotation_tab"):
            self.annotation_tab.save_state()
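training_tab.shutdown() itself is not part of this diff; one plausible sketch, assuming the tab drives training from a QThread-based worker, looks like this:

# Hypothetical sketch of TrainingTab.shutdown(); the real implementation is not shown.
def shutdown(self) -> None:
    """Stop the background training worker so the application can exit cleanly."""
    worker = getattr(self, "_worker_thread", None)  # assumed attribute name
    if worker is not None and worker.isRunning():
        worker.requestInterruption()  # ask the worker to stop cooperatively
        worker.quit()
        worker.wait(5000)             # give it up to 5 s to finish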
File diff suppressed because it is too large
@@ -55,6 +55,7 @@ class YOLOWrapper:
        save_dir: str = "data/models",
        name: str = "custom_model",
        resume: bool = False,
        callbacks: Optional[Dict[str, Callable]] = None,
        **kwargs,
    ) -> Dict[str, Any]:
        """
@@ -69,6 +70,7 @@ class YOLOWrapper:
            save_dir: Directory to save trained model
            name: Name for the training run
            resume: Resume training from last checkpoint
            callbacks: Optional Ultralytics callback dictionary
            **kwargs: Additional training arguments

        Returns:
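The new callbacks parameter takes a mapping of Ultralytics callback event names to callables. A usage sketch; the wrapper's constructor and the train() parameters that come before save_dir are not visible in this diff and are assumed here:

from src.models.yolo_wrapper import YOLOWrapper  # hypothetical import path

def on_epoch_end(trainer):
    # "on_train_epoch_end" is a standard Ultralytics callback event name.
    print(f"epoch {trainer.epoch + 1} finished")

wrapper = YOLOWrapper("yolov8n.pt")  # constructor arguments assumed
results = wrapper.train(
    data="/home/martin/code/object_detection/data/datasets/data.yaml",  # assumed leading argument
    save_dir="data/models",
    name="custom_model",
    resume=False,
    callbacks={"on_train_epoch_end": on_epoch_end},
)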