Making it an installable package and switching to segmentation mode

This commit is contained in:
2025-12-05 15:51:16 +02:00
parent 9011276584
commit 310e0b2285
20 changed files with 667 additions and 56 deletions

View File

@@ -87,6 +87,7 @@ class InferenceEngine:
"class_name": det["class_name"],
"bbox": tuple(bbox_normalized),
"confidence": det["confidence"],
"segmentation_mask": det.get("segmentation_mask"),
"metadata": {"class_id": det["class_id"]},
}
detection_records.append(record)
@@ -160,6 +161,7 @@ class InferenceEngine:
conf: float = 0.25,
bbox_thickness: int = 2,
bbox_colors: Optional[Dict[str, str]] = None,
draw_masks: bool = True,
) -> tuple:
"""
Detect objects and return annotated image.
@@ -169,6 +171,7 @@ class InferenceEngine:
conf: Confidence threshold
bbox_thickness: Thickness of bounding boxes
bbox_colors: Dictionary mapping class names to hex colors
draw_masks: Whether to draw segmentation masks (if available)
Returns:
Tuple of (detections, annotated_image_array)
@@ -189,12 +192,8 @@ class InferenceEngine:
bbox_colors = {}
default_color = self._hex_to_bgr(bbox_colors.get("default", "#00FF00"))
# Draw bounding boxes
# Draw detections
for det in detections:
# Get absolute coordinates
bbox_abs = det["bbox_absolute"]
x1, y1, x2, y2 = [int(v) for v in bbox_abs]
# Get color for this class
class_name = det["class_name"]
color_hex = bbox_colors.get(
@@ -202,7 +201,33 @@ class InferenceEngine:
)
color = self._hex_to_bgr(color_hex)
# Draw box
# Draw segmentation mask if available and requested
if draw_masks and det.get("segmentation_mask"):
mask_normalized = det["segmentation_mask"]
if mask_normalized and len(mask_normalized) > 0:
# Convert normalized coordinates to absolute pixels
mask_points = np.array(
[
[int(pt[0] * width), int(pt[1] * height)]
for pt in mask_normalized
],
dtype=np.int32,
)
# Create a semi-transparent overlay
overlay = img.copy()
cv2.fillPoly(overlay, [mask_points], color)
# Blend with original image (30% opacity)
cv2.addWeighted(overlay, 0.3, img, 0.7, 0, img)
# Draw mask contour
cv2.polylines(img, [mask_points], True, color, bbox_thickness)
# Get absolute coordinates for bounding box
bbox_abs = det["bbox_absolute"]
x1, y1, x2, y2 = [int(v) for v in bbox_abs]
# Draw bounding box
cv2.rectangle(img, (x1, y1), (x2, y2), color, bbox_thickness)
# Prepare label

View File

@@ -16,7 +16,7 @@ logger = get_logger(__name__)
class YOLOWrapper:
"""Wrapper for YOLOv8 model operations."""
def __init__(self, model_path: str = "yolov8s.pt"):
def __init__(self, model_path: str = "yolov8s-seg.pt"):
"""
Initialize YOLO model.
@@ -282,6 +282,10 @@ class YOLOWrapper:
boxes = result.boxes
image_path = str(result.path)
orig_shape = result.orig_shape # (height, width)
height, width = orig_shape
# Check if this is a segmentation model with masks
has_masks = hasattr(result, "masks") and result.masks is not None
for i in range(len(boxes)):
# Get normalized coordinates
@@ -299,6 +303,33 @@ class YOLOWrapper:
float(v) for v in boxes.xyxy[i].cpu().numpy()
], # Absolute pixels
}
# Extract segmentation mask if available
if has_masks:
try:
# Get the mask for this detection
mask_data = result.masks.xy[
i
] # Polygon coordinates in absolute pixels
# Convert to normalized coordinates
if len(mask_data) > 0:
mask_normalized = []
for point in mask_data:
x_norm = float(point[0]) / width
y_norm = float(point[1]) / height
mask_normalized.append([x_norm, y_norm])
detection["segmentation_mask"] = mask_normalized
else:
detection["segmentation_mask"] = None
except Exception as mask_error:
logger.warning(
f"Error extracting mask for detection {i}: {mask_error}"
)
detection["segmentation_mask"] = None
else:
detection["segmentation_mask"] = None
detections.append(detection)
return detections