Adding standalone training script and updates

2025-12-13 09:28:24 +02:00
parent 908e9a5b82
commit aec0fbf83c
8 changed files with 1434 additions and 290 deletions
--- a/tests/test_training_dataset_prep.py
+++ b/tests/test_training_dataset_prep.py
@@ -18,8 +18,8 @@ from src.utils.image import Image


 def test_float32_3ch_conversion():
-    """Test conversion of 16-bit TIFF to float32 3-channel TIFF."""
-    print("\n=== Testing Float32 3-Channel Conversion ===")
+    """Test conversion of 16-bit TIFF to 16-bit RGB PNG."""
+    print("\n=== Testing 16-bit RGB PNG Conversion ===")

    # Create temporary directory structure
    with tempfile.TemporaryDirectory() as tmpdir:
@@ -42,39 +42,65 @@ def test_float32_3ch_conversion():
        print(f"  Dtype: {test_data.dtype}")
        print(f"  Range: [{test_data.min()}, {test_data.max()}]")

-        # Simulate the conversion process
-        print("\nConverting to float32 3-channel...")
+        # Simulate the conversion process (matching training_tab.py)
+        print("\nConverting to 16-bit RGB PNG using PIL merge...")
        img_obj = Image(test_file)
+        from PIL import Image as PILImage

-        # Convert to float32 [0-1]
-        float_data = img_obj.to_normalized_float32()
+        # Get uint16 data
+        uint16_data = img_obj.data

-        # Replicate to 3 channels
-        if len(float_data.shape) == 2:
-            float_3ch = np.stack([float_data] * 3, axis=-1)
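+        # Build one 'I;16' image per channel for PIL's merge. NOTE(review): Image.merge("RGB", ...) expects 8-bit "L" bands - confirm 'I;16' bands are accepted and 16-bit depth survives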
+        if len(uint16_data.shape) == 2:
+            # Grayscale - replicate to RGB
+            r_img = PILImage.fromarray(uint16_data, mode="I;16")
+            g_img = PILImage.fromarray(uint16_data, mode="I;16")
+            b_img = PILImage.fromarray(uint16_data, mode="I;16")
        else:
-            float_3ch = float_data
+            r_img = PILImage.fromarray(uint16_data[:, :, 0], mode="I;16")
+            g_img = PILImage.fromarray(
+                (
+                    uint16_data[:, :, 1]
+                    if uint16_data.shape[2] > 1
+                    else uint16_data[:, :, 0]
+                ),
+                mode="I;16",
+            )
+            b_img = PILImage.fromarray(
+                (
+                    uint16_data[:, :, 2]
+                    if uint16_data.shape[2] > 2
+                    else uint16_data[:, :, 0]
+                ),
+                mode="I;16",
+            )

-        # Save as float32 TIFF
-        output_file = dst_dir / "test_float32_3ch.tif"
-        tifffile.imwrite(output_file, float_3ch.astype(np.float32))
-        print(f"Saved float32 3-channel TIFF: {output_file}")
+        # Merge channels into RGB
+        rgb_img = PILImage.merge("RGB", (r_img, g_img, b_img))

-        # Verify the output
-        loaded = tifffile.imread(output_file)
-        print(f"\nVerifying output:")
+        # Save as PNG
+        output_file = dst_dir / "test_16bit_rgb.png"
+        rgb_img.save(output_file)
+        print(f"Saved 16-bit RGB PNG: {output_file}")
+        print(f"  PIL mode after merge: {rgb_img.mode}")
+
+        # Verify the output - Load with OpenCV (as YOLO does)
+        import cv2
+
+        loaded = cv2.imread(str(output_file), cv2.IMREAD_UNCHANGED)
+        print(f"\nVerifying output (loaded with OpenCV):")
        print(f"  Shape: {loaded.shape}")
        print(f"  Dtype: {loaded.dtype}")
        print(f"  Channels: {loaded.shape[2] if len(loaded.shape) == 3 else 1}")
-        print(f"  Range: [{loaded.min():.6f}, {loaded.max():.6f}]")
+        print(f"  Range: [{loaded.min()}, {loaded.max()}]")
        print(f"  Unique values: {len(np.unique(loaded[:,:,0]))}")

        # Assertions
-        assert loaded.dtype == np.float32, f"Expected float32, got {loaded.dtype}"
+        assert loaded.dtype == np.uint16, f"Expected uint16, got {loaded.dtype}"
        assert loaded.shape[2] == 3, f"Expected 3 channels, got {loaded.shape[2]}"
        assert (
-            0.0 <= loaded.min() <= loaded.max() <= 1.0
-        ), f"Expected [0,1] range, got [{loaded.min()}, {loaded.max()}]"
+            loaded.min() >= 0 and loaded.max() <= 65535
+        ), f"Expected [0,65535] range, got [{loaded.min()}, {loaded.max()}]"

        # Verify all channels are identical (replicated grayscale)
        assert np.array_equal(
@@ -84,21 +110,20 @@ def test_float32_3ch_conversion():
            loaded[:, :, 0], loaded[:, :, 2]
        ), "Channel 0 and 2 should be identical"

-        # Verify float32 precision (not quantized to uint8 steps)
+        # Verify no data loss
        unique_vals = len(np.unique(loaded[:, :, 0]))
        print(f"\n  Precision check:")
        print(f"    Unique values in channel: {unique_vals}")
        print(f"    Source unique values: {len(np.unique(test_data))}")

-        # The final unique values should match source (no loss from conversion)
        assert unique_vals == len(
            np.unique(test_data)
        ), f"Expected {len(np.unique(test_data))} unique values, got {unique_vals}"

        print("\n✓ All conversion tests passed!")
-        print("  - Float32 dtype preserved")
+        print("  - uint16 dtype preserved")
        print("  - 3 channels created")
-        print("  - Range [0-1] maintained")
+        print("  - Range [0-65535] maintained")
        print("  - No precision loss from conversion")
        print("  - Channels properly replicated")