use a small yolov9 model for detection

2025-02-25 18:55:25 -06:00 · 2025-02-13 10:44:46 -06:00 · 2025-02-13 10:44:46 -06:00 · 0dcfcb2cf7
commit 0dcfcb2cf7
parent d6b5dc93cc
1 changed files with 44 additions and 0 deletions
--- a/frigate/embeddings/functions/onnx.py
+++ b/frigate/embeddings/functions/onnx.py
@ -5,6 +5,7 @@ from enum import Enum
 from io import BytesIO
 from typing import Dict, List, Optional, Union

+import cv2
 import numpy as np
 import requests
 from PIL import Image
@ -32,6 +33,7 @@ disable_progress_bar()
 logger = logging.getLogger(__name__)

 FACE_EMBEDDING_SIZE = 160
+LPR_EMBEDDING_SIZE = 256


 class ModelTypeEnum(str, Enum):
@ -41,6 +43,7 @@ class ModelTypeEnum(str, Enum):
    lpr_detect = "lpr_detect"
    lpr_classify = "lpr_classify"
    lpr_recognize = "lpr_recognize"
+    yolov9_lpr_detect = "yolov9_lpr_detect"


 class GenericONNXEmbedding:
@ -148,6 +151,8 @@ class GenericONNXEmbedding:
                self.feature_extractor = []
            elif self.model_type == ModelTypeEnum.lpr_recognize:
                self.feature_extractor = []
+            elif self.model_type == ModelTypeEnum.yolov9_lpr_detect:
+                self.feature_extractor = []

            self.runner = ONNXModelRunner(
                os.path.join(self.download_path, self.model_file),
@ -237,6 +242,45 @@ class GenericONNXEmbedding:
            for img in raw_inputs:
                processed.append({"x": img})
            return processed
+        elif self.model_type == ModelTypeEnum.yolov9_lpr_detect:
+            if isinstance(raw_inputs, list):
+                raise ValueError(
+                    "License plate embedding does not support batch inputs."
+                )
+            # Get image as numpy array
+            img = self._process_image(raw_inputs)
+            height, width, channels = img.shape
+
+            # Resize maintaining aspect ratio
+            if width > height:
+                new_height = int(((height / width) * LPR_EMBEDDING_SIZE) // 4 * 4)
+                img = cv2.resize(img, (LPR_EMBEDDING_SIZE, new_height))
+            else:
+                new_width = int(((width / height) * LPR_EMBEDDING_SIZE) // 4 * 4)
+                img = cv2.resize(img, (new_width, LPR_EMBEDDING_SIZE))
+
+            # Get new dimensions after resize
+            og_h, og_w, channels = img.shape
+
+            # Create black square frame
+            frame = np.full(
+                (LPR_EMBEDDING_SIZE, LPR_EMBEDDING_SIZE, channels),
+                (0, 0, 0),
+                dtype=np.float32,
+            )
+
+            # Center the resized image in the square frame
+            x_center = (LPR_EMBEDDING_SIZE - og_w) // 2
+            y_center = (LPR_EMBEDDING_SIZE - og_h) // 2
+            frame[y_center : y_center + og_h, x_center : x_center + og_w] = img
+
+            # Normalize to 0-1
+            frame = frame / 255.0
+
+            # Convert from HWC to CHW format and add batch dimension
+            frame = np.transpose(frame, (2, 0, 1))
+            frame = np.expand_dims(frame, axis=0)
+            return [{"images": frame}]
        else:
            raise ValueError(f"Unable to preprocess inputs for {self.model_type}")