# frigate/detect_objects.py
import os
import cv2
import imutils
import time
import datetime
import ctypes
import logging
import multiprocessing as mp
import threading
import json
from contextlib import closing
import numpy as np
from object_detection.utils import visualization_utils as vis_util
from flask import Flask, Response, make_response, send_file
import paho.mqtt.client as mqtt
from frigate.util import tonumpyarray
from frigate.mqtt import MqttMotionPublisher, MqttObjectPublisher
from frigate.objects import ObjectParser, ObjectCleaner, BestPersonFrame
from frigate.motion import detect_motion
from frigate.video import fetch_frames, FrameTracker
from frigate.object_detection import prep_for_detection, detect_objects
RTSP_URL = os.getenv('RTSP_URL')

MQTT_HOST = os.getenv('MQTT_HOST')
MQTT_USER = os.getenv('MQTT_USER')
MQTT_PASS = os.getenv('MQTT_PASS')
MQTT_TOPIC_PREFIX = os.getenv('MQTT_TOPIC_PREFIX')

# colon-separated list of regions, where each region is
# "size,x_offset,y_offset,min_person_area,min_object_size,mask_filename"
# (see the parsing loop in main(); the mask image is loaded from
# /config/<mask_filename>)
REGIONS = os.getenv('REGIONS')
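# a hypothetical example (region values and mask filenames are illustrative only):
#   REGIONS="300,0,0,5000,100,back-mask.bmp:300,350,250,2000,50,side-mask.bmp"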
DEBUG = (os.getenv('DEBUG') == '1')
def main():
    DETECTED_OBJECTS = []
    recent_motion_frames = {}
    # Parse selected regions
    regions = []
    for region_string in REGIONS.split(':'):
        region_parts = region_string.split(',')
        # load the mask image as grayscale and take the coordinates of
        # every black pixel; these are the pixels to mask out
        region_mask_image = cv2.imread("/config/{}".format(region_parts[5]), cv2.IMREAD_GRAYSCALE)
        region_mask = np.where(region_mask_image==[0])
        regions.append({
            'size': int(region_parts[0]),
            'x_offset': int(region_parts[1]),
            'y_offset': int(region_parts[2]),
            'min_person_area': int(region_parts[3]),
            'min_object_size': int(region_parts[4]),
            'mask': region_mask,
            # Event for motion detection signaling
            'motion_detected': mp.Event(),
            # create shared array for storing 10 detected objects
            # note: this must be a double even though the value you are storing
            #       is a float. otherwise it stops updating the value in shared
            #       memory. probably something to do with the size of the memory block
            'output_array': mp.Array(ctypes.c_double, 6*10)
        })
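    # note: 'output_array' has room for 10 detections of 6 doubles each; the
    # exact per-detection layout is presumably defined in frigate.object_detection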
    # capture a single frame and check the frame shape so the correct array
    # size can be allocated in memory
    video = cv2.VideoCapture(RTSP_URL)
    ret, frame = video.read()
    if ret:
        frame_shape = frame.shape
    else:
        print("Unable to capture video stream")
        exit(1)
    video.release()

    # compute the flattened array length from the array shape
    flat_array_length = frame_shape[0] * frame_shape[1] * frame_shape[2]
    # create shared array for storing the full frame image data
    shared_arr = mp.Array(ctypes.c_uint8, flat_array_length)
    # create shared value for storing the frame_time
    shared_frame_time = mp.Value('d', 0.0)
    # Lock to control access to the frame
    frame_lock = mp.Lock()
    # Condition for notifying that a new frame is ready
    frame_ready = mp.Condition()
    # Condition for notifying that motion status changed globally
    motion_changed = mp.Condition()
    # Condition for notifying that objects were parsed
    objects_parsed = mp.Condition()
    # Queue for detected objects
    object_queue = mp.Queue()

    # array for prepped frame with shape (1, 300, 300, 3)
    prepped_frame_array = mp.Array(ctypes.c_uint8, 300*300*3)
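    # (300x300x3 presumably matches the input resolution expected by the
    # object detection model in frigate.object_detection)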
    # shared value for storing the prepped_frame_time
    prepped_frame_time = mp.Value('d', 0.0)
    # Condition for notifying that a new prepped frame is ready
    prepped_frame_ready = mp.Condition()
    # Lock to control access to the prepped frame
    prepped_frame_lock = mp.Lock()
    # array for prepped frame box [x1, y1, x2, y2]
    prepped_frame_box = mp.Array(ctypes.c_uint16, 4)

    # shape current frame so it can be treated as an image
    frame_arr = tonumpyarray(shared_arr).reshape(frame_shape)
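    # (tonumpyarray presumably wraps the shared buffer without copying, so
    # frames written by the capture process are visible here immediately)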
    # start the process to capture frames from the RTSP stream and store in a shared array
    capture_process = mp.Process(target=fetch_frames, args=(shared_arr,
        shared_frame_time, frame_lock, frame_ready, frame_shape, RTSP_URL))
    capture_process.daemon = True

    # for each region, start a separate process for motion detection and object detection
    detection_prep_processes = []
    motion_processes = []
    for region in regions:
        # possibly try putting these on threads and putting prepped
        # frames in a queue
        detection_prep_process = mp.Process(target=prep_for_detection, args=(shared_arr,
            shared_frame_time,
            frame_lock, frame_ready,
            region['motion_detected'],
            frame_shape,
            region['size'], region['x_offset'], region['y_offset'],
            prepped_frame_array, prepped_frame_time, prepped_frame_ready,
            prepped_frame_lock, prepped_frame_box))
        detection_prep_process.daemon = True
        detection_prep_processes.append(detection_prep_process)

        motion_process = mp.Process(target=detect_motion, args=(shared_arr,
            shared_frame_time,
            frame_lock, frame_ready,
            region['motion_detected'],
            motion_changed,
            frame_shape,
            region['size'], region['x_offset'], region['y_offset'],
            region['min_object_size'], region['mask'],
            DEBUG))
        motion_process.daemon = True
        motion_processes.append(motion_process)

    # create a process for object detection
    detection_process = mp.Process(target=detect_objects, args=(
        prepped_frame_array, prepped_frame_time,
        prepped_frame_lock, prepped_frame_ready,
        prepped_frame_box, object_queue, DEBUG
    ))
    detection_process.daemon = True

    # start a thread to store recent motion frames for processing
    frame_tracker = FrameTracker(frame_arr, shared_frame_time, frame_ready, frame_lock,
        recent_motion_frames, motion_changed, [region['motion_detected'] for region in regions])
    frame_tracker.start()

    # start a thread to store the highest scoring recent person frame
    best_person_frame = BestPersonFrame(objects_parsed, recent_motion_frames, DETECTED_OBJECTS,
        motion_changed, [region['motion_detected'] for region in regions])
    best_person_frame.start()

    # start a thread to parse objects from the queue
    object_parser = ObjectParser(object_queue, objects_parsed, DETECTED_OBJECTS)
    object_parser.start()

    # start a thread to expire objects from the detected objects list
    object_cleaner = ObjectCleaner(objects_parsed, DETECTED_OBJECTS,
        motion_changed, [region['motion_detected'] for region in regions])
    object_cleaner.start()

    # connect to mqtt and setup last will
    def on_connect(client, userdata, flags, rc):
        print("On connect called")
        # publish a message to signal that the service is running
        client.publish(MQTT_TOPIC_PREFIX+'/available', 'online', retain=True)
    client = mqtt.Client()
    client.on_connect = on_connect
    client.will_set(MQTT_TOPIC_PREFIX+'/available', payload='offline', qos=1, retain=True)
    if MQTT_USER is not None:
        client.username_pw_set(MQTT_USER, password=MQTT_PASS)
    client.connect(MQTT_HOST, 1883, 60)
    client.loop_start()
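    # note: the 'offline' last-will payload set above is published by the broker
    # on this client's behalf if it disconnects uncleanly, while on_connect
    # re-publishes 'online' after every successful (re)connect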
    # start a thread to publish object scores (currently only person)
    mqtt_publisher = MqttObjectPublisher(client, MQTT_TOPIC_PREFIX, objects_parsed, DETECTED_OBJECTS)
    mqtt_publisher.start()

    # start thread to publish motion status
    mqtt_motion_publisher = MqttMotionPublisher(client, MQTT_TOPIC_PREFIX, motion_changed,
        [region['motion_detected'] for region in regions])
    mqtt_motion_publisher.start()

    # start the process of capturing frames
    capture_process.start()
    print("capture_process pid ", capture_process.pid)

    # start the object detection prep processes
    for detection_prep_process in detection_prep_processes:
        detection_prep_process.start()
        print("detection_prep_process pid ", detection_prep_process.pid)

    detection_process.start()
    print("detection_process pid ", detection_process.pid)

    # start the motion detection processes
    # for motion_process in motion_processes:
    #     motion_process.start()
    #     print("motion_process pid ", motion_process.pid)

    # TEMP: short circuit the motion detection
    for region in regions:
        region['motion_detected'].set()
    with motion_changed:
        motion_changed.notify_all()
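    # (with motion detection short-circuited, every region's motion event stays
    # set, which presumably keeps the prep/detection pipeline running on every
    # frame instead of only when motion is detected)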
    # create a flask app that encodes frames as mjpeg on demand
    app = Flask(__name__)

    @app.route('/best_person.jpg')
    def best_person():
        frame = np.zeros(frame_shape, np.uint8) if best_person_frame.best_frame is None else best_person_frame.best_frame
        ret, jpg = cv2.imencode('.jpg', frame)
        response = make_response(jpg.tobytes())
        response.headers['Content-Type'] = 'image/jpeg'
        return response

    @app.route('/')
    def index():
        # return a multipart response
        return Response(imagestream(),
            mimetype='multipart/x-mixed-replace; boundary=frame')

    def imagestream():
        while True:
            # max out at 5 FPS
            time.sleep(0.2)
            # make a copy of the current detected objects
            detected_objects = DETECTED_OBJECTS.copy()
            # lock and make a copy of the current frame
            with frame_lock:
                frame = frame_arr.copy()
            # convert to RGB for drawing
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # draw the bounding boxes on the screen
            for obj in detected_objects:
                vis_util.draw_bounding_box_on_image_array(frame,
                    obj['ymin'],
                    obj['xmin'],
                    obj['ymax'],
                    obj['xmax'],
                    color='red',
                    thickness=2,
                    display_str_list=["{}: {}%".format(obj['name'],int(obj['score']*100))],
                    use_normalized_coordinates=False)
            # draw the region boxes, green while motion is detected
            for region in regions:
                color = (255,255,255)
                if region['motion_detected'].is_set():
                    color = (0,255,0)
                cv2.rectangle(frame, (region['x_offset'], region['y_offset']),
                    (region['x_offset']+region['size'], region['y_offset']+region['size']),
                    color, 2)
            # convert back to BGR
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            # encode the image into a jpg
            ret, jpg = cv2.imencode('.jpg', frame)
            yield (b'--frame\r\n'
                b'Content-Type: image/jpeg\r\n\r\n' + jpg.tobytes() + b'\r\n\r\n')
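    # (each yielded part is a standalone JPEG; with the multipart/x-mixed-replace
    # mimetype above, browsers replace the previous image as new parts arrive,
    # which is what makes this render as a live MJPEG stream)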
    app.run(host='0.0.0.0', debug=False)
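    # app.run() blocks until the Flask server exits, so the joins below only
    # run on shutdown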
    capture_process.join()
    for detection_prep_process in detection_prep_processes:
        detection_prep_process.join()
    for motion_process in motion_processes:
        motion_process.join()
    detection_process.join()
    frame_tracker.join()
    best_person_frame.join()
    object_parser.join()
    object_cleaner.join()
    mqtt_publisher.join()
if __name__ == '__main__':
    main()