Yes, it feels like it. But I have to write the project and document it somehow. I can use YOLO for now, which makes it a bit easier. This is what I've tried with Keras so far:
# Label names, keyed by their integer class id.
class_ids = ["Person"]
class_mapping = dict(enumerate(class_ids))

# Detector built from the "yolo_v8_m_pascalvoc" preset, configured for a
# single class and boxes in "xywh" format.
model = keras_cv.models.YOLOV8Detector.from_preset(
    "yolo_v8_m_pascalvoc",
    bounding_box_format="xywh",
    num_classes=1,
)

# Brings inputs to 640x640, padding instead of stretching so the aspect
# ratio is kept.
inference_resizing = keras_cv.layers.Resizing(
    height=640,
    width=640,
    bounding_box_format="xywh",
    pad_to_aspect_ratio=True,
)
# OpenCV is used by the video-processing script further down. The codec,
# output path, frame size and VideoWriter are defined once, next to the
# capture loop that uses them; constructing a writer here as well would open
# 'output_video.mp4' twice and leave the first handle without a release().
import cv2
def store_box_dimensions(boxes):
    """Split the first image's boxes into separate x, y, w and h lists.

    Args:
        boxes: Array-like indexed as ``boxes[image][box][coordinate]``. Only
            image 0 is read, and only the first four coordinates of each box
            (taken as x, y, w, h — presumably the detector's "xywh" output;
            confirm against the caller).

    Returns:
        A tuple ``(x_list, y_list, w_list, h_list)`` with the values of the
        boxes of image 0, in order, up to (not including) the first box whose
        x equals -1. All four lists are empty when the batch is empty.
    """
    x_list, y_list, w_list, h_list = [], [], [], []

    # An empty batch has no image 0 to index into.
    if len(boxes) == 0:
        return x_list, y_list, w_list, h_list

    for box in boxes[0]:
        x, y, w, h = box[0], box[1], box[2], box[3]
        # x == -1 ends the scan: everything from here on is skipped
        # (looks like the padding marker for unused slots — confirm).
        if x == -1:
            break
        x_list.append(x)
        y_list.append(y)
        w_list.append(w)
        h_list.append(h)

    return x_list, y_list, w_list, h_list
# Run the detector on every 25th frame of the input video, draw the predicted
# boxes on that frame and append it to an MP4 file.
import cv2

fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # 'mp4v' codec for the MP4 container
output_file = 'output_video.mp4'  # output path; keep the .mp4 extension
output_size = (500, 500)  # (width, height) of every written frame
model_input_size = 640  # must match the side length given to inference_resizing
frame_stride = 25  # detection runs on every 25th frame only

cap = cv2.VideoCapture('../../data/test.mp4')
if not cap.isOpened():
    print("Error: Couldn't open the camera.")
    cap.release()
    # `exit()` is a helper injected by the `site` module and is not guaranteed
    # to exist; raising SystemExit is the portable way to stop the script.
    raise SystemExit(1)

# Created only after the input opened successfully, so a failed open does not
# leave an empty output file behind.
out = cv2.VideoWriter(output_file, fourcc, 20.0, output_size)
frame_counter = 0

try:
    while True:
        ret, test_img = cap.read()
        if not ret:
            print("Error: Couldn't read frame. End of video?")
            break  # no more frames to read

        frame_counter += 1
        if frame_counter % frame_stride != 0:
            continue

        # OpenCV decodes frames as BGR; the pretrained detector presumably
        # expects RGB input (TODO confirm), so convert before inference.
        rgb_img = cv2.cvtColor(test_img, cv2.COLOR_BGR2RGB)
        image = inference_resizing([rgb_img])
        y_pred = model.predict(image)
        boxes = y_pred['boxes']
        x_values, y_values, w_values, h_values = store_box_dimensions(boxes)

        # The boxes live in the resized model-input image, not in the original
        # frame. Undo the aspect-preserving scale before drawing.
        # Assumes the resizing layer keeps the image anchored at the top-left
        # and pads only bottom/right, so no offset is needed — TODO confirm.
        frame_h, frame_w = test_img.shape[:2]
        scale = min(model_input_size / frame_h, model_input_size / frame_w)

        detections = zip(x_values, y_values, w_values, h_values)
        for box_number, coords in enumerate(detections, start=1):
            x, y, w, h = (int(value / scale) for value in coords)
            # Debugging output, in original-frame pixel coordinates
            print(f"Box {box_number}: x={x}, y={y}, w={w}, h={h}")
            cv2.rectangle(test_img, (x, y), (x + w, y + h), (255, 255, 255), thickness=4)

        # NOTE(review): only the processed (every 25th) frames are written;
        # the pasted source lost its indentation, so confirm this was intended
        # rather than writing every frame.
        resize_img = cv2.resize(test_img, output_size)
        out.write(resize_img)
        # No preview window is opened, so there is no key polling here:
        # cv2.waitKey() can only see key presses while a HighGUI window exists.
finally:
    # Always finalize the files, even if inference or drawing raised.
    cap.release()
    out.release()