How does the viz_utils plot object detection output?

Hi all,

How does the viz_utils.visualize_boxes_and_labels_on_image_array plot the predicted bounding boxes? I noticed that there are 100 predicted boxes per image, each with its own score (sorted by highest score first). Does the viz_utils.visualize_boxes_and_labels_on_image_array plot the highest score bounding box?

import matplotlib.pyplot as plt

from object_detection.utils import visualization_utils as viz_utils

def plot_detections(image_np,
                    boxes,
                    classes,
                    scores,
                    category_index,
                    figsize=(12, 16),
                    image_name=None):
    """Wrapper function to visualize detections.

    image_np: uint8 numpy array with shape (img_height, img_width, 3)
    boxes: a numpy array of shape [N, 4]
    classes: a numpy array of shape [N]. Note that class indices are 1-based,
      and match the keys in the label map.
    scores: a numpy array of shape [N] or None.  If scores=None, then
      this function assumes that the boxes to be plotted are groundtruth
      boxes and plots all boxes as black with no classes or scores.
    category_index: a dict containing category dictionaries (each holding
      category index `id` and category name `name`) keyed by category indices.
    figsize: size for the figure.
    image_name: a name for the image file.
    """
    image_np_with_annotations = image_np.copy()
    viz_utils.visualize_boxes_and_labels_on_image_array(
        image_np_with_annotations,
        boxes,
        classes,
        scores,
        category_index,
        use_normalized_coordinates=True)
    if image_name:
        plt.imsave(image_name, image_np_with_annotations)
    else:
        plt.imshow(image_np_with_annotations)

import glob
import os

import numpy as np

test_image_dir = '../../data/processed/gt/test'
test_images_fps = glob.glob(os.path.join(test_image_dir, "*.tif"))
test_images_np = []
for i in range(1, 20):
    image_path = test_images_fps[i]
    test_images_np.append(np.expand_dims(load_image_into_numpy_array(image_path), axis=0))

# Again, uncomment this decorator if you want to run inference eagerly
@tf.function
def detect(input_tensor):
    """Run detection on an input image.

    input_tensor: A [1, height, width, 3] Tensor of type tf.float32.
      Note that height and width can be anything since the image will be
      immediately resized according to the needs of the model within this
      function.

    Returns a dict containing 3 Tensors (`detection_boxes`, `detection_classes`,
      and `detection_scores`).
    """
    preprocessed_image, shapes = detection_model.preprocess(input_tensor)
    prediction_dict = detection_model.predict(preprocessed_image, shapes)
    return detection_model.postprocess(prediction_dict, shapes)

# Note that the first frame will trigger tracing of the tf.function, which will
# take some time, after which inference should be fast.

label_id_offset = 1
for i in range(len(test_images_np)):
    input_tensor = tf.convert_to_tensor(test_images_np[i], dtype=tf.float32)
    detections = detect(input_tensor)

    plot_detections(
        test_images_np[i][0],
        detections['detection_boxes'][0].numpy(),
        detections['detection_classes'][0].numpy().astype(np.uint32) + label_id_offset,
        detections['detection_scores'][0].numpy(),
        category_index, figsize=(15, 20), image_name="test_image_output.jpg")





Hello Alex,

Visualization in the object detection API is controlled by a set of parameters.

When we run the model, the resulting values are usually tensor objects.

For a key such as detection_scores, the associated value is a tf.Tensor containing the model’s predictions.

Then we write code to convert the tensor values into regular numpy arrays.

We use a comprehension and call .numpy() on each tensor, which retrieves the numpy array for each of those tensors.
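As a small sketch of that conversion (the `results` dict below is made up for illustration; in practice it comes from running the detection model):

```python
import tensorflow as tf

# Hypothetical model output: a dict mapping key names to tf.Tensor values
# (the real dict would come from the detection model).
results = {
    'detection_scores': tf.constant([[0.9, 0.4]]),
    'detection_classes': tf.constant([[1.0, 2.0]]),
    'detection_boxes': tf.constant([[[0.1, 0.1, 0.5, 0.5],
                                     [0.2, 0.2, 0.6, 0.6]]]),
}

# Convert every tensor value into a regular numpy array with a comprehension.
results = {key: value.numpy() for key, value in results.items()}

print(results.keys())
```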

Then, if you call results.keys(), you can see the list of keys in the model’s output.

Three of these keys are standard, meaning they appear in the outputs of all object detection models using this API: detection_scores, detection_classes, and detection_boxes.

So then, if you want to draw your boxes with their labels and scores on the image, you can just call the visualization utils API for this.

In this case, it’s visualize_boxes_and_labels_on_image_array. At a minimum, you should set the parameters as follows.

The first is the numpy array containing the image. If it’s a single image in a batch, it is indexed at 0, as are the results.

Next are the detection_boxes, followed by the detection_classes. The class integers that are output by the model are integers that start counting from 0.

The actual data labels start counting from one. So in this particular case, you can add an offset of one to convert the model class integers to the class IDs that are used by the data set.
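As a quick numpy sketch of that offset (the class values here are made up):

```python
import numpy as np

# Hypothetical class integers as output by the model (0-based).
model_classes = np.array([0, 2, 1])

# The dataset's labels are 1-based, so add an offset of one
# to convert model class integers to dataset class IDs.
label_id_offset = 1
dataset_class_ids = model_classes + label_id_offset

print(dataset_class_ids)  # [1 3 2]
```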

The detection_scores say how confident the model is in its predicted classes. The category_index is the dict you created when you mapped the label numbers to their text names.

You’ll usually want to set use_normalized_coordinates to True. The bounding boxes stored in the results’ detection_boxes have coordinates whose values range from 0 to 1, so they’re considered normalized coordinates. For example, the bottom-right corner of a bounding box may be 1 across and 0.5 down.

To overlay these boxes on top of the image, the function converts the normalized coordinates into denormalized pixel coordinates.

For example, the normalized coordinates above may become 256 across and 128 down. Setting use_normalized_coordinates to True tells the function that the bounding boxes you give it are normalized, so it knows to scale them up to the coordinates of the image.
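Concretely, the scaling works like this (a numpy sketch assuming a 256 x 256 image, in the API’s [ymin, xmin, ymax, xmax] box convention):

```python
import numpy as np

img_height, img_width = 256, 256  # assumed image size for this example

# A normalized box whose bottom-right corner is 1.0 across and 0.5 down,
# in [ymin, xmin, ymax, xmax] order.
normalized_box = np.array([0.0, 0.0, 0.5, 1.0])

# Scale each coordinate by the image dimension along its axis.
ymin, xmin, ymax, xmax = normalized_box
pixel_box = (ymin * img_height, xmin * img_width,
             ymax * img_height, xmax * img_width)

print(pixel_box)  # (0.0, 0.0, 128.0, 256.0)
```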

The minimum score threshold (min_score_thresh) determines which labels are shown and which bounding boxes are drawn. You give the function the detection scores, which say how confident the model is in each predicted object.

For any scores that are below this threshold, the bounding box won’t be visualized by this function.
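The filtering amounts to a score mask. A numpy sketch (the scores and boxes are made up, with a 0.8 threshold for illustration):

```python
import numpy as np

min_score_thresh = 0.8
detection_scores = np.array([0.95, 0.81, 0.40, 0.10])
detection_boxes = np.array([[0.1, 0.1, 0.5, 0.5],
                            [0.2, 0.2, 0.6, 0.6],
                            [0.0, 0.0, 0.3, 0.3],
                            [0.4, 0.4, 0.9, 0.9]])

# Only boxes whose score meets the threshold are kept for drawing.
keep = detection_scores >= min_score_thresh
visualized_boxes = detection_boxes[keep]

print(len(visualized_boxes))  # 2
```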

So it is not a matter of plotting only the highest-scoring box: every one of the bounding boxes is passed through the function with the set parameters, and any box whose score falls below the threshold is simply not visualized.

Hope this clears your doubt!!



Hi DP,

Thanks for the detailed response! I will review it in full today, but it clarified a lot of my questions.
Can you expand on why there are 100 detections (with corresponding scores)? Is this because of the “region sampling” the OD API does? Sometimes there are many box predictions that pass the threshold (of 0.8), which then results in many boxes being drawn over the object.


Hello Alex,

Are you asking this as per the assignment for week 2?

If you read the previous comment, you will understand that the object detection API is built so that, even if there are 100 or more detections, the outputs detection_boxes, detection_classes and detection_scores (the three standard keys) are filtered by the threshold of your choice, e.g. 0.8 as you stated. Any bounding box whose score is below that threshold will not be visualized.

Yes, I am asking about the Week 2 assignment. I understand the plotting function very well now, thanks to your response. I am still unclear on why there are 100 separate predictions per image. Can I change this number to 50, or 150? Is this a parameter in the config file?

Hello Alex,

I just checked the Course 3 Week 2 assignment, which indicates the target size to be used is 150, so I am not sure which section of the config file you mean. Can you share a screenshot of where it shows 100 separate predictions per image in the assignment?


Hi DP,
I was actually able to locate in the config file where you can change the number of boxes predicted. For faster_rcnn, you can see the attached below:
I was just trying to wrap my head around why there were 100 predictions, but I realize this is a set parameter in the config file that can be changed. Thanks for your guidance!
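For anyone else reading: in a faster_rcnn pipeline config, the fields that cap the number of detections look roughly like this (field names are from the TF Object Detection API config format; the values are illustrative, and whatever your config sets here is where the 100 per image comes from):

```
model {
  faster_rcnn {
    first_stage_max_proposals: 100
    second_stage_post_processing {
      batch_non_max_suppression {
        score_threshold: 0.0
        iou_threshold: 0.6
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SOFTMAX
    }
  }
}
```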