Object Detection with VinVL
Import mozuma modules
from mozuma.torch.options import TorchRunnerOptions
from mozuma.torch.runners import TorchInferenceRunner
from mozuma.callbacks.memory import (
CollectBoundingBoxesInMemory,
)
from mozuma.helpers.files import list_files_in_dir
from mozuma.torch.datasets import LocalBinaryFilesDataset, ImageDataset
from mozuma.models.vinvl.pretrained import torch_vinvl_detector
import torch
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from PIL import Image
import os
%matplotlib inline
Load images
# Directory holding the sample images, given relative to this notebook.
base_path = os.path.join("../../tests", "fixtures", "objects")

# List the JPEG files found there and keep at most the first 50 of them.
jpg_files = list_files_in_dir(base_path, allowed_extensions=("jpg",))
file_names = jpg_files[:50]

# Wrap the raw binary files so that the runner receives image items.
binary_files = LocalBinaryFilesDataset(file_names)
dataset = ImageDataset(binary_files)
Run object detection with torch_vinvl_detector
# Load VinVL model (it might take a few minutes.)
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU so
# the notebook does not crash on machines without a CUDA device.
# NOTE(review): CPU inference is expected to be much slower - confirm the
# VinVL detector supports CPU in your installation.
torch_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
vinvl = torch_vinvl_detector(device=torch_device, score_threshold=0.5)

# Callback handed to the runner; its `indices` and `bounding_boxes`
# attributes are read back after the run to visualise the detections.
bb = CollectBoundingBoxesInMemory()

# Runner
runner = TorchInferenceRunner(
    model=vinvl,
    dataset=dataset,
    callbacks=[bb],
    options=TorchRunnerOptions(
        # Same device as the model, batches of 10 items, progress bar on.
        device=torch_device,
        data_loader_options={"batch_size": 10},
        tqdm_enabled=True,
    ),
)
runner.run()
Visualise the detected objects
Draw each detected bounding box on its image, annotated with the detection's confidence score.
# Show every processed image in its own figure, overlaying one red outline
# per detection and the detection score (as a percentage) at the box corner.
for image_idx, image_path in enumerate(bb.indices):
    print(f"Object detected for {image_path}")

    picture = Image.open(image_path).convert("RGB")
    plt.figure()
    plt.imshow(picture)
    # All patches and labels for this image go onto the axes just created.
    axes = plt.gca()

    detections = bb.bounding_boxes[image_idx]
    boxes = detections.bounding_boxes
    confidences = detections.scores

    # Each box is unpacked as two corner points: (x0, y0) and (x1, y1).
    for box_idx, (x0, y0, x1, y1) in enumerate(boxes):
        outline = Rectangle(
            (x0, y0),
            x1 - x0,  # width
            y1 - y0,  # height
            fill=False,
            edgecolor="red",
            linewidth=2,
            alpha=0.5,
        )
        axes.add_patch(outline)
        axes.text(
            x0,
            y0,
            f"{confidences[box_idx]*100:.1f}%",
            color="blue",
            fontsize=12,
        )