Video Key-frames Extraction Model
Import mozuma modules
from mozuma.torch.datasets import (
LocalBinaryFilesDataset,
)
from mozuma.models.keyframes.pretrained import torch_keyframes_resnet_imagenet
from mozuma.models.keyframes.datasets import (
BinaryVideoCapture,
extract_video_frames,
)
from mozuma.callbacks.memory import CollectVideoFramesInMemory
from mozuma.torch.options import TorchRunnerOptions
from mozuma.torch.runners import TorchInferenceRunner
import torch
import os
Load a test video
Extract key-frames with torch_keyframes_resnet_imagenet
torch_device = torch.device("cpu")

# Dataset over the raw video file bytes.
# NOTE(review): `dataset` was passed to the runner below but never defined in
# this file; construct it here from the same test fixture path used later on.
dataset = LocalBinaryFilesDataset(
    paths=[os.path.join("../../tests", "fixtures", "video", "test.mp4")]
)

# define model for keyframes extractor (ResNet-18 backbone)
model = torch_keyframes_resnet_imagenet("resnet18", device=torch_device)

# Callback collecting the selected key-frames in memory
features = CollectVideoFramesInMemory()

# Inference runner: one video per batch, progress bar enabled
runner = TorchInferenceRunner(
    model=model,
    dataset=dataset,
    callbacks=[features],
    options=TorchRunnerOptions(
        device=torch_device, data_loader_options={"batch_size": 1}, tqdm_enabled=True
    ),
)
runner.run()
Visualise the extracted key-frames
First install ipyplot
# Install a pip package in the current Jupyter kernel
# NOTE(review): the `!` line below is IPython shell syntax — it only works when
# executed inside a Jupyter/IPython kernel, not as a plain Python script.
import sys
!{sys.executable} -m pip install ipyplot
Then extract the selected key-frames from the video
# Decode the test video and index every frame by its position in the stream.
# Single `with` statement replaces the needless nesting; the file handle and
# the capture are both released on exit.
with open(
    os.path.join("../../tests", "fixtures", "video", "test.mp4"), mode="rb"
) as video_file, BinaryVideoCapture(video_file) as capture:
    video_frames = dict(extract_video_frames(capture))

# Positions of the key-frames selected by the extractor for the first (only)
# video, in ascending order. `sorted(xs)` replaces the redundant generator
# `sorted(kf for kf in xs)`.
frame_positions = sorted(features.frames[0].frame_indices)
# Look up the decoded frame image for each selected position.
selected_frames = [video_frames[i] for i in frame_positions]
Finally, display the frames