Source code for docarray.document.mixins.video

import threading
import time
from typing import (
    Union,
    BinaryIO,
    TYPE_CHECKING,
    Generator,
    Type,
    Dict,
    Optional,
    Tuple,
)

import numpy as np

if TYPE_CHECKING:  # pragma: no cover
    from docarray.typing import T
    from docarray import Document


class VideoDataMixin:
    """Provide helper functions for :class:`Document` to support video data.

    Covers loading a video from :attr:`.uri` into :attr:`.tensor`,
    saving :attr:`.tensor` back to a video file, and streaming
    :class:`Document` objects from a webcam.
    """
    @classmethod
    def generator_from_webcam(
        cls: Type['T'],
        height_width: Optional[Tuple[int, int]] = None,
        show_window: bool = True,
        window_title: str = 'webcam',
        fps: int = 30,
        exit_key: int = 27,
        exit_event=None,
        tags: Optional[Dict] = None,
    ) -> Generator['T', None, None]:
        """
        Create a generator that yields a :class:`Document` object from the webcam.

        This feature requires the `opencv-python` package.

        :param height_width: the shape of the video frame, if not provided, the shape will be determined from the first frame.
            Note that this is restricted by the hardware of the camera.
        :param show_window: if to show preview window of the webcam video
        :param window_title: the window title of the preview window
        :param fps: expected frames per second, note that this is not guaranteed, as the actual fps depends on the hardware limit
        :param exit_key: the key to press to exit the preview window
        :param exit_event: the multiprocessing/threading/asyncio event that once set to exit the preview window
        :param tags: the tags to attach to the document
        :return: a generator that yields a :class:`Document` object from a webcam
        """
        import cv2

        # Fall back to a local event so the loop condition below always works.
        if exit_event is None:
            exit_event = threading.Event()
        vc = cv2.VideoCapture(0)  # device index 0: the system default camera
        prev_frame_time = time.perf_counter()
        actual_fps = 0
        try:
            while not exit_event.is_set():
                rval, frame = vc.read()
                d = cls(tensor=frame, tags=tags)  # type: Document
                if height_width:
                    d.set_image_tensor_shape(height_width)
                yield d
                # Throttle towards the requested fps.
                # NOTE(review): the delay `1000 // (fps + fps - actual_fps)` raises
                # ZeroDivisionError if `actual_fps` ever reaches `2 * fps`, and goes
                # negative beyond that — confirm this is bounded in practice.
                # `waitKey` is also invoked when `show_window` is False; with no
                # window it still provides the frame-pacing delay.
                key = cv2.waitKey(1000 // (fps + fps - actual_fps))
                if show_window:
                    new_frame_time = time.perf_counter()
                    # measured fps of the previous iteration, truncated to int
                    actual_fps = int(1 / (new_frame_time - prev_frame_time))
                    prev_frame_time = new_frame_time
                    # putting the FPS count on the frame
                    cv2.putText(
                        d.tensor,
                        f'FPS {actual_fps:0.0f}/{fps}',
                        (7, 70),
                        cv2.FONT_HERSHEY_SIMPLEX,
                        3,
                        (255, 255, 255),
                        3,
                        cv2.LINE_AA,
                    )
                    # displaying the frame with fps
                    cv2.imshow(window_title, d.tensor)
                # `rval` is False once the camera stops delivering frames
                if key == exit_key or not rval:
                    break
        finally:
            # Always release the capture device, even on generator close/exception.
            vc.release()
            if show_window:
                cv2.destroyWindow(window_title)
[docs] def load_uri_to_video_tensor( self: 'T', only_keyframes: bool = False, **kwargs ) -> 'T': """Convert a :attr:`.uri` to a video ndarray :attr:`.tensor`. :param only_keyframes: if True keep only the keyframes, if False keep all frames and store the indices of the keyframes in :attr:`.tags` :param kwargs: supports all keyword arguments that are being supported by av.open() as described in: https://pyav.org/docs/stable/api/_globals.html?highlight=open#av.open :return: Document itself after processed """ import av with av.open(self.uri, **kwargs) as container: if only_keyframes: stream = container.streams.video[0] stream.codec_context.skip_frame = 'NONKEY' frames = [] keyframe_indices = [] for i, frame in enumerate(container.decode(video=0)): img = frame.to_image() frames.append(img) if not only_keyframes and frame.key_frame == 1: keyframe_indices.append(i) self.tensor = np.moveaxis(np.stack(frames), 1, 2) if not only_keyframes: self.tags['keyframe_indices'] = keyframe_indices return self
[docs] def save_video_tensor_to_file( self: 'T', file: Union[str, BinaryIO], frame_rate: int = 30, codec: str = 'h264' ) -> 'T': """Save :attr:`.tensor` as a video mp4/h264 file. :param file: The file to open, which can be either a string or a file-like object. :param frame_rate: frames per second :param codec: the name of a decoder/encoder :return: itself after processed """ if ( self.tensor.ndim != 4 or self.tensor.shape[-1] != 3 or self.tensor.dtype != np.uint8 ): raise ValueError( f'expects `.tensor` with dtype=uint8 and ndim=4 and the last dimension is 3, ' f'but receiving {self.tensor.shape} in {self.tensor.dtype}' ) video_tensor = np.moveaxis(np.clip(self.tensor, 0, 255), 1, 2) import av with av.open(file, mode='w') as container: stream = container.add_stream(codec, rate=frame_rate) stream.width = self.tensor.shape[1] stream.height = self.tensor.shape[2] stream.pix_fmt = 'yuv420p' for b in video_tensor: frame = av.VideoFrame.from_ndarray(b, format='rgb24') for packet in stream.encode(frame): container.mux(packet) for packet in stream.encode(): container.mux(packet) return self