Source code for docarray.document.mixins.convert
from typing import Optional, TYPE_CHECKING
import numpy as np
from docarray.document.mixins.helper import _uri_to_blob, _to_datauri, _is_datauri
if TYPE_CHECKING: # pragma: no cover
from docarray.typing import T
class ConvertMixin:
    """Conversion helpers mixed into :class:`Document`.

    The methods move data between the :attr:`.tensor`, :attr:`.blob` and
    :attr:`.uri` attributes of the instance. Each one mutates the instance
    and returns it, so calls can be chained.
    """

    def convert_blob_to_tensor(
        self: 'T', dtype: Optional[str] = None, count: int = -1, offset: int = 0
    ) -> 'T':
        """Interpret :attr:`.blob` as a raw NumPy buffer and store the result in :attr:`.tensor`.

        The arguments are forwarded unchanged to :func:`numpy.frombuffer`, so
        :attr:`.blob` must be a valid buffer for the requested ``dtype``.

        :param dtype: data-type of the resulting array; ``None`` falls back to NumPy's default (float).
        :param count: how many items to read; ``-1`` reads everything in the buffer.
        :param offset: position (in bytes) at which reading of the buffer starts; default: 0.
        :return: itself after processed
        """
        decoded = np.frombuffer(self.blob, dtype=dtype, count=count, offset=offset)
        self.tensor = decoded
        return self

    def convert_tensor_to_blob(self: 'T') -> 'T':
        """Serialise :attr:`.tensor` with its ``tobytes()`` method and store the bytes in :attr:`.blob`.

        :return: itself after processed
        """
        raw_bytes = self.tensor.tobytes()
        self.blob = raw_bytes
        return self

    def convert_uri_to_datauri(
        self: 'T', charset: str = 'utf-8', base64: bool = False
    ) -> 'T':
        """Replace :attr:`.uri` with a data URI built from the content it refers to.

        When :attr:`.uri` is already recognised as a data URI it is left as is.

        :param charset: character set forwarded to the data-URI builder; may be any charset registered with IANA.
        :param base64: whether the payload is base64-encoded, which suits binary or
            non-text 8-bit data and text with many non-US-ASCII characters.
        :return: itself after processed
        """
        # Guard clause: nothing to do if the URI is already in data-URI form.
        if _is_datauri(self.uri):
            return self

        payload = _uri_to_blob(self.uri)
        self.uri = _to_datauri(self.mime_type, payload, charset, base64, binary=True)
        return self