from typing import List, Sequence, TYPE_CHECKING, Optional, Union
from docarray.math.ndarray import ravel, unravel
if TYPE_CHECKING: # pragma: no cover
from docarray.typing import ArrayType, DocumentContentType
[docs]class ContentPropertyMixin:
"""Helpers that provide faster getter & setter for :attr:`.content` by using protobuf directly."""
def _check_length(self, target_len: int):
if target_len != len(self):
raise ValueError(
f'Length must match {self!r}, but {target_len} != {len(self)}'
)
@property
def embeddings(self) -> Optional['ArrayType']:
"""Return a :class:`ArrayType` stacking all the `embedding` attributes as rows.
:return: a :class:`ArrayType` of embedding
"""
if self:
return unravel(self, 'embedding')
@embeddings.setter
def embeddings(self, value: 'ArrayType'):
"""Set the :attr:`.embedding` of the Documents.
To remove all embeddings of all Documents:
.. highlight:: python
.. code-block:: python
da.embeddings = None
:param value: The embedding matrix to set
"""
if value is None:
self[:, 'embedding'] = [None] * len(self)
else:
emb_shape0 = _get_len(value)
self._check_length(emb_shape0)
ravel(value, self, 'embedding')
@property
def tensors(self) -> Optional['ArrayType']:
"""Return a :class:`ArrayType` stacking all :attr:`.tensor`.
The `tensor` attributes are stacked together along a newly created first
dimension (as if you would stack using ``np.stack(X, axis=0)``).
.. warning:: This operation assumes all tensors have the same shape and dtype.
All dtype and shape values are assumed to be equal to the values of the
first element in the DocumentArray
:return: a :class:`ArrayType` of tensors
"""
if self:
return unravel(self, 'tensor')
@tensors.setter
def tensors(self, value: 'ArrayType'):
"""Set :attr:`.tensor` of the Documents. To clear all :attr:`tensor`, set it to ``None``.
:param value: The tensor array to set. The first axis is the "row" axis.
"""
if value is None:
self[:, 'tensor'] = [None] * len(self)
else:
tensors_shape0 = _get_len(value)
self._check_length(tensors_shape0)
ravel(value, self, 'tensor')
@property
def texts(self) -> Optional[List[str]]:
"""Get :attr:`.text` of all Documents
:return: a list of texts
"""
if self:
return [d.text for d in self]
@texts.setter
def texts(self, value: Sequence[str]):
"""Set :attr:`.text` for all Documents. To clear all :attr:`text`, set it to ``None``.
:param value: A sequence of texts to set, should be the same length as the
number of Documents
"""
if value is None:
self[:, 'text'] = [None] * len(self)
else:
self._check_length(len(value))
self[:, 'text'] = value
@property
def blobs(self) -> Optional[List[bytes]]:
"""Get the blob attribute of all Documents.
:return: a list of blobs
"""
if self:
return [d.blob for d in self]
@blobs.setter
def blobs(self, value: List[bytes]):
"""Set the blob attribute for all Documents. To clear all :attr:`blob`, set it to ``None``.
:param value: A sequence of blob to set, should be the same length as the
number of Documents
"""
if value is None:
self[:, 'blob'] = [None] * len(self)
else:
self._check_length(len(value))
for doc, blob in zip(self, value):
self[doc.id, 'blob'] = blob
@property
def contents(self) -> Optional[Union[Sequence['DocumentContentType'], 'ArrayType']]:
"""Get the :attr:`.content` of all Documents.
:return: a list of texts, blobs or :class:`ArrayType`
"""
if self:
content_type = self[0].content_type or self[-1].content_type
if content_type:
return getattr(self, f'{content_type}s')
@contents.setter
def contents(
self, value: Sequence[Union[Sequence['DocumentContentType'], 'ArrayType']]
):
"""Set the :attr:`.content` of all Documents.
:param value: a list of texts, blobs or :class:`ArrayType`. If the value is a two-element tuple,
then the second element is used as the :attr:`.content_type`
"""
if self:
if isinstance(value, tuple) and len(value) == 2:
content_type = value[1]
else:
content_type = self[0].content_type or self[-1].content_type
if content_type:
setattr(self, f'{content_type}s', value)
def _get_len(value):
return len(value) if isinstance(value, (list, tuple)) else value.shape[0]