Skip to content

Tensor

docarray.typing.tensor.abstract_tensor

AbstractTensor

Bases: Generic[TTensor, T], AbstractType, ABC, Sized

Source code in docarray/typing/tensor/abstract_tensor.py
class AbstractTensor(Generic[TTensor, T], AbstractType, ABC, Sized):
    __parametrized_meta__: type = _ParametrizedMeta
    __unparametrizedcls__: Optional[Type['AbstractTensor']] = None
    __docarray_target_shape__: Optional[Tuple[int, ...]] = None
    _proto_type_name: str

    def _to_node_protobuf(self: T) -> 'NodeProto':
        """Wrap the protobuf form of this tensor into a `NodeProto` message.

        This is meant to be called when the tensor is part of a Document that is
        nested into another Document which is being converted into a protobuf.

        :return: a `NodeProto` whose `ndarray` field holds `self.to_protobuf()`
            and whose `type` is `self._proto_type_name`
        """
        from docarray.proto import NodeProto

        return NodeProto(ndarray=self.to_protobuf(), type=self._proto_type_name)

    @classmethod
    def __docarray_validate_shape__(cls, t: T, shape: Tuple[Union[int, str], ...]) -> T:
        """Validate the tensor `t` against a target `shape`, reshaping it if possible.

        Every tensor has to implement this method in order to
        enable syntax of the form AnyTensor[shape].
        It is called when a tensor is assigned to a field of this type,
        i.e. when a tensor is passed to a Document field of type AnyTensor[shape].

        The behaviour is as follows:

        - If the shape of `t` is equal to `shape`, return `t`.
        - If `shape` contains named dimensions (strings) or an Ellipsis (`...`),
            `t` is never reshaped: it is returned unchanged when it matches the
            pattern, otherwise a ValueError is raised. All occurrences of the same
            name must have the same size, and the (single) Ellipsis stands for
            one or more dimensions of arbitrary size.
        - If `shape` only contains concrete sizes and differs from the shape
            of `t`, a warning is emitted and `t` is reshaped to `shape`.
            If it cannot be reshaped to `shape`, a ValueError is raised.

        :param t: The tensor to validate.
        :param shape: The shape to validate against.
        :return: The validated tensor.
        :raises ValueError: if `shape` uses more than one Ellipsis, if `t` does
            not match a symbolic `shape`, or if `t` cannot be reshaped to `shape`.
        """
        comp_be = t.get_comp_backend()
        tshape = comp_be.shape(t)
        if tshape == shape:
            return t

        if any(isinstance(dim, str) or dim is Ellipsis for dim in shape):
            ellipsis_occurrences = [
                pos for pos, dim in enumerate(shape) if dim is Ellipsis
            ]
            if ellipsis_occurrences:
                if len(ellipsis_occurrences) > 1:
                    raise ValueError(
                        f'Cannot use Ellipsis (...) more than once for the shape {shape}'
                    )
                ellipsis_pos = ellipsis_occurrences[0]
                # Replace the Ellipsis by as many uniquely named dimensions as are
                # needed to reach the rank of `t`. Should be at least 1.
                dimensions_needed = max(len(tshape) - len(shape) + 1, 1)
                shape = (
                    shape[:ellipsis_pos]
                    + tuple(
                        f'__dim_var_{index}__' for index in range(dimensions_needed)
                    )
                    + shape[ellipsis_pos + 1 :]
                )

            if len(tshape) != len(shape):
                raise ValueError(
                    f'Tensor shape mismatch. Expected {shape}, got {tshape}'
                )
            # size bound to each named dimension seen so far
            known_dims: Dict[str, int] = {}
            for tdim, dim in zip(tshape, shape):
                if isinstance(dim, int) and tdim != dim:
                    raise ValueError(
                        f'Tensor shape mismatch. Expected {shape}, got {tshape}'
                    )
                elif isinstance(dim, str):
                    if dim in known_dims and known_dims[dim] != tdim:
                        raise ValueError(
                            f'Tensor shape mismatch. Expected {shape}, got {tshape}'
                        )
                    else:
                        known_dims[dim] = tdim
            return t

        shape = cast(Tuple[int], shape)
        warnings.warn(
            f'Tensor shape mismatch. Reshaping tensor '
            f'of shape {tshape} to shape {shape}'
        )
        try:
            value = cls._docarray_from_native(comp_be.reshape(t, shape))
        except (RuntimeError, ValueError) as err:
            # Backends do not agree on the exception type of a failed reshape
            # (NumPy, for instance, raises ValueError), so both are normalized
            # to the same ValueError and the original error is kept as cause.
            raise ValueError(
                f'Cannot reshape tensor of shape {tshape} to shape {shape}'
            ) from err
        return cast(T, value)

    @classmethod
    def __docarray_validate_getitem__(cls, item: Any) -> Tuple[int]:
        """This method validates the input to `AbstractTensor.__class_getitem__`.

        It is called at "class creation time",
        i.e. when a class is created with syntax of the form AnyTensor[shape].

        The default implementation turns `item` into a tuple: a single int or a
        single named dimension (str) becomes a one-element tuple, anything else
        is passed to `tuple()`.
        A subclass can override this method to implement custom validation logic.

        The output of this is eventually passed to
        [`AbstractTensor.__docarray_validate_shape__`]
        [docarray.typing.tensor.abstract_tensor.AbstractTensor.__docarray_validate_shape__]
        as its `shape` argument.

        Raises `TypeError` if the input `item` does not pass validation.

        :param item: The item to validate, passed to `__class_getitem__` (`Tensor[item]`).
        :return: The validated item == the target shape of this tensor.
        """
        # A str has to be wrapped explicitly: it is iterable, so `tuple('batch')`
        # would split one named dimension into one dimension per character.
        if isinstance(item, (int, str)):
            return (item,)
        try:
            return tuple(item)
        except TypeError as err:
            raise TypeError(f'{item} is not a valid tensor shape.') from err

    if is_pydantic_v2:

        @classmethod
        def __get_pydantic_json_schema__(
            cls, core_schema: CoreSchema, handler: GetJsonSchemaHandler
        ) -> Dict[str, Any]:
            """Build the JSON schema pydantic v2 uses for fields of this type.

            The tensor is described as an array of numbers. The target shape of a
            parametrized tensor is reported under the 'tensor/array shape' key
            ('not specified' otherwise), and a zero-filled example is attached
            for small, fully concrete shapes.

            :param core_schema: the core schema of the field (not used here)
            :param handler: the JSON schema handler (not used here)
            :return: the JSON schema as a dict
            """
            json_schema: Dict[str, Any] = {}
            json_schema.update(type='array', items={'type': 'number'})
            target_shape = cls.__docarray_target_shape__
            if target_shape is None:
                shape_info = 'not specified'
            else:
                shape_info = '[' + ', '.join(str(s) for s in target_shape) + ']'
                # Named dimensions (str) and Ellipsis have no size: computing the
                # number of items for such a shape raises a TypeError, so an
                # example is only generated for fully concrete shapes.
                is_concrete = not any(
                    isinstance(dim, str) or dim is Ellipsis for dim in target_shape
                )
                if (
                    is_concrete
                    and reduce(mul, target_shape, 1) <= DISPLAY_TENSOR_OPENAPI_MAX_ITEMS
                ):
                    # custom example only for 'small' shapes, otherwise it is too big to display
                    example_payload = orjson_dumps(np.zeros(target_shape)).decode()
                    json_schema.update(example=example_payload)
            json_schema['tensor/array shape'] = shape_info
            return json_schema

    else:

        @classmethod
        def __modify_schema__(cls, field_schema: Dict[str, Any]) -> None:
            """Fill in the schema pydantic v1 uses for fields of this type.

            The tensor is described as an array of numbers. The target shape of a
            parametrized tensor is reported under the 'tensor/array shape' key
            ('not specified' otherwise), and a zero-filled example is attached
            for small, fully concrete shapes.

            :param field_schema: the schema dict of the field, updated in place
            """
            field_schema.update(type='array', items={'type': 'number'})
            target_shape = cls.__docarray_target_shape__
            if target_shape is None:
                shape_info = 'not specified'
            else:
                shape_info = '[' + ', '.join(str(s) for s in target_shape) + ']'
                # Named dimensions (str) and Ellipsis have no size: computing the
                # number of items for such a shape raises a TypeError, so an
                # example is only generated for fully concrete shapes.
                is_concrete = not any(
                    isinstance(dim, str) or dim is Ellipsis for dim in target_shape
                )
                if (
                    is_concrete
                    and reduce(mul, target_shape, 1) <= DISPLAY_TENSOR_OPENAPI_MAX_ITEMS
                ):
                    # custom example only for 'small' shapes, otherwise it is too big to display
                    example_payload = orjson_dumps(np.zeros(target_shape)).decode()
                    field_schema.update(example=example_payload)
            field_schema['tensor/array shape'] = shape_info

    @classmethod
    def _docarray_create_parametrized_type(cls: Type[T], shape: Tuple[int]):
        """Create a subclass of `cls` that is bound to the target `shape`.

        The subclass uses `cls.__parametrized_meta__` as its metaclass, stores
        `cls` and `shape` as class attributes and additionally checks the shape
        of every value that goes through `_docarray_validate`.

        :param shape: the target shape of the parametrized tensor type
        :return: the parametrized class, named `<name of cls>[<shape>]`
        """
        dims_repr = ', '.join(map(str, shape))

        class _ParametrizedTensor(
            cls,  # type: ignore
            metaclass=cls.__parametrized_meta__,  # type: ignore
        ):
            __unparametrizedcls__ = cls
            __docarray_target_shape__ = shape

            @classmethod
            def _docarray_validate(
                _cls,
                value: Any,
            ):
                # regular validation first, then enforce the target shape
                validated = super()._docarray_validate(value)
                target = _cls.__docarray_target_shape__
                return _cls.__docarray_validate_shape__(validated, target)

        _ParametrizedTensor.__name__ = f'{cls.__name__}[{dims_repr}]'
        _ParametrizedTensor.__qualname__ = f'{cls.__qualname__}[{dims_repr}]'

        return _ParametrizedTensor

    def __class_getitem__(cls, item: Any):
        """Return the parametrized tensor type for `cls[item]`.

        `item` is first validated with `__docarray_validate_getitem__`; the
        result is used as the target shape of the new type.
        """
        return cls._docarray_create_parametrized_type(
            cls.__docarray_validate_getitem__(item)
        )

    @classmethod
    def _docarray_stack(cls: Type[T], seq: Union[List[T], Tuple[T]]) -> T:
        """Combine a sequence of tensors into one tensor of this type.

        The stacking itself is done by the computational backend of `cls`.

        :param seq: the tensors to stack
        :return: the stacked tensor
        """
        backend = cls.get_comp_backend()
        # mypy cannot tell that 'T' is the right input type for .stack(),
        # although it always is at runtime, hence the ignore
        stacked = backend.stack(seq)  # type: ignore
        return cls._docarray_from_native(stacked)

    @classmethod
    @abc.abstractmethod
    def _docarray_from_native(cls: Type[T], value: Any) -> T:
        """
        Create a DocList tensor from a tensor that is native to the given framework,
        e.g. from numpy.ndarray or torch.Tensor.

        Abstract: every concrete tensor type has to provide its own implementation.

        :param value: the framework-native tensor to wrap
        :return: a tensor of this type
        """
        ...

    @staticmethod
    @abc.abstractmethod
    def get_comp_backend() -> AbstractComputationalBackend:
        """The computational backend compatible with this tensor type.

        Abstract: every concrete tensor type has to provide its own implementation.

        :return: the computational backend for this tensor type
        """
        ...

    @abc.abstractmethod
    def __getitem__(self: T, item) -> T:
        """Get a slice of this tensor.

        :param item: the index or slice to read
        :return: the selected part, as a tensor of the same type
        """
        ...

    @abc.abstractmethod
    def __setitem__(self, index, value):
        """Set a slice of this tensor.

        :param index: the index or slice to assign to
        :param value: the value to store there
        """
        ...

    @abc.abstractmethod
    def __iter__(self):
        """Iterate over the elements of this tensor.

        Abstract: every concrete tensor type has to provide its own implementation.
        """
        ...

    @abc.abstractmethod
    def to_protobuf(self) -> 'NdArrayProto':
        """Convert this tensor into a Protobuf message.

        :return: the `NdArrayProto` message representing this tensor
        """
        ...

    def unwrap(self):
        """Return the native tensor object that this DocList tensor wraps.

        This base implementation has no native tensor to hand out and returns
        `None`; it is not marked abstract.
        """
        return None

    @abc.abstractmethod
    def _docarray_to_json_compatible(self):
        """
        Convert tensor into a json compatible object

        Abstract, so subclasses have to override it; this base body simply
        returns the tensor itself.

        :return: a representation of the tensor compatible with orjson
        """
        return self

    @classmethod
    @abc.abstractmethod
    def _docarray_from_ndarray(cls: Type[T], value: np.ndarray) -> T:
        """Create a tensor from a numpy array.

        PS: this function is different from `from_ndarray` because it is private under the docarray namespace.
        This allows us to avoid a breaking change if one day we introduce a Tensor backend with a `from_ndarray` method.

        :param value: the numpy array to convert
        :return: a tensor of this type
        """
        ...

    @abc.abstractmethod
    def _docarray_to_ndarray(self) -> np.ndarray:
        """Cast itself to a numpy array.

        :return: this tensor as a `numpy.ndarray`
        """
        ...

    if is_pydantic_v2:

        @classmethod
        def __get_pydantic_core_schema__(
            cls, _source_type: Any, handler: GetCoreSchemaHandler
        ) -> core_schema.CoreSchema:
            """Describe to pydantic v2 how fields of this type are handled.

            Values are validated with `cls.validate`. For JSON output (unless the
            value is None) they are serialized with `orjson_dumps`, whose result
            is declared with the schema generated for `bytes`.

            :param _source_type: the annotated source type (not used here)
            :param handler: the handler used to generate the schema for `bytes`
            :return: the core schema for this tensor type
            """
            json_serializer = core_schema.plain_serializer_function_ser_schema(
                function=orjson_dumps,
                return_schema=handler.generate_schema(bytes),
                when_used="json-unless-none",
            )
            return core_schema.general_plain_validator_function(
                cls.validate, serialization=json_serializer
            )

__docarray_validate_getitem__(item) classmethod

This method validates the input to AbstractTensor.__class_getitem__.

It is called at "class creation time", i.e. when a class is created with syntax of the form AnyTensor[shape].

The default implementation tries to cast any item to a tuple of ints. A subclass can override this method to implement custom validation logic.

The output of this is eventually passed to AbstractTensor.__docarray_validate_shape__ as its shape argument.

Raises TypeError if the input item does not pass validation, i.e. if it cannot be converted to a tuple.

Parameters:

Name Type Description Default
item Any

The item to validate, passed to __class_getitem__ (Tensor[item]).

required

Returns:

Type Description
Tuple[int]

The validated item == the target shape of this tensor.

Source code in docarray/typing/tensor/abstract_tensor.py
@classmethod
def __docarray_validate_getitem__(cls, item: Any) -> Tuple[int]:
    """This method validates the input to `AbstractTensor.__class_getitem__`.

    It is called at "class creation time",
    i.e. when a class is created with syntax of the form AnyTensor[shape].

    The default implementation turns `item` into a tuple: a single int or a
    single named dimension (str) becomes a one-element tuple, anything else
    is passed to `tuple()`.
    A subclass can override this method to implement custom validation logic.

    The output of this is eventually passed to
    [`AbstractTensor.__docarray_validate_shape__`]
    [docarray.typing.tensor.abstract_tensor.AbstractTensor.__docarray_validate_shape__]
    as its `shape` argument.

    Raises `TypeError` if the input `item` does not pass validation.

    :param item: The item to validate, passed to `__class_getitem__` (`Tensor[item]`).
    :return: The validated item == the target shape of this tensor.
    """
    # A str has to be wrapped explicitly: it is iterable, so `tuple('batch')`
    # would split one named dimension into one dimension per character.
    if isinstance(item, (int, str)):
        return (item,)
    try:
        return tuple(item)
    except TypeError as err:
        raise TypeError(f'{item} is not a valid tensor shape.') from err

__docarray_validate_shape__(t, shape) classmethod

Every tensor has to implement this method in order to enable syntax of the form AnyTensor[shape]. It is called when a tensor is assigned to a field of this type. i.e. when a tensor is passed to a Document field of type AnyTensor[shape].

The intended behaviour is as follows:

  • If the shape of t is equal to shape, return t.
  • If the shape of t is not equal to shape, but can be reshaped to shape, return t reshaped to shape.
  • If the shape of t is not equal to shape and cannot be reshaped to shape, raise a ValueError.

Parameters:

Name Type Description Default
t T

The tensor to validate.

required
shape Tuple[Union[int, str], ...]

The shape to validate against.

required

Returns:

Type Description
T

The validated tensor.

Source code in docarray/typing/tensor/abstract_tensor.py
@classmethod
def __docarray_validate_shape__(cls, t: T, shape: Tuple[Union[int, str], ...]) -> T:
    """Validate the tensor `t` against a target `shape`, reshaping it if possible.

    Every tensor has to implement this method in order to
    enable syntax of the form AnyTensor[shape].
    It is called when a tensor is assigned to a field of this type,
    i.e. when a tensor is passed to a Document field of type AnyTensor[shape].

    The behaviour is as follows:

    - If the shape of `t` is equal to `shape`, return `t`.
    - If `shape` contains named dimensions (strings) or an Ellipsis (`...`),
        `t` is never reshaped: it is returned unchanged when it matches the
        pattern, otherwise a ValueError is raised. All occurrences of the same
        name must have the same size, and the (single) Ellipsis stands for
        one or more dimensions of arbitrary size.
    - If `shape` only contains concrete sizes and differs from the shape
        of `t`, a warning is emitted and `t` is reshaped to `shape`.
        If it cannot be reshaped to `shape`, a ValueError is raised.

    :param t: The tensor to validate.
    :param shape: The shape to validate against.
    :return: The validated tensor.
    :raises ValueError: if `shape` uses more than one Ellipsis, if `t` does
        not match a symbolic `shape`, or if `t` cannot be reshaped to `shape`.
    """
    comp_be = t.get_comp_backend()
    tshape = comp_be.shape(t)
    if tshape == shape:
        return t

    if any(isinstance(dim, str) or dim is Ellipsis for dim in shape):
        ellipsis_occurrences = [
            pos for pos, dim in enumerate(shape) if dim is Ellipsis
        ]
        if ellipsis_occurrences:
            if len(ellipsis_occurrences) > 1:
                raise ValueError(
                    f'Cannot use Ellipsis (...) more than once for the shape {shape}'
                )
            ellipsis_pos = ellipsis_occurrences[0]
            # Replace the Ellipsis by as many uniquely named dimensions as are
            # needed to reach the rank of `t`. Should be at least 1.
            dimensions_needed = max(len(tshape) - len(shape) + 1, 1)
            shape = (
                shape[:ellipsis_pos]
                + tuple(
                    f'__dim_var_{index}__' for index in range(dimensions_needed)
                )
                + shape[ellipsis_pos + 1 :]
            )

        if len(tshape) != len(shape):
            raise ValueError(
                f'Tensor shape mismatch. Expected {shape}, got {tshape}'
            )
        # size bound to each named dimension seen so far
        known_dims: Dict[str, int] = {}
        for tdim, dim in zip(tshape, shape):
            if isinstance(dim, int) and tdim != dim:
                raise ValueError(
                    f'Tensor shape mismatch. Expected {shape}, got {tshape}'
                )
            elif isinstance(dim, str):
                if dim in known_dims and known_dims[dim] != tdim:
                    raise ValueError(
                        f'Tensor shape mismatch. Expected {shape}, got {tshape}'
                    )
                else:
                    known_dims[dim] = tdim
        return t

    shape = cast(Tuple[int], shape)
    warnings.warn(
        f'Tensor shape mismatch. Reshaping tensor '
        f'of shape {tshape} to shape {shape}'
    )
    try:
        value = cls._docarray_from_native(comp_be.reshape(t, shape))
    except (RuntimeError, ValueError) as err:
        # Backends do not agree on the exception type of a failed reshape
        # (NumPy, for instance, raises ValueError), so both are normalized
        # to the same ValueError and the original error is kept as cause.
        raise ValueError(
            f'Cannot reshape tensor of shape {tshape} to shape {shape}'
        ) from err
    return cast(T, value)

__getitem__(item) abstractmethod

Get a slice of this tensor.

Source code in docarray/typing/tensor/abstract_tensor.py
@abc.abstractmethod
def __getitem__(self: T, item) -> T:
    """Get a slice of this tensor.

    :param item: the index or slice to read
    :return: the selected part, as a tensor of the same type
    """
    ...

__iter__() abstractmethod

Iterate over the elements of this tensor.

Source code in docarray/typing/tensor/abstract_tensor.py
@abc.abstractmethod
def __iter__(self):
    """Iterate over the elements of this tensor.

    Abstract: every concrete tensor type has to provide its own implementation.
    """
    ...

__setitem__(index, value) abstractmethod

Set a slice of this tensor.

Source code in docarray/typing/tensor/abstract_tensor.py
@abc.abstractmethod
def __setitem__(self, index, value):
    """Set a slice of this tensor.

    :param index: the index or slice to assign to
    :param value: the value to store there
    """
    ...

get_comp_backend() abstractmethod staticmethod

The computational backend compatible with this tensor type.

Source code in docarray/typing/tensor/abstract_tensor.py
@staticmethod
@abc.abstractmethod
def get_comp_backend() -> AbstractComputationalBackend:
    """The computational backend compatible with this tensor type.

    Abstract: every concrete tensor type has to provide its own implementation.

    :return: the computational backend for this tensor type
    """
    ...

to_protobuf() abstractmethod

Convert this tensor into a Protobuf message

Source code in docarray/typing/tensor/abstract_tensor.py
@abc.abstractmethod
def to_protobuf(self) -> 'NdArrayProto':
    """Convert this tensor into a Protobuf message.

    :return: the `NdArrayProto` message representing this tensor
    """
    ...

unwrap()

Return the native tensor object that this DocList tensor wraps.

Source code in docarray/typing/tensor/abstract_tensor.py
def unwrap(self):
    """Return the native tensor object that this DocList tensor wraps.

    This base implementation has no native tensor to hand out and returns
    `None`; it is not marked abstract.
    """
    return None

docarray.typing.tensor.ndarray

NdArray

Bases: ndarray, AbstractTensor, Generic[ShapeT]

Subclass of np.ndarray, intended for use in a Document. This enables (de)serialization from/to protobuf and json, data validation, and coercion from compatible types like torch.Tensor.

This type can also be used in a parametrized way, specifying the shape of the array.


from docarray import BaseDoc
from docarray.typing import NdArray
import numpy as np


class MyDoc(BaseDoc):
    arr: NdArray  # no shape constraint
    image_arr: NdArray[3, 224, 224]  # fixed target shape
    square_crop: NdArray[3, 'x', 'x']  # both 'x' dimensions must have the same size
    random_image: NdArray[3, ...]  # first dimension is fixed, the remaining dimensions are arbitrary


# create a document with tensors
doc = MyDoc(
    arr=np.zeros((128,)),
    image_arr=np.zeros((3, 224, 224)),
    square_crop=np.zeros((3, 64, 64)),
    random_image=np.zeros((3, 128, 256)),
)
assert doc.image_arr.shape == (3, 224, 224)

# automatic shape conversion (a warning is emitted when a tensor is reshaped)
doc = MyDoc(
    arr=np.zeros((128,)),
    image_arr=np.zeros((224, 224, 3)),  # will reshape to (3, 224, 224)
    square_crop=np.zeros((3, 128, 128)),
    random_image=np.zeros((3, 64, 128)),
)
assert doc.image_arr.shape == (3, 224, 224)

# !! The following will raise an error due to shape mismatch !!
from pydantic import ValidationError

try:
    doc = MyDoc(
        arr=np.zeros((128,)),
        image_arr=np.zeros((224, 224)),  # this will fail validation: cannot be reshaped to (3, 224, 224)
        square_crop=np.zeros((3, 128, 64)),  # this will also fail validation: the two 'x' dimensions differ
        random_image=np.zeros((4, 64, 128)),  # this will also fail validation: the first dimension is not 3
    )
except ValidationError as e:
    pass

Source code in docarray/typing/tensor/ndarray.py
@_register_proto(proto_type_name='ndarray')
class NdArray(np.ndarray, AbstractTensor, Generic[ShapeT]):
    """
    Subclass of `np.ndarray`, intended for use in a Document.
    This enables (de)serialization from/to protobuf and json, data validation,
    and coercion from compatible types like `torch.Tensor`.

    This type can also be used in a parametrized way, specifying the shape of the array.

    ---

    ```python
    from docarray import BaseDoc
    from docarray.typing import NdArray
    import numpy as np


    class MyDoc(BaseDoc):
        arr: NdArray
        image_arr: NdArray[3, 224, 224]
        square_crop: NdArray[3, 'x', 'x']
        random_image: NdArray[3, ...]  # first dimension is fixed, can have arbitrary shape


    # create a document with tensors
    doc = MyDoc(
        arr=np.zeros((128,)),
        image_arr=np.zeros((3, 224, 224)),
        square_crop=np.zeros((3, 64, 64)),
        random_image=np.zeros((3, 128, 256)),
    )
    assert doc.image_arr.shape == (3, 224, 224)

    # automatic shape conversion
    doc = MyDoc(
        arr=np.zeros((128,)),
        image_arr=np.zeros((224, 224, 3)),  # will reshape to (3, 224, 224)
        square_crop=np.zeros((3, 128, 128)),
        random_image=np.zeros((3, 64, 128)),
    )
    assert doc.image_arr.shape == (3, 224, 224)

    # !! The following will raise an error due to shape mismatch !!
    from pydantic import ValidationError

    try:
        doc = MyDoc(
            arr=np.zeros((128,)),
            image_arr=np.zeros((224, 224)),  # this will fail validation
            square_crop=np.zeros((3, 128, 64)),  # this will also fail validation
            random_image=np.zeros((4, 64, 128)),  # this will also fail validation
        )
    except ValidationError as e:
        pass
    ```

    ---
    """

    __parametrized_meta__ = metaNumpy

    @classmethod
    def _docarray_validate(
        cls: Type[T],
        value: Union[T, np.ndarray, str, List[Any], Tuple[Any], Any],
    ) -> T:
        """Coerce `value` into a tensor of this type.

        Strings are decoded with `orjson.loads` first. Supported inputs are
        numpy arrays (including `NdArray`), other docarray tensors, torch,
        tensorflow and jax tensors (when the respective framework is available)
        as well as lists and tuples.

        :param value: the value to validate
        :return: `value` as a tensor of this type
        :raises ValueError: if `value` cannot be converted
        """
        if isinstance(value, str):
            value = orjson.loads(value)

        # `NdArray` subclasses `np.ndarray`, so this first branch also handles
        # values that already are `NdArray` instances; a separate `NdArray`
        # check placed after it could never be reached.
        if isinstance(value, np.ndarray):
            return cls._docarray_from_native(value)
        elif isinstance(value, AbstractTensor):
            return cls._docarray_from_native(value._docarray_to_ndarray())
        elif torch_available and isinstance(value, torch.Tensor):
            return cls._docarray_from_native(value.detach().cpu().numpy())
        elif tf_available and isinstance(value, tf.Tensor):
            return cls._docarray_from_native(value.numpy())
        elif jax_available and isinstance(value, jnp.ndarray):
            return cls._docarray_from_native(value.__array__())
        elif isinstance(value, (list, tuple)):
            try:
                arr_from_list: np.ndarray = np.asarray(value)
                return cls._docarray_from_native(arr_from_list)
            except Exception:
                pass  # handled below
        try:
            # NOTE(review): `np.ndarray(value)` interprets `value` as the *shape*
            # of a new, uninitialized array instead of converting `value`
            # itself - confirm that this last-resort fallback is intended.
            arr: np.ndarray = np.ndarray(value)
            return cls._docarray_from_native(arr)
        except Exception:
            pass  # handled below
        raise ValueError(f'Expected a numpy.ndarray compatible type, got {type(value)}')

    @classmethod
    def _docarray_from_native(cls: Type[T], value: np.ndarray) -> T:
        """View a plain numpy array as a tensor of this type.

        A parametrized class (one with `__unparametrizedcls__` set) views the
        array as its unparametrized base class, any other class as itself.

        :param value: the numpy array to wrap
        :return: a view of `value` with the docarray tensor type
        """
        # falls back to `cls` when the tensor type is not parametrized
        target_cls = cls.__unparametrizedcls__ or cls
        return cast(T, value.view(target_cls))

    def _docarray_to_json_compatible(self) -> np.ndarray:
        """
        Convert `NdArray` into a json compatible object

        Delegates to `unwrap()`.

        :return: a representation of the tensor compatible with orjson
        """
        return self.unwrap()

    def unwrap(self) -> np.ndarray:
        """
        Return the original ndarray without any memory copy.

        The original object remains intact and is still a Document `NdArray`,
        while the returned object is a pure `np.ndarray`. Both objects share
        the same underlying memory.

        ---

        ```python
        from docarray.typing import NdArray
        import numpy as np
        from pydantic import parse_obj_as

        t1 = parse_obj_as(NdArray, np.zeros((3, 224, 224)))
        t2 = t1.unwrap()
        # here t2 is a pure np.ndarray but t1 is still a DocArray NdArray
        # But both share the same underlying memory
        ```

        ---

        :return: a `numpy.ndarray`
        """
        return self.view(np.ndarray)

    @classmethod
    def from_protobuf(cls: Type[T], pb_msg: 'NdArrayProto') -> 'T':
        """
        Read ndarray from a proto msg

        :param pb_msg: the protobuf message; its `dense` field carries the
            `buffer`, `shape` and `dtype` of the array
        :return: a numpy array
        :raises ValueError: if the message has neither a buffer nor a shape
        """
        source = pb_msg.dense
        if source.buffer:
            # the buffer is copied into a bytearray before numpy reads it
            x = np.frombuffer(bytearray(source.buffer), dtype=source.dtype)
            return cls._docarray_from_native(x.reshape(source.shape))
        elif len(source.shape) > 0:
            # No payload but a shape (e.g. a zero-size array): keep the
            # serialized dtype instead of silently switching to numpy's default.
            # An unset (empty) dtype still falls back to that default.
            return cls._docarray_from_native(
                np.zeros(source.shape, dtype=source.dtype or None)
            )
        else:
            raise ValueError(f'proto message {pb_msg} cannot be cast to a NdArray')

    def to_protobuf(self) -> 'NdArrayProto':
        """
        Serialize this array into a `NdArrayProto` protobuf message.

        The raw bytes, the shape and the dtype string of the array are stored
        in the `dense` field of the message.

        :return: the protobuf message
        """
        from docarray.proto import NdArrayProto

        message = NdArrayProto()
        dense = message.dense

        dense.buffer = self.tobytes()
        dense.ClearField('shape')
        dense.shape.extend(list(self.shape))
        dense.dtype = self.dtype.str

        return message

    @staticmethod
    def get_comp_backend() -> 'NumpyCompBackend':
        """Return the computational backend of the tensor

        :return: a new `NumpyCompBackend` instance
        """
        # imported inside the function rather than at module level
        from docarray.computation.numpy_backend import NumpyCompBackend

        return NumpyCompBackend()

    def __class_getitem__(cls, item: Any, *args, **kwargs):
        """Return the parametrized type for `NdArray[item]`.

        Delegates to `AbstractTensor.__class_getitem__` with this class as
        `cls`; additional positional and keyword arguments are accepted but
        not used.
        """
        # see here for mypy bug: https://github.com/python/mypy/issues/14123
        return AbstractTensor.__class_getitem__.__func__(cls, item)  # type: ignore

    @classmethod
    def _docarray_from_ndarray(cls: Type[T], value: np.ndarray) -> T:
        """Create a tensor from a numpy array.

        PS: this function is different from `from_ndarray` because it is private under the docarray namespace.
        This allows us to avoid a breaking change if one day we introduce a Tensor backend with a `from_ndarray` method.

        :param value: the numpy array to convert
        :return: the result of `cls._docarray_from_native(value)`
        """
        return cls._docarray_from_native(value)

    def _docarray_to_ndarray(self) -> np.ndarray:
        """Cast itself to a numpy array.

        Delegates to `unwrap()`.

        :return: the result of `self.unwrap()`
        """
        return self.unwrap()

__docarray_validate_getitem__(item) classmethod

This method validates the input to AbstractTensor.__class_getitem__.

It is called at "class creation time", i.e. when a class is created with syntax of the form AnyTensor[shape].

The default implementation tries to cast any item to a tuple of ints. A subclass can override this method to implement custom validation logic.

The output of this is eventually passed to AbstractTensor.__docarray_validate_shape__ as its shape argument.

Raises TypeError if the input item does not pass validation, i.e. if it cannot be converted to a tuple.

Parameters:

Name Type Description Default
item Any

The item to validate, passed to __class_getitem__ (Tensor[item]).

required

Returns:

Type Description
Tuple[int]

The validated item == the target shape of this tensor.

Source code in docarray/typing/tensor/abstract_tensor.py
@classmethod
def __docarray_validate_getitem__(cls, item: Any) -> Tuple[int]:
    """This method validates the input to `AbstractTensor.__class_getitem__`.

    It is called at "class creation time",
    i.e. when a class is created with syntax of the form AnyTensor[shape].

    The default implementation turns `item` into a tuple: a single int or a
    single named dimension (str) becomes a one-element tuple, anything else
    is passed to `tuple()`.
    A subclass can override this method to implement custom validation logic.

    The output of this is eventually passed to
    [`AbstractTensor.__docarray_validate_shape__`]
    [docarray.typing.tensor.abstract_tensor.AbstractTensor.__docarray_validate_shape__]
    as its `shape` argument.

    Raises `TypeError` if the input `item` does not pass validation.

    :param item: The item to validate, passed to `__class_getitem__` (`Tensor[item]`).
    :return: The validated item == the target shape of this tensor.
    """
    # A str has to be wrapped explicitly: it is iterable, so `tuple('batch')`
    # would split one named dimension into one dimension per character.
    if isinstance(item, (int, str)):
        return (item,)
    try:
        return tuple(item)
    except TypeError as err:
        raise TypeError(f'{item} is not a valid tensor shape.') from err

__docarray_validate_shape__(t, shape) classmethod

Every tensor has to implement this method in order to enable syntax of the form AnyTensor[shape]. It is called when a tensor is assigned to a field of this type. i.e. when a tensor is passed to a Document field of type AnyTensor[shape].

The intended behaviour is as follows:

  • If the shape of t is equal to shape, return t.
  • If the shape of t is not equal to shape, but can be reshaped to shape, return t reshaped to shape.
  • If the shape of t is not equal to shape and cannot be reshaped to shape, raise a ValueError.

Parameters:

Name Type Description Default
t T

The tensor to validate.

required
shape Tuple[Union[int, str], ...]

The shape to validate against.

required

Returns:

Type Description
T

The validated tensor.

Source code in docarray/typing/tensor/abstract_tensor.py
@classmethod
def __docarray_validate_shape__(cls, t: T, shape: Tuple[Union[int, str], ...]) -> T:
    """Validate the tensor `t` against a target `shape`, reshaping it if possible.

    Every tensor has to implement this method in order to
    enable syntax of the form AnyTensor[shape].
    It is called when a tensor is assigned to a field of this type,
    i.e. when a tensor is passed to a Document field of type AnyTensor[shape].

    The behaviour is as follows:

    - If the shape of `t` is equal to `shape`, return `t`.
    - If `shape` contains named dimensions (strings) or an Ellipsis (`...`),
        `t` is never reshaped: it is returned unchanged when it matches the
        pattern, otherwise a ValueError is raised. All occurrences of the same
        name must have the same size, and the (single) Ellipsis stands for
        one or more dimensions of arbitrary size.
    - If `shape` only contains concrete sizes and differs from the shape
        of `t`, a warning is emitted and `t` is reshaped to `shape`.
        If it cannot be reshaped to `shape`, a ValueError is raised.

    :param t: The tensor to validate.
    :param shape: The shape to validate against.
    :return: The validated tensor.
    :raises ValueError: if `shape` uses more than one Ellipsis, if `t` does
        not match a symbolic `shape`, or if `t` cannot be reshaped to `shape`.
    """
    comp_be = t.get_comp_backend()
    tshape = comp_be.shape(t)
    if tshape == shape:
        return t

    if any(isinstance(dim, str) or dim is Ellipsis for dim in shape):
        ellipsis_occurrences = [
            pos for pos, dim in enumerate(shape) if dim is Ellipsis
        ]
        if ellipsis_occurrences:
            if len(ellipsis_occurrences) > 1:
                raise ValueError(
                    f'Cannot use Ellipsis (...) more than once for the shape {shape}'
                )
            ellipsis_pos = ellipsis_occurrences[0]
            # Replace the Ellipsis by as many uniquely named dimensions as are
            # needed to reach the rank of `t`. Should be at least 1.
            dimensions_needed = max(len(tshape) - len(shape) + 1, 1)
            shape = (
                shape[:ellipsis_pos]
                + tuple(
                    f'__dim_var_{index}__' for index in range(dimensions_needed)
                )
                + shape[ellipsis_pos + 1 :]
            )

        if len(tshape) != len(shape):
            raise ValueError(
                f'Tensor shape mismatch. Expected {shape}, got {tshape}'
            )
        # size bound to each named dimension seen so far
        known_dims: Dict[str, int] = {}
        for tdim, dim in zip(tshape, shape):
            if isinstance(dim, int) and tdim != dim:
                raise ValueError(
                    f'Tensor shape mismatch. Expected {shape}, got {tshape}'
                )
            elif isinstance(dim, str):
                if dim in known_dims and known_dims[dim] != tdim:
                    raise ValueError(
                        f'Tensor shape mismatch. Expected {shape}, got {tshape}'
                    )
                else:
                    known_dims[dim] = tdim
        return t

    shape = cast(Tuple[int], shape)
    warnings.warn(
        f'Tensor shape mismatch. Reshaping tensor '
        f'of shape {tshape} to shape {shape}'
    )
    try:
        value = cls._docarray_from_native(comp_be.reshape(t, shape))
    except (RuntimeError, ValueError) as err:
        # Backends do not agree on the exception type of a failed reshape
        # (NumPy, for instance, raises ValueError), so both are normalized
        # to the same ValueError and the original error is kept as cause.
        raise ValueError(
            f'Cannot reshape tensor of shape {tshape} to shape {shape}'
        ) from err
    return cast(T, value)

__getitem__(item) abstractmethod

Get a slice of this tensor.

Source code in docarray/typing/tensor/abstract_tensor.py
@abc.abstractmethod
def __getitem__(self: T, item) -> T:
    """Get a slice of this tensor.

    Abstract: every concrete tensor type has to provide the implementation.

    :param item: the index or slice selecting the part to retrieve
    :return: the selected part, annotated with the same type as `self`
    """
    ...

__iter__() abstractmethod

Iterate over the elements of this tensor.

Source code in docarray/typing/tensor/abstract_tensor.py
@abc.abstractmethod
def __iter__(self):
    """Iterate over the elements of this tensor.

    Abstract: every concrete tensor type has to provide the implementation.
    """
    ...

__setitem__(index, value) abstractmethod

Set a slice of this tensor.

Source code in docarray/typing/tensor/abstract_tensor.py
@abc.abstractmethod
def __setitem__(self, index, value):
    """Set a slice of this tensor.

    Abstract: every concrete tensor type has to provide the implementation.

    :param index: the index or slice selecting the part to overwrite
    :param value: the new content for the selected part
    """
    ...

from_protobuf(pb_msg) classmethod

Read ndarray from a proto msg

Parameters:

Name Type Description Default
pb_msg NdArrayProto
required

Returns:

Type Description
T

a numpy array

Source code in docarray/typing/tensor/ndarray.py
@classmethod
def from_protobuf(cls: Type[T], pb_msg: 'NdArrayProto') -> 'T':
    """
    Read ndarray from a proto msg.

    The array is rebuilt from `pb_msg.dense`: its raw `buffer` is interpreted
    with the recorded `dtype` and reshaped to the recorded `shape`.

    :param pb_msg: the protobuf message holding the dense array description
    :return: a numpy array wrapped through `cls._docarray_from_native`
    :raises ValueError: if the message carries neither a buffer nor a shape
    """
    source = pb_msg.dense
    if source.buffer:
        x = np.frombuffer(bytearray(source.buffer), dtype=source.dtype)
        return cls._docarray_from_native(x.reshape(source.shape))
    if len(source.shape) > 0:
        # No payload but a shape (e.g. an array with a zero-sized dimension):
        # keep the recorded dtype rather than silently falling back to
        # numpy's float64 default. An unset dtype ('') keeps the old default.
        return cls._docarray_from_native(
            np.zeros(source.shape, dtype=source.dtype or None)
        )
    raise ValueError(f'proto message {pb_msg} cannot be cast to a NdArray')

get_comp_backend() staticmethod

Return the computational backend of the tensor

Source code in docarray/typing/tensor/ndarray.py
@staticmethod
def get_comp_backend() -> 'NumpyCompBackend':
    """Build the computational backend that belongs to this tensor type.

    :return: a freshly created `NumpyCompBackend` instance
    """
    # Imported at call time rather than at module import time.
    from docarray.computation.numpy_backend import NumpyCompBackend

    backend = NumpyCompBackend()
    return backend

to_protobuf()

Transform self into a NdArrayProto protobuf message

Source code in docarray/typing/tensor/ndarray.py
def to_protobuf(self) -> 'NdArrayProto':
    """
    Serialize this array into a `NdArrayProto` protobuf message.

    The raw bytes, the shape and the dtype string of the array are written
    to the `dense` part of the message.

    :return: the populated `NdArrayProto` message
    """
    from docarray.proto import NdArrayProto

    message = NdArrayProto()
    dense = message.dense

    dense.buffer = self.tobytes()
    # Reset the repeated field before filling in the current dimensions.
    dense.ClearField('shape')
    dense.shape.extend(list(self.shape))
    dense.dtype = self.dtype.str

    return message

unwrap()

Return the original ndarray without any memory copy.

The original object remains intact and is still a Document NdArray, while the returned object is a pure np.ndarray; both objects share the same underlying memory.


from docarray.typing import NdArray
import numpy as np
from pydantic import parse_obj_as

t1 = parse_obj_as(NdArray, np.zeros((3, 224, 224)))
t2 = t1.unwrap()
# here t2 is a pure np.ndarray but t1 is still a Docarray NdArray
# But both share the same underlying memory

Returns:

Type Description
ndarray

a numpy.ndarray

Source code in docarray/typing/tensor/ndarray.py
def unwrap(self) -> np.ndarray:
    """
    Return the underlying data as a plain `np.ndarray`, without copying it.

    The object this is called on is left untouched and remains a Document
    `NdArray`; the returned object is a pure `np.ndarray` view, so both
    objects share the same memory.

    ---

    ```python
    from docarray.typing import NdArray
    import numpy as np
    from pydantic import parse_obj_as

    t1 = parse_obj_as(NdArray, np.zeros((3, 224, 224)))
    t2 = t1.unwrap()
    # here t2 is a pure np.ndarray but t1 is still a Docarray NdArray
    # But both share the same underlying memory
    ```

    ---

    :return: a `numpy.ndarray`
    """
    plain_view = self.view(np.ndarray)
    return plain_view

docarray.typing.tensor.tensorflow_tensor

TensorFlowTensor

Bases: AbstractTensor, Generic[ShapeT]

TensorFlowTensor class with a .tensor attribute of type tf.Tensor, intended for use in a Document.

This enables (de)serialization from/to protobuf and json, data validation, and coercion from compatible types like numpy.ndarray.

This type can also be used in a parametrized way, specifying the shape of the tensor.

In comparison to TorchTensor and NdArray, TensorFlowTensor is not a subclass of tf.Tensor (or torch.Tensor, np.ndarray respectively). Instead, the tf.Tensor is stored in TensorFlowTensor.tensor. Therefore, to do operations on the actual tensor data you have to always access the TensorFlowTensor.tensor attribute.


import tensorflow as tf
from docarray.typing import TensorFlowTensor


t = TensorFlowTensor(tensor=tf.zeros((224, 224)))

# tensorflow functions
broadcasted = tf.broadcast_to(t.tensor, (3, 224, 224))
broadcasted = tf.broadcast_to(t.unwrap(), (3, 224, 224))

# this will fail:
# broadcasted = tf.broadcast_to(t, (3, 224, 224))

# tensorflow.Tensor methods:
arr = t.tensor.numpy()
arr = t.unwrap().numpy()

# this will fail:
# arr = t.numpy()

The TensorFlowCompBackend, however, operates on our TensorFlowTensor instances. Here, you do not have to access the .tensor attribute, but can instead just hand over your TensorFlowTensor instance.


import tensorflow as tf
from docarray.typing import TensorFlowTensor


zeros = TensorFlowTensor(tensor=tf.zeros((3, 224, 224)))

comp_be = zeros.get_comp_backend()
reshaped = comp_be.reshape(zeros, (224, 224, 3))
assert comp_be.shape(reshaped) == (224, 224, 3)

You can use TensorFlowTensor in a Document as follows:


from docarray import BaseDoc
from docarray.typing import TensorFlowTensor
import tensorflow as tf


class MyDoc(BaseDoc):
    tensor: TensorFlowTensor
    image_tensor: TensorFlowTensor[3, 224, 224]
    square_crop: TensorFlowTensor[3, 'x', 'x']
    random_image: TensorFlowTensor[
        3, ...
    ]  # first dimension is fixed, can have arbitrary shape


# create a document with tensors
doc = MyDoc(
    tensor=tf.zeros((128,)),
    image_tensor=tf.zeros((3, 224, 224)),
    square_crop=tf.zeros((3, 64, 64)),
    random_image=tf.zeros((3, 128, 256)),
)

# automatic shape conversion
doc = MyDoc(
    tensor=tf.zeros((128,)),
    image_tensor=tf.zeros((224, 224, 3)),  # will reshape to (3, 224, 224)
    square_crop=tf.zeros((3, 128, 128)),
    random_image=tf.zeros((3, 64, 128)),
)

# !! The following will raise an error due to shape mismatch !!
from pydantic import ValidationError

try:
    doc = MyDoc(
        tensor=tf.zeros((128,)),
        image_tensor=tf.zeros((224, 224)),  # this will fail validation
        square_crop=tf.zeros((3, 128, 64)),  # this will also fail validation
        random_image=tf.zeros((4, 64, 128)),  # this will also fail validation
    )
except ValidationError as e:
    pass

Source code in docarray/typing/tensor/tensorflow_tensor.py
@_register_proto(proto_type_name='tensorflow_tensor')
class TensorFlowTensor(AbstractTensor, Generic[ShapeT], metaclass=metaTensorFlow):
    """
    TensorFlowTensor class with a `.tensor` attribute of type `tf.Tensor`,
    intended for use in a Document.

    This enables (de)serialization from/to protobuf and json, data validation,
    and coercion from compatible types like numpy.ndarray.

    This type can also be used in a parametrized way, specifying the shape of the
    tensor.

    In comparison to [`TorchTensor`][docarray.typing.TorchTensor] and
    [`NdArray`][docarray.typing.tensor.ndarray.NdArray],
    [`TensorFlowTensor`][docarray.typing.tensor.tensorflow_tensor.TensorFlowTensor]
    is not a subclass of `tf.Tensor` (or `torch.Tensor`, `np.ndarray` respectively).
    Instead, the `tf.Tensor` is stored in
    [`TensorFlowTensor.tensor`][docarray.typing.tensor.tensorflow_tensor.TensorFlowTensor].
    Therefore, to do operations on the actual tensor data you have to always access the
    [`TensorFlowTensor.tensor`][docarray.typing.tensor.tensorflow_tensor.TensorFlowTensor]
    attribute.

    ---

    ```python
    import tensorflow as tf
    from docarray.typing import TensorFlowTensor


    t = TensorFlowTensor(tensor=tf.zeros((224, 224)))

    # tensorflow functions
    broadcasted = tf.broadcast_to(t.tensor, (3, 224, 224))
    broadcasted = tf.broadcast_to(t.unwrap(), (3, 224, 224))

    # this will fail:
    # broadcasted = tf.broadcast_to(t, (3, 224, 224))

    # tensorflow.Tensor methods:
    arr = t.tensor.numpy()
    arr = t.unwrap().numpy()

    # this will fail:
    # arr = t.numpy()
    ```

    ---

    The `TensorFlowCompBackend`, however, operates on our
    [`TensorFlowTensor`][docarray.typing.TensorFlowTensor] instances.
    Here, you do not have to access the `.tensor` attribute,
    but can instead just hand over your
    [`TensorFlowTensor`][docarray.typing.TensorFlowTensor] instance.

    ---

    ```python
    import tensorflow as tf
    from docarray.typing import TensorFlowTensor


    zeros = TensorFlowTensor(tensor=tf.zeros((3, 224, 224)))

    comp_be = zeros.get_comp_backend()
    reshaped = comp_be.reshape(zeros, (224, 224, 3))
    assert comp_be.shape(reshaped) == (224, 224, 3)
    ```

    ---

    You can use [`TensorFlowTensor`][docarray.typing.TensorFlowTensor] in a Document as follows:

    ---

    ```python
    from docarray import BaseDoc
    from docarray.typing import TensorFlowTensor
    import tensorflow as tf


    class MyDoc(BaseDoc):
        tensor: TensorFlowTensor
        image_tensor: TensorFlowTensor[3, 224, 224]
        square_crop: TensorFlowTensor[3, 'x', 'x']
        random_image: TensorFlowTensor[
            3, ...
        ]  # first dimension is fixed, can have arbitrary shape


    # create a document with tensors
    doc = MyDoc(
        tensor=tf.zeros((128,)),
        image_tensor=tf.zeros((3, 224, 224)),
        square_crop=tf.zeros((3, 64, 64)),
        random_image=tf.zeros((3, 128, 256)),
    )

    # automatic shape conversion
    doc = MyDoc(
        tensor=tf.zeros((128,)),
        image_tensor=tf.zeros((224, 224, 3)),  # will reshape to (3, 224, 224)
        square_crop=tf.zeros((3, 128, 128)),
        random_image=tf.zeros((3, 64, 128)),
    )

    # !! The following will raise an error due to shape mismatch !!
    from pydantic import ValidationError

    try:
        doc = MyDoc(
            tensor=tf.zeros((128,)),
            image_tensor=tf.zeros((224, 224)),  # this will fail validation
            square_crop=tf.zeros((3, 128, 64)),  # this will also fail validation
            random_image=tf.zeros((4, 64, 128)),  # this will also fail validation
        )
    except ValidationError as e:
        pass
    ```

    ---
    """

    __parametrized_meta__ = metaTensorFlow

    def __init__(self, tensor: tf.Tensor):
        """Wrap `tensor` and expose it through the `.tensor` attribute.

        :param tensor: the `tf.Tensor` to store
        """
        super().__init__()
        self.tensor = tensor

    def __getitem__(self, item):
        """Get a slice of this tensor's `tf.Tensor`.

        The wrapped tensor is indexed with `item` (unless it is `None`) and the
        result is handed to `TensorFlowCompBackend._cast_output`.

        :param item: the index or slice to retrieve
        :return: the output of `TensorFlowCompBackend._cast_output`
        """
        from docarray.computation.tensorflow_backend import TensorFlowCompBackend

        tensor = self.unwrap()
        if tensor is not None:
            tensor = tensor[item]
        return TensorFlowCompBackend._cast_output(t=tensor)

    def __setitem__(self, index, value):
        """Set a slice of this tensor's `tf.Tensor`

        The assignment goes through a temporary `tf.Variable`; afterwards
        `self.tensor` is rebound to a new constant built from that variable.

        :param index: the index or slice to overwrite
        :param value: the new content, cast to the dtype of the wrapped tensor
        """
        t = self.unwrap()
        value = tf.cast(value, dtype=t.dtype)
        var = tf.Variable(t)
        var[index].assign(value)
        self.tensor = tf.constant(var)

    def __iter__(self):
        """Iterate over the elements of this tensor's `tf.Tensor`."""
        for i in range(len(self)):
            yield self[i]

    @classmethod
    def _docarray_validate(
        cls: Type[T],
        value: Union[T, np.ndarray, str, Any],
    ) -> T:
        """Validate `value` and coerce it into an instance of this type.

        `TensorFlowTensor` instances are returned as they are. `tf.Tensor`,
        `np.ndarray`, other `AbstractTensor`s, torch tensors and jax arrays are
        converted. A `str` is parsed with `orjson` first; it and any other
        value is finally passed to `tf.constant`.

        :param value: the object to validate
        :return: the validated tensor
        :raises ValueError: if `value` cannot be turned into a tensor
        """
        if isinstance(value, TensorFlowTensor):
            return cast(T, value)
        elif isinstance(value, tf.Tensor):
            return cls._docarray_from_native(value)
        elif isinstance(value, np.ndarray):
            return cls._docarray_from_ndarray(value)
        elif isinstance(value, AbstractTensor):
            return cls._docarray_from_ndarray(value._docarray_to_ndarray())
        elif torch_available and isinstance(value, torch.Tensor):
            # The torch tensor is turned into a numpy array here, so it has to
            # go through the ndarray path; `_docarray_from_native` would store
            # the raw ndarray in `.tensor` instead of a `tf.Tensor`.
            return cls._docarray_from_ndarray(value.detach().cpu().numpy())
        elif jax_available and isinstance(value, jnp.ndarray):
            # Same as above: `__array__()` yields a numpy array.
            return cls._docarray_from_ndarray(value.__array__())
        elif isinstance(value, str):
            value = orjson.loads(value)

        try:
            arr: tf.Tensor = tf.constant(value)
            return cls(tensor=arr)
        except Exception as err:
            # Keep the original failure attached as the cause instead of
            # discarding it.
            raise ValueError(
                f'Expected a tensorflow.Tensor compatible type, got {type(value)}'
            ) from err

    @classmethod
    def _docarray_from_native(cls: Type[T], value: Union[tf.Tensor, T]) -> T:
        """
        Create a `TensorFlowTensor` from a `tf.Tensor` or `TensorFlowTensor`
        instance.

        A `TensorFlowTensor` is not copied: its `__class__` is reassigned in
        place and the same object is returned.

        :param value: instance of `tf.Tensor` or `TensorFlowTensor`
        :return: a `TensorFlowTensor`
        """
        # `__unparametrizedcls__` is falsy by default (None on AbstractTensor);
        # presumably it is set on parametrized classes -- TODO confirm.
        if isinstance(value, TensorFlowTensor):
            if cls.__unparametrizedcls__:
                value.__class__ = cls.__unparametrizedcls__  # type: ignore
            else:
                value.__class__ = cls
            return cast(T, value)
        else:
            if cls.__unparametrizedcls__:
                cls_param_ = cls.__unparametrizedcls__
                cls_param = cast(Type[T], cls_param_)
            else:
                cls_param = cls

            return cls_param(tensor=value)

    @staticmethod
    def get_comp_backend() -> 'TensorFlowCompBackend':
        """Return the computational backend of the tensor"""
        from docarray.computation.tensorflow_backend import TensorFlowCompBackend

        return TensorFlowCompBackend()

    def _docarray_to_json_compatible(self) -> np.ndarray:
        """
        Convert `TensorFlowTensor` into a json compatible object
        :return: a representation of the tensor compatible with orjson
        """
        return self.unwrap().numpy()

    def to_protobuf(self) -> 'NdArrayProto':
        """
        Transform self into an NdArrayProto protobuf message.

        :return: the message holding the bytes, shape and dtype of the tensor
        """
        from docarray.proto import NdArrayProto

        nd_proto = NdArrayProto()

        value_np = self.tensor.numpy()
        nd_proto.dense.buffer = value_np.tobytes()
        nd_proto.dense.ClearField('shape')
        nd_proto.dense.shape.extend(list(value_np.shape))
        nd_proto.dense.dtype = value_np.dtype.str

        return nd_proto

    @classmethod
    def from_protobuf(cls: Type[T], pb_msg: 'NdArrayProto') -> 'T':
        """
        Read ndarray from a proto msg.
        :param pb_msg: the protobuf message holding the dense array description
        :return: a `TensorFlowTensor`
        :raises ValueError: if the message carries neither a buffer nor a shape
        """
        source = pb_msg.dense
        if source.buffer:
            x = np.frombuffer(bytearray(source.buffer), dtype=source.dtype)
            return cls.from_ndarray(x.reshape(source.shape))
        elif len(source.shape) > 0:
            # No payload but a shape (e.g. a zero-sized dimension): keep the
            # recorded dtype rather than numpy's float64 default. An unset
            # dtype ('') keeps the old default.
            return cls.from_ndarray(
                np.zeros(source.shape, dtype=source.dtype or None)
            )
        else:
            raise ValueError(
                f'Proto message {pb_msg} cannot be cast to a TensorFlowTensor.'
            )

    @classmethod
    def from_ndarray(cls: Type[T], value: np.ndarray) -> T:
        """Create a `TensorFlowTensor` from a numpy array.

        :param value: the numpy array
        :return: a `TensorFlowTensor`
        """
        return cls._docarray_from_native(tf.convert_to_tensor(value))

    def unwrap(self) -> tf.Tensor:
        """
        Return the original `tf.Tensor` without any memory copy.

        The object this is called on remains intact and is still a Document
        `TensorFlowTensor`; the returned object is the pure `tf.Tensor` it
        wraps.

        ---

        ```python
        from docarray.typing import TensorFlowTensor
        import tensorflow as tf

        t1 = TensorFlowTensor.validate(tf.zeros((3, 224, 224)), None, None)
        # here t1 is a docarray TensorFlowTensor
        t2 = t1.unwrap()
        # here t2 is a pure tf.Tensor but t1 is still a Docarray TensorFlowTensor
        ```

        ---
        :return: a `tf.Tensor`
        """
        return self.tensor

    def __len__(self) -> int:
        """Return `len()` of the wrapped `tf.Tensor`."""
        return len(self.tensor)

    @classmethod
    def _docarray_from_ndarray(cls: Type[T], value: np.ndarray) -> T:
        """Create a tensor from a numpy array.

        PS: this function is different from `from_ndarray` because it is private
        under the docarray namespace. This allows us to avoid a breaking change
        if one day we introduce a Tensor backend with a `from_ndarray` method.

        :param value: the numpy array
        :return: a `TensorFlowTensor`
        """
        return cls.from_ndarray(value)

    def _docarray_to_ndarray(self) -> np.ndarray:
        """cast itself to a numpy array"""
        return self.tensor.numpy()

    @property
    def shape(self):
        """Shape of the wrapped tensor, as returned by `tf.shape`."""
        return tf.shape(self.tensor)

__docarray_validate_getitem__(item) classmethod

This method validates the input to AbstractTensor.__class_getitem__.

It is called at "class creation time", i.e. when a class is created with syntax of the form AnyTensor[shape].

The default implementation tries to cast any item to a tuple (a single int is wrapped into a one-element tuple first). A subclass can override this method to implement custom validation logic.

The output of this is eventually passed to AbstractTensor.__docarray_validate_shape__ as its shape argument.

Raises TypeError if the input item cannot be converted to a tuple.

Parameters:

Name Type Description Default
item Any

The item to validate, passed to __class_getitem__ (Tensor[item]).

required

Returns:

Type Description
Tuple[int]

The validated item == the target shape of this tensor.

Source code in docarray/typing/tensor/abstract_tensor.py
@classmethod
def __docarray_validate_getitem__(cls, item: Any) -> Tuple[int]:
    """This method validates the input to `AbstractTensor.__class_getitem__`.

    It is called at "class creation time",
    i.e. when a class is created with syntax of the form AnyTensor[shape].

    The default implementation turns `item` into a tuple: a single int or a
    single named dimension (a `str`) becomes a one-element tuple, any other
    iterable is converted with `tuple()`.
    A subclass can override this method to implement custom validation logic.

    The output of this is eventually passed to
    [`AbstractTensor.__docarray_validate_shape__`]
    [docarray.typing.tensor.abstract_tensor.AbstractTensor.__docarray_validate_shape__]
    as its `shape` argument.

    :param item: The item to validate, passed to `__class_getitem__` (`Tensor[item]`).
    :return: The validated item == the target shape of this tensor.
    :raises TypeError: if `item` cannot be converted to a tuple.
    """
    # A str is iterable: tuple('batch') would yield ('b', 'a', 't', 'c', 'h')
    # instead of one named dimension, so wrap it like a single int.
    if isinstance(item, (int, str)):
        return (item,)
    try:
        return tuple(item)
    except TypeError as err:
        raise TypeError(f'{item} is not a valid tensor shape.') from err

__docarray_validate_shape__(t, shape) classmethod

Every tensor has to implement this method in order to enable syntax of the form AnyTensor[shape]. It is called when a tensor is assigned to a field of this type. i.e. when a tensor is passed to a Document field of type AnyTensor[shape].

The intended behaviour is as follows:

  • If the shape of t is equal to shape, return t.
  • If the shape of t is not equal to shape, but can be reshaped to shape, return t reshaped to shape.
  • If the shape of t is not equal to shape and cannot be reshaped to shape, raise a ValueError.

Parameters:

Name Type Description Default
t T

The tensor to validate.

required
shape Tuple[Union[int, str], ...]

The shape to validate against.

required

Returns:

Type Description
T

The validated tensor.

Source code in docarray/typing/tensor/abstract_tensor.py
@classmethod
def __docarray_validate_shape__(cls, t: T, shape: Tuple[Union[int, str], ...]) -> T:
    """Every tensor has to implement this method in order to
    enable syntax of the form AnyTensor[shape].
    It is called when a tensor is assigned to a field of this type.
    i.e. when a tensor is passed to a Document field of type AnyTensor[shape].

    The intended behaviour is as follows:

    - If the shape of `t` is equal to `shape`, return `t`.
    - If `shape` contains named dimensions (`str`) or an Ellipsis, `t` is only
        checked against it and returned unchanged; it is never reshaped.
    - If the shape of `t` is not equal to `shape`,
        but can be reshaped to `shape`, return `t` reshaped to `shape`.
    - If the shape of `t` is not equal to `shape`
        and cannot be reshaped to `shape`, raise a ValueError.

    :param t: The tensor to validate.
    :param shape: The shape to validate against.
    :return: The validated tensor.
    :raises ValueError: if the shapes do not match, if Ellipsis is used more
        than once, or if the backend fails to reshape `t`.
    """
    comp_be = t.get_comp_backend()
    tshape = comp_be.shape(t)
    if tshape == shape:
        return t

    if any(isinstance(dim, str) or dim is Ellipsis for dim in shape):
        ellipsis_occurrences = [
            pos for pos, dim in enumerate(shape) if dim is Ellipsis
        ]
        if ellipsis_occurrences:
            if len(ellipsis_occurrences) > 1:
                raise ValueError(
                    f'Cannot use Ellipsis (...) more than once for the shape {shape}'
                )
            ellipsis_pos = ellipsis_occurrences[0]
            # Calculate how many dimensions to add. Should be at least 1.
            dimensions_needed = max(len(tshape) - len(shape) + 1, 1)
            # Replace the Ellipsis by that many distinct placeholder names,
            # each of which matches any size in the loop below.
            shape = (
                shape[:ellipsis_pos]
                + tuple(
                    f'__dim_var_{index}__' for index in range(dimensions_needed)
                )
                + shape[ellipsis_pos + 1 :]
            )

        if len(tshape) != len(shape):
            raise ValueError(
                f'Tensor shape mismatch. Expected {shape}, got {tshape}'
            )
        # Size bound to each named dimension so far; a name that occurs more
        # than once has to match the same size every time.
        known_dims: Dict[str, int] = {}
        for tdim, dim in zip(tshape, shape):
            if isinstance(dim, int) and tdim != dim:
                raise ValueError(
                    f'Tensor shape mismatch. Expected {shape}, got {tshape}'
                )
            elif isinstance(dim, str):
                if dim in known_dims and known_dims[dim] != tdim:
                    raise ValueError(
                        f'Tensor shape mismatch. Expected {shape}, got {tshape}'
                    )
                else:
                    known_dims[dim] = tdim
        return t

    shape = cast(Tuple[int], shape)
    warnings.warn(
        f'Tensor shape mismatch. Reshaping tensor '
        f'of shape {tshape} to shape {shape}'
    )
    try:
        value = cls._docarray_from_native(comp_be.reshape(t, shape))
    except Exception as err:
        # Backends signal an impossible reshape with different exception
        # types; translate all of them into the documented ValueError and
        # keep the backend error as the cause.
        raise ValueError(
            f'Cannot reshape tensor of shape {tshape} to shape {shape}'
        ) from err
    return cast(T, value)

__iter__()

Iterate over the elements of this tensor's tf.Tensor.

Source code in docarray/typing/tensor/tensorflow_tensor.py
def __iter__(self):
    """Yield the entries of this tensor's `tf.Tensor` one index at a time.

    The length is read once up front; every entry is then obtained through
    regular indexing (`self[i]`).
    """
    length = len(self)
    position = 0
    while position < length:
        yield self[position]
        position += 1

__setitem__(index, value)

Set a slice of this tensor's tf.Tensor

Source code in docarray/typing/tensor/tensorflow_tensor.py
def __setitem__(self, index, value):
    """Overwrite a slice of the wrapped `tf.Tensor`.

    The new content is cast to the dtype of the current tensor and assigned
    through a temporary `tf.Variable`; `self.tensor` is then rebound to a
    constant built from that variable.

    :param index: the index or slice to overwrite
    :param value: the new content for that slice
    """
    current = self.unwrap()
    casted = tf.cast(value, dtype=current.dtype)
    mutable = tf.Variable(current)
    mutable[index].assign(casted)
    self.tensor = tf.constant(mutable)

from_ndarray(value) classmethod

Create a TensorFlowTensor from a numpy array.

Parameters:

Name Type Description Default
value ndarray

the numpy array

required

Returns:

Type Description
T

a TensorFlowTensor

Source code in docarray/typing/tensor/tensorflow_tensor.py
@classmethod
def from_ndarray(cls: Type[T], value: np.ndarray) -> T:
    """Build a `TensorFlowTensor` out of a numpy array.

    The array is converted with `tf.convert_to_tensor` and the result is
    wrapped through `_docarray_from_native`.

    :param value: the numpy array
    :return: a `TensorFlowTensor`
    """
    native = tf.convert_to_tensor(value)
    return cls._docarray_from_native(native)

from_protobuf(pb_msg) classmethod

Read ndarray from a proto msg.

Parameters:

Name Type Description Default
pb_msg NdArrayProto
required

Returns:

Type Description
T

a TensorFlowTensor

Source code in docarray/typing/tensor/tensorflow_tensor.py
@classmethod
def from_protobuf(cls: Type[T], pb_msg: 'NdArrayProto') -> 'T':
    """
    Read ndarray from a proto msg.

    The array is rebuilt from `pb_msg.dense` (raw `buffer`, `dtype` and
    `shape`) and converted through `cls.from_ndarray`.

    :param pb_msg: the protobuf message holding the dense array description
    :return: a `TensorFlowTensor`
    :raises ValueError: if the message carries neither a buffer nor a shape
    """
    source = pb_msg.dense
    if source.buffer:
        x = np.frombuffer(bytearray(source.buffer), dtype=source.dtype)
        return cls.from_ndarray(x.reshape(source.shape))
    if len(source.shape) > 0:
        # No payload but a shape (e.g. a zero-sized dimension): keep the
        # recorded dtype rather than numpy's float64 default. An unset dtype
        # ('') keeps the old default.
        return cls.from_ndarray(np.zeros(source.shape, dtype=source.dtype or None))
    raise ValueError(
        f'Proto message {pb_msg} cannot be cast to a TensorFlowTensor.'
    )

get_comp_backend() staticmethod

Return the computational backend of the tensor

Source code in docarray/typing/tensor/tensorflow_tensor.py
@staticmethod
def get_comp_backend() -> 'TensorFlowCompBackend':
    """Build the computational backend that belongs to this tensor type.

    :return: a freshly created `TensorFlowCompBackend` instance
    """
    # Imported at call time rather than at module import time.
    from docarray.computation.tensorflow_backend import TensorFlowCompBackend

    backend = TensorFlowCompBackend()
    return backend

to_protobuf()

Transform self into an NdArrayProto protobuf message.

Source code in docarray/typing/tensor/tensorflow_tensor.py
def to_protobuf(self) -> 'NdArrayProto':
    """
    Serialize this tensor into an `NdArrayProto` protobuf message.

    The wrapped tensor is converted with `.numpy()`; the raw bytes, shape
    and dtype string of that array are written to the `dense` part of the
    message.

    :return: the populated `NdArrayProto` message
    """
    from docarray.proto import NdArrayProto

    as_numpy = self.tensor.numpy()

    message = NdArrayProto()
    dense = message.dense
    dense.buffer = as_numpy.tobytes()
    # Reset the repeated field before filling in the current dimensions.
    dense.ClearField('shape')
    dense.shape.extend(list(as_numpy.shape))
    dense.dtype = as_numpy.dtype.str

    return message

unwrap()

Return the original tf.Tensor without any memory copy.

The original object remains intact and is still a Document TensorFlowTensor, while the returned object is the pure tf.Tensor it wraps.


from docarray.typing import TensorFlowTensor
import tensorflow as tf

t1 = TensorFlowTensor.validate(tf.zeros((3, 224, 224)), None, None)
# here t1 is a docarray TensorFlowTensor
t2 = t1.unwrap()
# here t2 is a pure tf.Tensor but t1 is still a Docarray TensorFlowTensor

Returns:

Type Description
Tensor

a tf.Tensor

Source code in docarray/typing/tensor/tensorflow_tensor.py
def unwrap(self) -> tf.Tensor:
    """
    Hand back the wrapped `tf.Tensor` itself, without any memory copy.

    The object this is called on is left untouched and remains a Document
    `TensorFlowTensor`; the returned object is the pure `tf.Tensor` stored
    in its `.tensor` attribute.

    ---

    ```python
    from docarray.typing import TensorFlowTensor
    import tensorflow as tf

    t1 = TensorFlowTensor.validate(tf.zeros((3, 224, 224)), None, None)
    # here t1 is a docarray TensorFlowTensor
    t2 = t1.unwrap()
    # here t2 is a pure tf.Tensor but t1 is still a Docarray TensorFlowTensor
    ```

    ---
    :return: a `tf.Tensor`
    """
    wrapped = self.tensor
    return wrapped

docarray.typing.tensor.torch_tensor

TorchTensor

Bases: Tensor, AbstractTensor, Generic[ShapeT]

Subclass of torch.Tensor, intended for use in a Document. This enables (de)serialization from/to protobuf and json, data validation, and coercion from compatible types like numpy.ndarray.

This type can also be used in a parametrized way, specifying the shape of the tensor.


from docarray import BaseDoc
from docarray.typing import TorchTensor
import torch


class MyDoc(BaseDoc):
    tensor: TorchTensor
    image_tensor: TorchTensor[3, 224, 224]
    square_crop: TorchTensor[3, 'x', 'x']
    random_image: TorchTensor[
        3, ...
    ]  # first dimension is fixed, can have arbitrary shape


# create a document with tensors
doc = MyDoc(
    tensor=torch.zeros(128),
    image_tensor=torch.zeros(3, 224, 224),
    square_crop=torch.zeros(3, 64, 64),
    random_image=torch.zeros(3, 128, 256),
)

# automatic shape conversion
doc = MyDoc(
    tensor=torch.zeros(128),
    image_tensor=torch.zeros(224, 224, 3),  # will reshape to (3, 224, 224)
    square_crop=torch.zeros(3, 128, 128),
    random_image=torch.zeros(3, 64, 128),
)

# !! The following will raise an error due to shape mismatch !!
from pydantic import ValidationError

try:
    doc = MyDoc(
        tensor=torch.zeros(128),
        image_tensor=torch.zeros(224, 224),  # this will fail validation
        square_crop=torch.zeros(3, 128, 64),  # this will also fail validation
        random_image=torch.zeros(4, 64, 128),  # this will also fail validation
    )
except ValidationError as e:
    pass

Compatibility with torch.compile()

PyTorch 2 introduced compilation support in the form of torch.compile().

Currently, torch.compile() does not properly support subclasses of torch.Tensor such as TorchTensor. The PyTorch team is currently working on a fix for this issue.

In the meantime, you can use the following workaround:

Workaround: Convert TorchTensor to torch.Tensor before calling torch.compile()

Converting any TorchTensors to torch.Tensor before calling torch.compile() side-steps the issue:

from docarray import BaseDoc
from docarray.typing import TorchTensor
import torch


class MyDoc(BaseDoc):
    tensor: TorchTensor


doc = MyDoc(tensor=torch.zeros(128))


def foo(tensor: torch.Tensor):
    return tensor @ tensor.t()


foo_compiled = torch.compile(foo)

# unwrap the tensor before passing it to torch.compile()
foo_compiled(doc.tensor.unwrap())
Source code in docarray/typing/tensor/torch_tensor.py
@_register_proto(proto_type_name='torch_tensor')
class TorchTensor(
    torch.Tensor,
    AbstractTensor,
    Generic[ShapeT],
    metaclass=metaTorchAndNode,
):
    # Subclassing torch.Tensor following the advice from here:
    # https://pytorch.org/docs/stable/notes/extending.html#subclassing-torch-tensor
    """
    Subclass of `torch.Tensor`, intended for use in a Document.
    This enables (de)serialization from/to protobuf and json, data validation,
    and coercion from compatible types like numpy.ndarray.

    This type can also be used in a parametrized way,
    specifying the shape of the tensor.

    ---

    ```python
    from docarray import BaseDoc
    from docarray.typing import TorchTensor
    import torch


    class MyDoc(BaseDoc):
        tensor: TorchTensor
        image_tensor: TorchTensor[3, 224, 224]
        square_crop: TorchTensor[3, 'x', 'x']
        random_image: TorchTensor[
            3, ...
        ]  # first dimension is fixed, can have arbitrary shape


    # create a document with tensors
    doc = MyDoc(
        tensor=torch.zeros(128),
        image_tensor=torch.zeros(3, 224, 224),
        square_crop=torch.zeros(3, 64, 64),
        random_image=torch.zeros(3, 128, 256),
    )

    # automatic shape conversion
    doc = MyDoc(
        tensor=torch.zeros(128),
        image_tensor=torch.zeros(224, 224, 3),  # will reshape to (3, 224, 224)
        square_crop=torch.zeros(3, 128, 128),
        random_image=torch.zeros(3, 64, 128),
    )

    # !! The following will raise an error due to shape mismatch !!
    from pydantic import ValidationError

    try:
        doc = MyDoc(
            tensor=torch.zeros(128),
            image_tensor=torch.zeros(224, 224),  # this will fail validation
            square_crop=torch.zeros(3, 128, 64),  # this will also fail validation
            random_image=torch.zeros(4, 64, 128),  # this will also fail validation
        )
    except ValidationError as e:
        pass
    ```

    ---


    ## Compatibility with `torch.compile()`


    PyTorch 2 [introduced compilation support](https://pytorch.org/blog/pytorch-2.0-release/) in the form of `torch.compile()`.

    Currently, **`torch.compile()` does not properly support subclasses of `torch.Tensor` such as `TorchTensor`**.
    The PyTorch team is currently working on a [fix for this issue](https://github.com/pytorch/pytorch/pull/105167#issuecomment-1678050808).

    In the meantime, you can use the following workaround:

    ### Workaround: Convert `TorchTensor` to `torch.Tensor` before calling `torch.compile()`

    Converting any `TorchTensor`s to `torch.Tensor` before calling `torch.compile()` side-steps the issue:

    ```python
    from docarray import BaseDoc
    from docarray.typing import TorchTensor
    import torch


    class MyDoc(BaseDoc):
        tensor: TorchTensor


    doc = MyDoc(tensor=torch.zeros(128))


    def foo(tensor: torch.Tensor):
        return tensor @ tensor.t()


    foo_compiled = torch.compile(foo)

    # unwrap the tensor before passing it to torch.compile()
    foo_compiled(doc.tensor.unwrap())
    ```

    """

    __parametrized_meta__ = metaTorchAndNode

    @classmethod
    def _docarray_validate(
        cls: Type[T],
        value: Union[T, np.ndarray, str, Any],
    ) -> T:
        """Coerce `value` into a `TorchTensor`.

        Accepted inputs, checked in this order: an existing `TorchTensor`
        (returned as is), a native `torch.Tensor`, any other `AbstractTensor`,
        a TensorFlow tensor, a numpy array, a JAX array, a JSON string, and
        finally anything `torch.tensor()` can consume.

        :param value: the object to validate
        :return: a `TorchTensor`
        :raises ValueError: if `value` cannot be turned into a torch tensor
        """
        if isinstance(value, TorchTensor):
            return cast(T, value)
        elif isinstance(value, torch.Tensor):
            return cls._docarray_from_native(value)
        elif isinstance(value, AbstractTensor):
            return cls._docarray_from_ndarray(value._docarray_to_ndarray())
        elif tf_available and isinstance(value, tf.Tensor):
            return cls._docarray_from_ndarray(value.numpy())
        elif isinstance(value, np.ndarray):
            return cls._docarray_from_ndarray(value)
        elif jax_available and isinstance(value, jnp.ndarray):
            return cls._docarray_from_ndarray(value.__array__())
        elif isinstance(value, str):
            # a string is expected to hold JSON; the parsed payload is then
            # handed to the generic torch.tensor() conversion below
            value = orjson.loads(value)
        try:
            arr: torch.Tensor = torch.tensor(value)
            return cls._docarray_from_native(arr)
        except Exception as exc:
            # keep the underlying failure attached instead of discarding it
            raise ValueError(
                f'Expected a torch.Tensor compatible type, got {type(value)}'
            ) from exc

    def _docarray_to_json_compatible(self) -> np.ndarray:
        """
        Convert `TorchTensor` into a json compatible object
        :return: a representation of the tensor compatible with orjson
        """
        # .cpu() first, as to_protobuf() and _docarray_to_ndarray() do:
        # Tensor.numpy() only works on tensors that live in host memory
        return self.detach().cpu().numpy()

    def unwrap(self) -> torch.Tensor:
        """
        Return the original `torch.Tensor` without any memory copy.

        The original object remains intact and is still a Document `TorchTensor`,
        while the returned object is a pure `torch.Tensor`. Both objects share
        the same underlying memory.

        ---

        ```python
        from docarray.typing import TorchTensor
        import torch
        from pydantic import parse_obj_as


        t = parse_obj_as(TorchTensor, torch.zeros(3, 224, 224))
        # here t is a docarray TorchTensor
        t2 = t.unwrap()
        # here t2 is a pure torch.Tensor but t is still a Docarray TorchTensor
        # But both share the same underlying memory
        ```

        ---

        :return: a `torch.Tensor`
        """
        value = copy(self)  # as unintuitive as it sounds, this
        # does not do any relevant memory copying, just shallow
        # reference to the torch data
        value.__class__ = torch.Tensor  # type: ignore
        return value

    @classmethod
    def _docarray_from_native(cls: Type[T], value: torch.Tensor) -> T:
        """Create a `TorchTensor` from a native `torch.Tensor`

        The conversion is done in place by reassigning `value.__class__`,
        so the object passed in is the object returned.

        :param value: the native `torch.Tensor`
        :return: a `TorchTensor`
        """
        if cls.__unparametrizedcls__:  # This is not None if the tensor is parametrized
            value.__class__ = cls.__unparametrizedcls__  # type: ignore
        else:
            value.__class__ = cls
        return cast(T, value)

    @classmethod
    def from_ndarray(cls: Type[T], value: np.ndarray) -> T:
        """Create a `TorchTensor` from a numpy array

        :param value: the numpy array
        :return: a `TorchTensor`
        """
        return cls._docarray_from_native(torch.from_numpy(value))

    @classmethod
    def from_protobuf(cls: Type[T], pb_msg: 'NdArrayProto') -> 'T':
        """
        Read ndarray from a proto msg
        :param pb_msg: the message whose `dense` field holds buffer, shape and dtype
        :return: a `TorchTensor`
        :raises ValueError: if the message has neither a buffer nor a shape
        """
        source = pb_msg.dense
        if source.buffer:
            x = np.frombuffer(bytearray(source.buffer), dtype=source.dtype)
            return cls.from_ndarray(x.reshape(source.shape))
        elif len(source.shape) > 0:
            # No payload bytes (e.g. a tensor with a zero-sized dimension):
            # rebuild from the shape alone, but keep the recorded dtype so the
            # round trip does not silently turn everything into float64.
            # An unset dtype falls back to numpy's default.
            return cls.from_ndarray(
                np.zeros(source.shape, dtype=source.dtype or None)
            )
        else:
            raise ValueError(f'proto message {pb_msg} cannot be cast to a TorchTensor')

    def to_protobuf(self) -> 'NdArrayProto':
        """
        Transform self into a `NdArrayProto` protobuf message

        :return: a message whose `dense` field carries the raw bytes, shape and dtype
        """
        from docarray.proto import NdArrayProto

        nd_proto = NdArrayProto()

        value_np = self.detach().cpu().numpy()
        nd_proto.dense.buffer = value_np.tobytes()
        nd_proto.dense.ClearField('shape')
        nd_proto.dense.shape.extend(list(value_np.shape))
        nd_proto.dense.dtype = value_np.dtype.str

        return nd_proto

    @staticmethod
    def get_comp_backend() -> 'TorchCompBackend':
        """Return the computational backend of the tensor"""
        from docarray.computation.torch_backend import TorchCompBackend

        return TorchCompBackend()

    @classmethod
    def __torch_function__(cls, func, types, args=(), kwargs=None):
        # this tells torch to treat all of our custom tensors just like
        # torch.Tensor's. Otherwise, torch will complain that it doesn't
        # know how to handle our custom tensor type.
        docarray_torch_tensors = TorchTensor.__subclasses__()
        types_ = tuple(
            torch.Tensor if t in docarray_torch_tensors else t for t in types
        )
        return super().__torch_function__(func, types_, args, kwargs)

    def __deepcopy__(self, memo):
        """
        Custom implementation of deepcopy for TorchTensor to avoid storage sharing issues.
        """
        # Create a new tensor with the same data and properties
        new_tensor = self.clone()
        # Set the class to the custom TorchTensor class
        new_tensor.__class__ = self.__class__
        return new_tensor

    @classmethod
    def _docarray_from_ndarray(cls: Type[T], value: np.ndarray) -> T:
        """Create a tensor from a numpy array
        PS: this function is different from `from_ndarray` because it is private under the docarray namespace.
        This allows us to avoid breaking change if one day we introduce a Tensor backend with a `from_ndarray` method.
        """
        return cls.from_ndarray(value)

    def _docarray_to_ndarray(self) -> np.ndarray:
        """cast itself to a numpy array"""
        return self.detach().cpu().numpy()

    def new_empty(self, *args, **kwargs):
        """
        This method enables the deepcopy of `TorchTensor` by returning another instance of this subclass.
        If this function is not implemented, the deepcopy will throw an RuntimeError from Torch.
        """
        return self.__class__(*args, **kwargs)

__deepcopy__(memo)

Custom implementation of deepcopy for TorchTensor to avoid storage sharing issues.

Source code in docarray/typing/tensor/torch_tensor.py
def __deepcopy__(self, memo):
    """
    Deep-copy hook for TorchTensor: clone the data and keep the tensor subclass.

    :param memo: the memo dict supplied by the deepcopy protocol (not consulted here)
    :return: a clone of `self` whose class is the class of `self`
    """
    duplicate = self.clone()
    # re-tag the clone with the class of the source tensor
    duplicate.__class__ = self.__class__
    return duplicate

__docarray_validate_getitem__(item) classmethod

This method validates the input to AbstractTensor.__class_getitem__.

It is called at "class creation time", i.e. when a class is created with syntax of the form AnyTensor[shape].

The default implementation tries to cast any item to a tuple of ints. A subclass can override this method to implement custom validation logic.

The output of this is eventually passed to AbstractTensor.__docarray_validate_shape__ as its shape argument.

Raises TypeError if the input item cannot be converted to a tuple.

Parameters:

Name Type Description Default
item Any

The item to validate, passed to __class_getitem__ (Tensor[item]).

required

Returns:

Type Description
Tuple[int]

The validated item == the target shape of this tensor.

Source code in docarray/typing/tensor/abstract_tensor.py
@classmethod
def __docarray_validate_getitem__(cls, item: Any) -> Tuple[int]:
    """This method validates the input to `AbstractTensor.__class_getitem__`.

    It is called at "class creation time",
    i.e. when a class is created with syntax of the form AnyTensor[shape].

    The default implementation wraps a single int into a one-element tuple
    and converts any other `item` with `tuple(item)`.
    A subclass can override this method to implement custom validation logic.

    The output of this is eventually passed to
    [`AbstractTensor.__docarray_validate_shape__`]
    [docarray.typing.tensor.abstract_tensor.AbstractTensor.__docarray_validate_shape__]
    as its `shape` argument.

    :param item: The item to validate, passed to `__class_getitem__` (`Tensor[item]`).
    :return: The validated item == the target shape of this tensor.
    :raises TypeError: if `item` is neither an int nor convertible to a tuple.
    """
    if isinstance(item, int):
        return (item,)
    try:
        return tuple(item)
    except TypeError as exc:
        # re-raise with a shape-specific message, keeping the original cause
        raise TypeError(f'{item} is not a valid tensor shape.') from exc

__docarray_validate_shape__(t, shape) classmethod

Every tensor has to implement this method in order to enable syntax of the form AnyTensor[shape]. It is called when a tensor is assigned to a field of this type. i.e. when a tensor is passed to a Document field of type AnyTensor[shape].

The intended behaviour is as follows:

  • If the shape of t is equal to shape, return t.
  • If the shape of t is not equal to shape, but can be reshaped to shape, return t reshaped to shape.
  • If the shape of t is not equal to shape and cannot be reshaped to shape, raise a ValueError.

Parameters:

Name Type Description Default
t T

The tensor to validate.

required
shape Tuple[Union[int, str], ...]

The shape to validate against.

required

Returns:

Type Description
T

The validated tensor.

Source code in docarray/typing/tensor/abstract_tensor.py
@classmethod
def __docarray_validate_shape__(cls, t: T, shape: Tuple[Union[int, str], ...]) -> T:
    """Check `t` against the target `shape` declared via `AnyTensor[shape]`.

    Called when a tensor is assigned to a Document field of a parametrized
    tensor type. Three outcomes are possible:

    - the shape of `t` already equals `shape`: `t` is returned unchanged;
    - `shape` holds only ints: a warning is emitted and `t` is reshaped;
    - `shape` holds named dimensions (str) and/or one Ellipsis: `t` is
        returned unchanged if it is compatible, nothing is reshaped.

    :param t: The tensor to validate.
    :param shape: The shape to validate against.
    :return: The validated tensor.
    :raises ValueError: if the shapes are incompatible, if Ellipsis is used
        more than once, or if the backend reshape fails with a RuntimeError.
    """
    backend = t.get_comp_backend()
    actual = backend.shape(t)
    if actual == shape:
        return t

    has_symbolic_dims = any(
        isinstance(dim, str) or dim == Ellipsis for dim in shape
    )
    if not has_symbolic_dims:
        # purely numeric target: attempt a reshape
        target = cast(Tuple[int], shape)
        warnings.warn(
            f'Tensor shape mismatch. Reshaping tensor '
            f'of shape {actual} to shape {target}'
        )
        try:
            reshaped = cls._docarray_from_native(backend.reshape(t, target))
            return cast(T, reshaped)
        except RuntimeError:
            raise ValueError(
                f'Cannot reshape tensor of shape {actual} to shape {target}'
            )

    ellipsis_positions = [
        index for index, dim in enumerate(shape) if dim == Ellipsis
    ]
    if len(ellipsis_positions) > 1:
        raise ValueError(
            f'Cannot use Ellipsis (...) more than once for the shape {shape}'
        )
    if ellipsis_positions:
        where = ellipsis_positions[0]
        # the Ellipsis stands for at least one dimension; replace it with as
        # many uniquely named placeholders as needed to match the tensor rank
        fill_count = max(len(actual) - len(shape) + 1, 1)
        placeholders = tuple(
            f'__dim_var_{index}__' for index in range(fill_count)
        )
        shape = shape[:where] + placeholders + shape[where + 1 :]

    mismatch_msg = f'Tensor shape mismatch. Expected {shape}, got {actual}'
    if len(actual) != len(shape):
        raise ValueError(mismatch_msg)

    # a named dimension must resolve to the same size everywhere it appears
    bound_sizes: Dict[str, int] = {}
    for size, dim in zip(actual, shape):
        if isinstance(dim, int):
            if size != dim:
                raise ValueError(mismatch_msg)
        elif isinstance(dim, str):
            if bound_sizes.setdefault(dim, size) != size:
                raise ValueError(mismatch_msg)
    return t

__getitem__(item) abstractmethod

Get a slice of this tensor.

Source code in docarray/typing/tensor/abstract_tensor.py
@abc.abstractmethod
def __getitem__(self: T, item) -> T:
    """Get a slice of this tensor.

    Declared abstract: the body is only a placeholder, so the indexing logic
    has to come from the concrete tensor class.

    :param item: the index or slice to read
    :return: the selected part of the tensor, annotated with the type of `self`
    """
    ...

__iter__() abstractmethod

Iterate over the elements of this tensor.

Source code in docarray/typing/tensor/abstract_tensor.py
@abc.abstractmethod
def __iter__(self):
    """Iterate over the elements of this tensor.

    Declared abstract: the body is only a placeholder, so the iteration logic
    has to come from the concrete tensor class.
    """
    ...

__setitem__(index, value) abstractmethod

Set a slice of this tensor.

Source code in docarray/typing/tensor/abstract_tensor.py
@abc.abstractmethod
def __setitem__(self, index, value):
    """Set a slice of this tensor.

    Declared abstract: the body is only a placeholder, so the assignment logic
    has to come from the concrete tensor class.

    :param index: the index or slice to write to
    :param value: the data to store at `index`
    """
    ...

from_ndarray(value) classmethod

Create a TorchTensor from a numpy array

Parameters:

Name Type Description Default
value ndarray

the numpy array

required

Returns:

Type Description
T

a TorchTensor

Source code in docarray/typing/tensor/torch_tensor.py
@classmethod
def from_ndarray(cls: Type[T], value: np.ndarray) -> T:
    """Build a `TorchTensor` out of a numpy array.

    The array is first turned into a native torch tensor with
    `torch.from_numpy` and then handed to `cls._docarray_from_native`.

    :param value: the numpy array
    :return: a `TorchTensor`
    """
    native = torch.from_numpy(value)
    return cls._docarray_from_native(native)

from_protobuf(pb_msg) classmethod

Read ndarray from a proto msg

Parameters:

Name Type Description Default
pb_msg NdArrayProto
required

Returns:

Type Description
T

a TorchTensor

Source code in docarray/typing/tensor/torch_tensor.py
@classmethod
def from_protobuf(cls: Type[T], pb_msg: 'NdArrayProto') -> 'T':
    """
    Read ndarray from a proto msg

    :param pb_msg: the message whose `dense` field holds buffer, shape and dtype
    :return: a `TorchTensor`
    :raises ValueError: if the message has neither a buffer nor a shape
    """
    source = pb_msg.dense
    if source.buffer:
        x = np.frombuffer(bytearray(source.buffer), dtype=source.dtype)
        return cls.from_ndarray(x.reshape(source.shape))
    if len(source.shape) > 0:
        # No payload bytes (e.g. a tensor with a zero-sized dimension):
        # rebuild from the shape alone, but keep the recorded dtype so the
        # round trip does not silently turn everything into float64.
        # An unset dtype falls back to numpy's default.
        return cls.from_ndarray(np.zeros(source.shape, dtype=source.dtype or None))
    raise ValueError(f'proto message {pb_msg} cannot be cast to a TorchTensor')

get_comp_backend() staticmethod

Return the computational backend of the tensor

Source code in docarray/typing/tensor/torch_tensor.py
@staticmethod
def get_comp_backend() -> 'TorchCompBackend':
    """Provide the computational backend used for torch tensors.

    :return: a newly created `TorchCompBackend`
    """
    # the backend module is imported inside the function, on each call
    from docarray.computation.torch_backend import TorchCompBackend as backend_cls

    return backend_cls()

new_empty(*args, **kwargs)

This method enables the deepcopy of TorchTensor by returning another instance of this subclass. If this function is not implemented, the deepcopy will throw an RuntimeError from Torch.

Source code in docarray/typing/tensor/torch_tensor.py
def new_empty(self, *args, **kwargs):
    """
    Construct another instance of this tensor's own class from the given arguments.

    Needed for deepcopy of `TorchTensor`: without this override the deepcopy
    throws a RuntimeError from Torch.

    :param args: positional arguments forwarded to the class constructor
    :param kwargs: keyword arguments forwarded to the class constructor
    :return: a new instance of `self.__class__`
    """
    own_type = self.__class__
    return own_type(*args, **kwargs)

to_protobuf()

Transform self into a NdArrayProto protobuf message

Source code in docarray/typing/tensor/torch_tensor.py
def to_protobuf(self) -> 'NdArrayProto':
    """
    Serialize this tensor into a `NdArrayProto` protobuf message.

    The tensor is detached, moved to the CPU and converted to numpy; its raw
    bytes, shape and dtype string are stored in the `dense` field.

    :return: the populated `NdArrayProto`
    """
    from docarray.proto import NdArrayProto

    message = NdArrayProto()
    as_numpy = self.detach().cpu().numpy()

    dense = message.dense
    dense.buffer = as_numpy.tobytes()
    # reset the repeated field before writing the dimensions
    dense.ClearField('shape')
    dense.shape.extend(list(as_numpy.shape))
    dense.dtype = as_numpy.dtype.str

    return message

unwrap()

Return the original torch.Tensor without any memory copy.

The original object remains intact and is still a Document TorchTensor, while the returned object is a pure torch.Tensor; both objects share the same underlying memory.


from docarray.typing import TorchTensor
import torch
from pydantic import parse_obj_as


t = parse_obj_as(TorchTensor, torch.zeros(3, 224, 224))
# here t is a docarray TorchTensor
t2 = t.unwrap()
# here t2 is a pure torch.Tensor but t is still a Docarray TorchTensor
# But both share the same underlying memory

Returns:

Type Description
Tensor

a torch.Tensor

Source code in docarray/typing/tensor/torch_tensor.py
def unwrap(self) -> torch.Tensor:
    """
    Return a plain `torch.Tensor` for this tensor without copying its memory.

    `self` remains intact and is still a Document `TorchTensor`; the returned
    object is a pure `torch.Tensor`. Both objects share the same underlying
    memory.

    ---

    ```python
    from docarray.typing import TorchTensor
    import torch
    from pydantic import parse_obj_as


    t = parse_obj_as(TorchTensor, torch.zeros(3, 224, 224))
    # here t is a docarray TorchTensor
    t2 = t.unwrap()
    # here t2 is a pure torch.Tensor but t is still a Docarray TorchTensor
    # But both share the same underlying memory
    ```

    ---

    :return: a `torch.Tensor`
    """
    # copy() is shallow: a new Python object that references the same torch
    # data, so no relevant memory is copied
    plain = copy(self)
    plain.__class__ = torch.Tensor  # type: ignore
    return plain

docarray.typing.tensor.AnyTensor

Bases: AbstractTensor, Generic[ShapeT]

Represents a tensor object that can be used with TensorFlow, PyTorch, and NumPy type. Note: when doing type checking (mypy or pycharm type checker), this class will actually be replaced by a Union of the three tensor types. You can reason about this class as if it was a Union.

from docarray import BaseDoc
from docarray.typing import AnyTensor


class MyTensorDoc(BaseDoc):
    tensor: AnyTensor


# Example usage with TensorFlow:
# import tensorflow as tf

# doc = MyTensorDoc(tensor=tf.zeros((1000, 2)))

# Example usage with PyTorch:
import torch

doc = MyTensorDoc(tensor=torch.zeros(1000, 2))

# Example usage with NumPy:
import numpy as np

doc = MyTensorDoc(tensor=np.zeros((1000, 2)))
Source code in docarray/typing/tensor/tensor.py
class AnyTensor(AbstractTensor, Generic[ShapeT]):
    """
    Represents a tensor object that can be used with TensorFlow, PyTorch, and NumPy type.

    !!! note
        when doing type checking (mypy or pycharm type checker), this class will actually be replaced by a Union of the three
        tensor types. You can reason about this class as if it was a Union.

    ```python
    from docarray import BaseDoc
    from docarray.typing import AnyTensor


    class MyTensorDoc(BaseDoc):
        tensor: AnyTensor


    # Example usage with TensorFlow:
    # import tensorflow as tf

    # doc = MyTensorDoc(tensor=tf.zeros((1000, 2)))

    # Example usage with PyTorch:
    import torch

    doc = MyTensorDoc(tensor=torch.zeros(1000, 2))

    # Example usage with NumPy:
    import numpy as np

    doc = MyTensorDoc(tensor=np.zeros((1000, 2)))
    ```
    """

    # The container protocol methods below are empty placeholders:
    # _docarray_validate always returns an instance of a concrete tensor
    # class, never an AnyTensor.

    def __getitem__(self: T, item):
        pass

    def __setitem__(self, index, value):
        pass

    def __iter__(self):
        pass

    def __len__(self):
        pass

    @classmethod
    def _docarray_from_native(cls: Type[T], value: Any):
        """Not supported on `AnyTensor`; always raises `RuntimeError`."""
        raise RuntimeError(f'This method should not be called on {cls}.')

    @staticmethod
    def get_comp_backend():
        """Not supported on `AnyTensor`; always raises `RuntimeError`."""
        raise RuntimeError('This method should not be called on AnyTensor.')

    def to_protobuf(self):
        """Not supported on `AnyTensor`; always raises `RuntimeError`."""
        raise RuntimeError(f'This method should not be called on {self.__class__}.')

    def _docarray_to_json_compatible(self):
        """Not supported on `AnyTensor`; always raises `RuntimeError`."""
        raise RuntimeError(f'This method should not be called on {self.__class__}.')

    @classmethod
    def from_protobuf(cls: Type[T], pb_msg: T):
        """Not supported on `AnyTensor`; always raises `RuntimeError`."""
        raise RuntimeError(f'This method should not be called on {cls}.')

    @classmethod
    def _docarray_validate(
        cls: Type[T],
        value: Union[T, np.ndarray, Any],
    ):
        """Turn `value` into an instance of one of the concrete tensor types.

        Frameworks are tried in a fixed order, each only if it is available:
        torch, TensorFlow, JAX. Anything left is handed to
        `NdArray._docarray_validate`.

        :param value: the object to validate
        :return: a `TorchTensor`, `TensorFlowTensor` or `JaxArray` for the
            matching framework, otherwise whatever `NdArray._docarray_validate` returns
        :raises TypeError: if the `NdArray` validation fails as well
        """
        # Check for TorchTensor first, then TensorFlowTensor, then NdArray
        if torch_available:
            if isinstance(value, TorchTensor):
                return value
            elif isinstance(value, torch.Tensor):
                return TorchTensor._docarray_from_native(value)  # noqa
        if tf_available:
            if isinstance(value, TensorFlowTensor):
                return value
            elif isinstance(value, tf.Tensor):
                return TensorFlowTensor._docarray_from_native(value)  # noqa
        if jax_available:
            if isinstance(value, JaxArray):
                return value
            elif isinstance(value, jnp.ndarray):
                return JaxArray._docarray_from_native(value)  # noqa
        try:
            return NdArray._docarray_validate(value)
        except Exception as e:  # noqa
            # attach the underlying failure as the cause instead of printing
            # it to stdout
            raise TypeError(
                f"Expected one of [torch.Tensor, tensorflow.Tensor, numpy.ndarray] "
                f"compatible type, got {type(value)}"
            ) from e

__docarray_validate_getitem__(item) classmethod

This method validates the input to AbstractTensor.__class_getitem__.

It is called at "class creation time", i.e. when a class is created with syntax of the form AnyTensor[shape].

The default implementation tries to cast any item to a tuple of ints. A subclass can override this method to implement custom validation logic.

The output of this is eventually passed to AbstractTensor.__docarray_validate_shape__ as its shape argument.

Raises TypeError if the input item cannot be converted to a tuple.

Parameters:

Name Type Description Default
item Any

The item to validate, passed to __class_getitem__ (Tensor[item]).

required

Returns:

Type Description
Tuple[int]

The validated item == the target shape of this tensor.

Source code in docarray/typing/tensor/abstract_tensor.py
@classmethod
def __docarray_validate_getitem__(cls, item: Any) -> Tuple[int]:
    """This method validates the input to `AbstractTensor.__class_getitem__`.

    It is called at "class creation time",
    i.e. when a class is created with syntax of the form AnyTensor[shape].

    The default implementation wraps a single int into a one-element tuple
    and converts any other `item` with `tuple(item)`.
    A subclass can override this method to implement custom validation logic.

    The output of this is eventually passed to
    [`AbstractTensor.__docarray_validate_shape__`]
    [docarray.typing.tensor.abstract_tensor.AbstractTensor.__docarray_validate_shape__]
    as its `shape` argument.

    :param item: The item to validate, passed to `__class_getitem__` (`Tensor[item]`).
    :return: The validated item == the target shape of this tensor.
    :raises TypeError: if `item` is neither an int nor convertible to a tuple.
    """
    if isinstance(item, int):
        return (item,)
    try:
        return tuple(item)
    except TypeError as exc:
        # re-raise with a shape-specific message, keeping the original cause
        raise TypeError(f'{item} is not a valid tensor shape.') from exc

__docarray_validate_shape__(t, shape) classmethod

Every tensor has to implement this method in order to enable syntax of the form AnyTensor[shape]. It is called when a tensor is assigned to a field of this type. i.e. when a tensor is passed to a Document field of type AnyTensor[shape].

The intended behaviour is as follows:

  • If the shape of t is equal to shape, return t.
  • If the shape of t is not equal to shape, but can be reshaped to shape, return t reshaped to shape.
  • If the shape of t is not equal to shape and cannot be reshaped to shape, raise a ValueError.

Parameters:

Name Type Description Default
t T

The tensor to validate.

required
shape Tuple[Union[int, str], ...]

The shape to validate against.

required

Returns:

Type Description
T

The validated tensor.

Source code in docarray/typing/tensor/abstract_tensor.py
@classmethod
def __docarray_validate_shape__(cls, t: T, shape: Tuple[Union[int, str], ...]) -> T:
    """Check `t` against the target `shape` declared via `AnyTensor[shape]`.

    Called when a tensor is assigned to a Document field of a parametrized
    tensor type. Three outcomes are possible:

    - the shape of `t` already equals `shape`: `t` is returned unchanged;
    - `shape` holds only ints: a warning is emitted and `t` is reshaped;
    - `shape` holds named dimensions (str) and/or one Ellipsis: `t` is
        returned unchanged if it is compatible, nothing is reshaped.

    :param t: The tensor to validate.
    :param shape: The shape to validate against.
    :return: The validated tensor.
    :raises ValueError: if the shapes are incompatible, if Ellipsis is used
        more than once, or if the backend reshape fails with a RuntimeError.
    """
    backend = t.get_comp_backend()
    actual = backend.shape(t)
    if actual == shape:
        return t

    has_symbolic_dims = any(
        isinstance(dim, str) or dim == Ellipsis for dim in shape
    )
    if not has_symbolic_dims:
        # purely numeric target: attempt a reshape
        target = cast(Tuple[int], shape)
        warnings.warn(
            f'Tensor shape mismatch. Reshaping tensor '
            f'of shape {actual} to shape {target}'
        )
        try:
            reshaped = cls._docarray_from_native(backend.reshape(t, target))
            return cast(T, reshaped)
        except RuntimeError:
            raise ValueError(
                f'Cannot reshape tensor of shape {actual} to shape {target}'
            )

    ellipsis_positions = [
        index for index, dim in enumerate(shape) if dim == Ellipsis
    ]
    if len(ellipsis_positions) > 1:
        raise ValueError(
            f'Cannot use Ellipsis (...) more than once for the shape {shape}'
        )
    if ellipsis_positions:
        where = ellipsis_positions[0]
        # the Ellipsis stands for at least one dimension; replace it with as
        # many uniquely named placeholders as needed to match the tensor rank
        fill_count = max(len(actual) - len(shape) + 1, 1)
        placeholders = tuple(
            f'__dim_var_{index}__' for index in range(fill_count)
        )
        shape = shape[:where] + placeholders + shape[where + 1 :]

    mismatch_msg = f'Tensor shape mismatch. Expected {shape}, got {actual}'
    if len(actual) != len(shape):
        raise ValueError(mismatch_msg)

    # a named dimension must resolve to the same size everywhere it appears
    bound_sizes: Dict[str, int] = {}
    for size, dim in zip(actual, shape):
        if isinstance(dim, int):
            if size != dim:
                raise ValueError(mismatch_msg)
        elif isinstance(dim, str):
            if bound_sizes.setdefault(dim, size) != size:
                raise ValueError(mismatch_msg)
    return t

unwrap()

Return the native tensor object that this DocList tensor wraps.

Source code in docarray/typing/tensor/abstract_tensor.py
def unwrap(self):
    """Return the native tensor object that this DocList tensor wraps."""