Source code for docarray.document.mixins.sugar

from typing import overload, TYPE_CHECKING, Union, Callable, Optional, Tuple

if TYPE_CHECKING:  # pragma: no cover
    from docarray import DocumentArray
    from docarray.typing import AnyDNN, T, ArrayType

    import numpy as np


class SingletonSugarMixin:
    """Provide sugary syntax for :class:`Document` by inheriting methods from :class:`DocumentArray`

    Each method wraps the current Document in a temporary ``DocumentArray``
    and forwards all positional and keyword arguments to the method of the
    same name on that array.
    """

    # overload_inject_start_match
    @overload
    def match(
        self: 'T',
        darray: 'DocumentArray',
        metric: Union[
            str, Callable[['ArrayType', 'ArrayType'], 'np.ndarray']
        ] = 'cosine',
        limit: Optional[Union[int, float]] = 20,
        normalization: Optional[Tuple[float, float]] = None,
        metric_name: Optional[str] = None,
        batch_size: Optional[int] = None,
        exclude_self: bool = False,
        only_id: bool = False,
        use_scipy: bool = False,
        num_worker: Optional[int] = 1,
    ) -> 'T':
        """Matching the current Document against a set of Documents.

        The result will be stored in :attr:`.matches`.

        .. note::
            When you want to match a set Documents (let's call it set `A`) against another set of Documents (set `B`),
            where you want to find for each element in `A` what are its nearest neighbours in `B`.
            Then you need :meth:`DocumentArray.match`

        :param darray: the other DocumentArray to match against
        :param metric: the distance metric
        :param limit: the maximum number of matches, when not given defaults to 20.
        :param normalization: a tuple [a, b] to be used with min-max normalization,
            the min distance will be rescaled to `a`, the max distance will be rescaled to `b`
            all values will be rescaled into range `[a, b]`.
        :param metric_name: if provided, then match result will be marked with this string.
        :param batch_size: if provided, then ``darray`` is loaded in batches, where each of them is at most ``batch_size``
            elements. When `darray` is big, this can significantly speedup the computation.
        :param exclude_self: if set, Documents in ``darray`` with same ``id`` as the left-hand values will not be
            considered as matches.
        :param only_id: if set, then returning matches will only contain ``id``
        :param use_scipy: if set, use ``scipy`` as the computation backend. Note, ``scipy`` does not support distance
            on sparse matrix.
        :param num_worker: the number of parallel workers.
            If not given, then the number of CPUs in the system will be used.

            .. note::
                This argument is only effective when ``batch_size`` is set.
        """

    # overload_inject_end_match
    def match(self: 'T', *args, **kwargs) -> 'T':
        # implementation_stub_inject_start_match

        """Matching the current Document against a set of Documents.

        :param darray: the other DocumentArray to match against
        :param metric: the distance metric
        :param limit: the maximum number of matches, when not given defaults to 20.
        :param normalization: a tuple [a, b] to be used with min-max normalization,
            the min distance will be rescaled to `a`, the max distance will be rescaled to `b`
            all values will be rescaled into range `[a, b]`.
        :param metric_name: if provided, then match result will be marked with this string.
        :param batch_size: if provided, then ``darray`` is loaded in batches, where each of them is at most ``batch_size``
            elements. When `darray` is big, this can significantly speedup the computation.
        :param exclude_self: if set, Documents in ``darray`` with same ``id`` as the left-hand values will not be
            considered as matches.
        :param only_id: if set, then returning matches will only contain ``id``
        :param use_scipy: if set, use ``scipy`` as the computation backend. Note, ``scipy`` does not support distance
            on sparse matrix.
        :param num_worker: the number of parallel workers. If not given, then the number of CPUs in the system will be used.

            .. note::
                This argument is only effective when ``batch_size`` is set.
        :return: itself after modification

        .. # noqa: DAR102
        .. # noqa: DAR202
        .. # noqa: DAR101
        .. # noqa: DAR003
        """
        # implementation_stub_inject_end_match
        # Imported at call time rather than at module level (the module-level
        # import is only visible to type checkers).
        from docarray import DocumentArray

        # Wrap this Document in a temporary DocumentArray so the array-level
        # ``match`` can be reused; its return value is discarded on purpose.
        _tmp = DocumentArray(self)
        _tmp.match(*args, **kwargs)
        return self

    @overload
    def embed(
        self: 'T',
        embed_model: 'AnyDNN',
        device: str = 'cpu',
        batch_size: int = 256,
    ) -> 'T':
        """Fill the embedding of Documents inplace by using `embed_model`

        :param embed_model: the embedding model written in Keras/Pytorch/Paddle
        :param device: the computational device for `embed_model`, can be either
            `cpu` or `cuda`.
        :param batch_size: number of Documents in a batch for embedding
        :return: itself after modification
        """

    def embed(self: 'T', *args, **kwargs) -> 'T':
        """Fill the embedding of Documents inplace by using `embed_model`

        All arguments are forwarded unchanged to :meth:`DocumentArray.embed`.

        :param embed_model: the embedding model written in Keras/Pytorch/Paddle
        :param device: the computational device for `embed_model`, can be either
            `cpu` or `cuda`.
        :param batch_size: number of Documents in a batch for embedding
        :return: itself after modification

        .. # noqa: DAR101
        """
        from docarray import DocumentArray

        # Wrap this Document in a temporary DocumentArray so the array-level
        # ``embed`` can be reused; its return value is discarded on purpose.
        _tmp = DocumentArray(self)
        _tmp.embed(*args, **kwargs)
        return self

    def post(self: 'T', *args, **kwargs) -> 'T':
        """Posting itself to a remote Flow/Sandbox and get the modified Document back

        All arguments are forwarded unchanged to :meth:`DocumentArray.post`.

        :param host: a host string. Can be one of the following:
            - `grpc://192.168.0.123:8080/endpoint`
            - `ws://192.168.0.123:8080/endpoint`
            - `http://192.168.0.123:8080/endpoint`
            - `jinahub://Hello/endpoint`
            - `jinahub+docker://Hello/endpoint`
            - `jinahub+docker://Hello/v0.0.1/endpoint`
            - `jinahub+docker://Hello/latest/endpoint`
            - `jinahub+sandbox://Hello/endpoint`
        :param show_progress: if to show a progressbar
        :param batch_size: number of Document on each request
        :param parameters: parameters to send in the request
        :return: the first element of the result returned by
            :meth:`DocumentArray.post`

        .. # noqa: DAR101
        """
        from docarray import DocumentArray

        _tmp = DocumentArray(self)
        # Unlike ``match``/``embed`` this does not return ``self``: the result
        # of the array-level call is indexed with ``[0]`` and that element is
        # what the caller receives.
        return _tmp.post(*args, **kwargs)[0]