from typing import overload, TYPE_CHECKING, Union, Callable, Optional, Tuple
if TYPE_CHECKING:  # pragma: no cover
    from docarray import DocumentArray
    from docarray.typing import AnyDNN, T, ArrayType
    import numpy as np


class SingletonSugarMixin:
    """Provide sugary syntax for :class:`Document` by inheriting methods from :class:`DocumentArray`"""

    # overload_inject_start_match
    @overload
    def match(
        self: 'T',
        darray: 'DocumentArray',
        metric: Union[
            str, Callable[['ArrayType', 'ArrayType'], 'np.ndarray']
        ] = 'cosine',
        limit: Optional[Union[int, float]] = 20,
        normalization: Optional[Tuple[float, float]] = None,
        metric_name: Optional[str] = None,
        batch_size: Optional[int] = None,
        exclude_self: bool = False,
        only_id: bool = False,
        use_scipy: bool = False,
        num_worker: Optional[int] = 1,
    ) -> 'T':
"""Matching the current Document against a set of Documents.
The result will be stored in :attr:`.matches`.
.. note::
When you want to match a set Documents (let's call it set `A`) against another set of Documents (set `B`),
where you want to find for each element in `A` what are its nearest neighbours in `B`.
Then you need :meth:`DocumentArray.match`
:param darray: the other DocumentArray to match against
:param metric: the distance metric
:param limit: the maximum number of matches, when not given defaults to 20.
:param normalization: a tuple [a, b] to be used with min-max normalization,
the min distance will be rescaled to `a`, the max distance will be rescaled to `b`
all values will be rescaled into range `[a, b]`.
:param metric_name: if provided, then match result will be marked with this string.
:param batch_size: if provided, then ``darray`` is loaded in batches, where each of them is at most ``batch_size``
elements. When `darray` is big, this can significantly speedup the computation.
:param exclude_self: if set, Documents in ``darray`` with same ``id`` as the left-hand values will not be
considered as matches.
:param only_id: if set, then returning matches will only contain ``id``
:param use_scipy: if set, use ``scipy`` as the computation backend. Note, ``scipy`` does not support distance
on sparse matrix.
:param num_worker: the number of parallel workers. If not given, then the number of CPUs in the system will be used.
.. note::
This argument is only effective when ``batch_size`` is set.
"""
# overload_inject_end_match
    def match(self: 'T', *args, **kwargs) -> 'T':
        # implementation_stub_inject_start_match
"""Matching the current Document against a set of Documents.
:param darray: the other DocumentArray to match against
:param metric: the distance metric
:param limit: the maximum number of matches, when not given defaults to 20.
:param normalization: a tuple [a, b] to be used with min-max normalization,
the min distance will be rescaled to `a`, the max distance will be rescaled to `b`
all values will be rescaled into range `[a, b]`.
:param metric_name: if provided, then match result will be marked with this string.
:param batch_size: if provided, then ``darray`` is loaded in batches, where each of them is at most ``batch_size``
elements. When `darray` is big, this can significantly speedup the computation.
:param exclude_self: if set, Documents in ``darray`` with same ``id`` as the left-hand values will not be
considered as matches.
:param only_id: if set, then returning matches will only contain ``id``
:param use_scipy: if set, use ``scipy`` as the computation backend. Note, ``scipy`` does not support distance
on sparse matrix.
:param num_worker: the number of parallel workers. If not given, then the number of CPUs in the system will be used.
.. note::
This argument is only effective when ``batch_size`` is set.
:return: itself after modification
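
        A minimal usage sketch (the random embeddings below are illustrative
        only, not part of the library):

        .. code-block:: python

            import numpy as np
            from docarray import Document, DocumentArray

            d = Document(embedding=np.array([1.0, 0.0]))
            da = DocumentArray(
                [Document(embedding=np.random.rand(2)) for _ in range(10)]
            )
            d.match(da, limit=3)  # fills d.matches with the 3 nearest Documents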
        .. # noqa: DAR102
        .. # noqa: DAR202
        .. # noqa: DAR101
        .. # noqa: DAR003
        """
        # implementation_stub_inject_end_match
        from docarray import DocumentArray

        # Wrap this single Document in a DocumentArray so the array-level
        # ``match`` implementation can be reused; the matches are written
        # onto this Document in place.
        _tmp = DocumentArray(self)
        _tmp.match(*args, **kwargs)
        return self

    @overload
    def embed(
        self: 'T',
        embed_model: 'AnyDNN',
        device: str = 'cpu',
        batch_size: int = 256,
    ) -> 'T':
        """Fill the embedding of Documents in place by using `embed_model`.

        :param embed_model: the embedding model written in Keras/PyTorch/Paddle
        :param device: the computational device for `embed_model`, can be either
            `cpu` or `cuda`.
        :param batch_size: the number of Documents in a batch for embedding
        """
    def embed(self: 'T', *args, **kwargs) -> 'T':
        """Fill the embedding of Documents in place by using `embed_model`.

        :param embed_model: the embedding model written in Keras/PyTorch/Paddle
        :param device: the computational device for `embed_model`, can be either
            `cpu` or `cuda`.
        :param batch_size: the number of Documents in a batch for embedding
        :return: itself after modification
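
        A minimal usage sketch (the tiny PyTorch model below is a placeholder
        for any real embedding model):

        .. code-block:: python

            import torch
            from docarray import Document

            model = torch.nn.Linear(2, 8)
            d = Document(tensor=torch.tensor([1.0, 2.0]))
            d.embed(model)  # d.embedding now holds the model output for d.tensor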
"""
        from docarray import DocumentArray

        # Reuse the array-level ``embed`` by wrapping this single Document;
        # the embedding is written onto this Document in place.
        _tmp = DocumentArray(self)
        _tmp.embed(*args, **kwargs)
        return self

    def post(self: 'T', *args, **kwargs) -> 'T':
        """Post itself to a remote Flow/Sandbox and get the processed Document back.

        :param host: a host string. Can be one of the following:

            - `grpc://192.168.0.123:8080/endpoint`
            - `ws://192.168.0.123:8080/endpoint`
            - `http://192.168.0.123:8080/endpoint`
            - `jinahub://Hello/endpoint`
            - `jinahub+docker://Hello/endpoint`
            - `jinahub+docker://Hello/v0.0.1/endpoint`
            - `jinahub+docker://Hello/latest/endpoint`
            - `jinahub+sandbox://Hello/endpoint`

        :param show_progress: whether to show a progress bar
        :param batch_size: the number of Documents in each request
        :param parameters: the parameters to send in the request
        :return: the processed Document returned from the remote endpoint
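
        A minimal usage sketch (the host below is a placeholder, not a live
        endpoint):

        .. code-block:: python

            from docarray import Document

            d = Document(text='hello')
            d = d.post('grpc://192.168.0.123:8080/endpoint')  # processed copy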
"""
        from docarray import DocumentArray

        # Wrap in a DocumentArray, post it, and unwrap the single returned Document.
        _tmp = DocumentArray(self)
        return _tmp.post(*args, **kwargs)[0]