Source code for docarray.array.storage.opensearch.seqlike

from typing import Iterable, Iterator, Union, TYPE_CHECKING, List, Dict
from docarray.array.storage.base.seqlike import BaseSequenceLikeMixin
import warnings

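# NOTE: imported at runtime (not under ``TYPE_CHECKING``) because
# ``SequenceLikeMixin.__contains__`` uses ``Document`` in an ``isinstance`` check.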
from docarray import Document


class SequenceLikeMixin(BaseSequenceLikeMixin):
    """Implement sequence-like methods for DocumentArray with OpenSearch as storage"""

    def __eq__(self, other):
        """Compare this object to the other, returns True if and only if other
        has the same type as self and other has the same meta information

        :param other: the other object to check for equality
        :return: ``True`` if other is equal to self
        """
        # Two arrays are considered the same if they have the same type, the
        # same client meta data and the same config. The type check comes first
        # so ``_client`` / ``_config`` are only read from a same-typed object.
        return (
            type(self) is type(other)
            and self._client.get_meta() == other._client.get_meta()
            and self._config == other._config
        )

    def __len__(self):
        """Return the length of :class:`DocumentArray` that uses OpenSearch as storage

        The length is the ``"count"`` field of the client's ``count`` response
        for the configured index. Any ordinary error raised while obtaining it
        is treated as "empty" and ``0`` is returned.

        :return: the length of this :class:`DocumentArrayOpenSearch` object
        """
        try:
            return self._client.count(index=self._config.index_name)["count"]
        except Exception:
            # Best effort: a failing count request is reported as an empty
            # array. ``Exception`` (rather than a bare ``except``) lets
            # ``KeyboardInterrupt`` / ``SystemExit`` propagate.
            return 0

    def __contains__(self, x: Union[str, 'Document']):
        """Check if ``x`` is contained in this :class:`DocumentArray` with OpenSearch storage

        :param x: the id of the document to check or the document object itself
        :return: True if ``x`` is contained in self; objects that are neither a
            ``str`` nor a ``Document`` are never contained
        """
        if isinstance(x, str):
            return self._doc_id_exists(x)
        if isinstance(x, Document):
            return self._doc_id_exists(x.id)
        return False

    def __repr__(self):
        """Return the string representation of :class:`DocumentArrayOpenSearch` object

        :return: string representation of this object, containing the class
            name, the current length and the ``id()`` of the object
        """
        return f'<{self.__class__.__name__} (length={len(self)}) at {id(self)}>'

    @staticmethod
    def _parse_index_ids_from_bulk_info(
        accumulated_info: List[Dict],
    ) -> Dict[str, List[int]]:
        """Group the document ids found in bulk-operation info by operation type

        Each entry of ``accumulated_info`` maps an operation type (e.g.
        ``'index'``) to the info of that operation. Operations whose info
        carries no ``'_id'`` are skipped. Within this class the result is used
        as the ids of the operations that succeeded.

        :param accumulated_info: accumulated info entries of a bulk request
        :return: dict mapping each operation type to the list of ids found for
            it, in the order they were encountered
        """
        parsed_ids = {}

        for info in accumulated_info:
            for op_type, op_info in info.items():
                if '_id' in op_info:
                    parsed_ids.setdefault(op_type, []).append(op_info['_id'])

        return parsed_ids

    def _upload_batch(self, docs: Iterable['Document'], **kwargs) -> List[int]:
        """Send ``docs`` to OpenSearch in one batch and refresh the index

        :param docs: the documents to upload
        :param kwargs: extra keyword arguments forwarded to ``_send_requests``
        :return: the ids reported for ``'index'`` operations, or an empty list
            when there are none
        """
        requests = [self._document_to_opensearch_request(doc) for doc in docs]
        accumulated_info = self._send_requests(requests, **kwargs)
        self._refresh(self._config.index_name)

        successful_ids = self._parse_index_ids_from_bulk_info(accumulated_info)
        return successful_ids.get('index', [])

    def _extend(self, docs: Iterable['Document'], **kwargs):
        """Upload ``docs`` and register the newly indexed ids

        :param docs: the documents to add; the iterable is consumed once
        :param kwargs: extra keyword arguments forwarded to ``_upload_batch``
        :raises IndexError: if the number of indexed ids differs from the
            number of documents; a warning with the same message is emitted
            first
        """
        docs = list(docs)
        successful_indexed_ids = self._upload_batch(docs, **kwargs)
        if self._list_like:
            # Snapshot the known ids once so every membership test is a hash
            # lookup instead of a scan of the id container per uploaded id.
            # Assumes ``_offset2ids.ids`` is an iterable of hashable ids.
            known_ids = set(self._offset2ids.ids)
            self._offset2ids.extend(
                [_id for _id in successful_indexed_ids if _id not in known_ids]
            )

        if len(successful_indexed_ids) != len(docs):
            failed_index_ids = {doc.id for doc in docs} - set(successful_indexed_ids)

            err_msg = f'fail to add Documents with ids: {failed_index_ids}'
            warnings.warn(err_msg)
            raise IndexError(err_msg)