Source code for docarray.array.storage.opensearch.seqlike
import warnings
from typing import Dict, Iterable, List, Union

from docarray import Document
from docarray.array.storage.base.seqlike import BaseSequenceLikeMixin
class SequenceLikeMixin(BaseSequenceLikeMixin):
"""Implement sequence-like methods for DocumentArray with OpenSearch as storage"""
    def __eq__(self, other):
        """Compare this object to the other, return ``True`` if and only if
        other has the same type as self and the same meta information

        :param other: the other object to check for equality
        :return: ``True`` if other is equal to self
        """
        # two DocumentArrays backed by OpenSearch are considered equal if they
        # share the same client meta data and the same config
        return (
            type(self) is type(other)
            and self._client.get_meta() == other._client.get_meta()
            and self._config == other._config
        )
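    # A minimal equality sketch (hedged: assumes a running OpenSearch instance,
    # `from docarray import DocumentArray`, and the illustrative index name
    # 'example-idx'; two arrays pointing at the same index and config compare equal):
    #
    #     da1 = DocumentArray(storage='opensearch', config={'index_name': 'example-idx'})
    #     da2 = DocumentArray(storage='opensearch', config={'index_name': 'example-idx'})
    #     assert da1 == da2  # same client meta data and same config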
    def __len__(self):
        """Return the length of :class:`DocumentArray` that uses OpenSearch as storage

        :return: the length of this :class:`DocumentArrayOpenSearch` object
        """
        try:
            return self._client.count(index=self._config.index_name)["count"]
        except Exception:
            # if the index does not exist (yet) or the count request fails,
            # treat the array as empty
            return 0
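    # Sketch (same hypothetical `da` as above): the length is served by the
    # OpenSearch count API rather than a local counter:
    #
    #     da.extend([Document(text='hello')])
    #     print(len(da))  # 1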
    def __contains__(self, x: Union[str, 'Document']):
        """Check if ``x`` is contained in this :class:`DocumentArray` with OpenSearch storage

        :param x: the id of the document to check or the document object itself
        :return: True if ``x`` is contained in self
        """
        if isinstance(x, str):
            return self._doc_id_exists(x)
        elif isinstance(x, Document):
            return self._doc_id_exists(x.id)
        else:
            return False
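    # Sketch: membership works with either a Document or its id (hypothetical
    # `doc` and `da` objects as above):
    #
    #     doc = Document(text='hello')
    #     da.extend([doc])
    #     assert doc in da and doc.id in da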
    def __repr__(self):
        """Return the string representation of :class:`DocumentArrayOpenSearch` object

        :return: string representation of this object
        """
        return f'<{self.__class__.__name__} (length={len(self)}) at {id(self)}>'
    @staticmethod
    def _parse_index_ids_from_bulk_info(
        accumulated_info: List[Dict],
    ) -> Dict[str, List[str]]:
        """Parse document ids, grouped by operation type, from the per-item
        info returned by an OpenSearch bulk operation

        :param accumulated_info: accumulated per-item info of the bulk operation
        :return: dict mapping each operation type to the list of document ids it touched
        """
        parsed_ids = {}
        for info in accumulated_info:
            for _op_type in info.keys():
                if '_id' in info[_op_type]:
                    parsed_ids.setdefault(_op_type, []).append(info[_op_type]['_id'])
        return parsed_ids
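    # Sketch of the expected input/output shape (hedged: the exact per-item
    # format depends on what `_send_requests` accumulates; the items below
    # follow the opensearch-py bulk helper's info layout and are illustrative):
    #
    #     info = [{'index': {'_id': 'doc-1', 'status': 201}},
    #             {'index': {'_id': 'doc-2', 'status': 201}}]
    #     SequenceLikeMixin._parse_index_ids_from_bulk_info(info)
    #     # -> {'index': ['doc-1', 'doc-2']}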
    def _upload_batch(self, docs: Iterable['Document'], **kwargs) -> List[str]:
        """Send a bulk of index requests to OpenSearch and return the ids of
        the successfully indexed Documents
        """
        requests = [self._document_to_opensearch_request(doc) for doc in docs]
        accumulated_info = self._send_requests(requests, **kwargs)
        # refresh so the newly indexed documents become searchable
        self._refresh(self._config.index_name)
        successful_ids = self._parse_index_ids_from_bulk_info(accumulated_info)
        return successful_ids.get('index', [])
    def _extend(self, docs: Iterable['Document'], **kwargs):
        docs = list(docs)
        successful_indexed_ids = self._upload_batch(docs, **kwargs)
        if self._list_like:
            # keep the offset-to-id mapping in sync, skipping ids already present
            self._offset2ids.extend(
                [
                    _id
                    for _id in successful_indexed_ids
                    if _id not in self._offset2ids.ids
                ]
            )
        if len(successful_indexed_ids) != len(docs):
            doc_ids = [doc.id for doc in docs]
            failed_index_ids = set(doc_ids) - set(successful_indexed_ids)
            err_msg = f'Failed to add Documents with ids: {failed_index_ids}'
            warnings.warn(err_msg)
            raise IndexError(err_msg)
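    # Sketch of the failure contract (hedged: same hypothetical `da` as above;
    # ids of documents rejected by the bulk request are both warned about and
    # raised as an IndexError):
    #
    #     try:
    #         da.extend([Document(text='a'), Document(text='b')])
    #     except IndexError as e:
    #         print(e)  # 'Failed to add Documents with ids: {...}'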