# Source code for docarray.array.mixins.getitem

import itertools
from typing import (
    TYPE_CHECKING,
    Union,
    Sequence,
    overload,
    Any,
    List,
)

import numpy as np

from docarray import Document
from docarray.helper import typename

if TYPE_CHECKING:  # pragma: no cover
    from docarray.typing import (
        DocumentArrayIndexType,
        DocumentArraySingletonIndexType,
        DocumentArrayMultipleIndexType,
        DocumentArrayMultipleAttributeType,
        DocumentArraySingleAttributeType,
    )
    from docarray import DocumentArray


class GetItemMixin:
    """Provide helper functions to enable advanced indexing in `__getitem__`.

    The mixin itself stores nothing. It dispatches on the type of the index
    and delegates to methods that the host class is expected to provide:
    ``_get_doc_by_offset``, ``_get_doc_by_id``, ``_get_docs_by_slice``,
    ``_get_docs_by_offsets``, ``_get_docs_by_ids``, ``traverse_flat`` and
    ``flatten``. The host must also support iteration and ``in`` tests, and
    may optionally expose a ``_subindices`` mapping keyed by access path.
    """

    @overload
    def __getitem__(self, index: 'DocumentArraySingletonIndexType') -> 'Document':
        ...

    @overload
    def __getitem__(self, index: 'DocumentArrayMultipleIndexType') -> 'DocumentArray':
        ...

    @overload
    def __getitem__(self, index: 'DocumentArraySingleAttributeType') -> List[Any]:
        ...

    @overload
    def __getitem__(
        self, index: 'DocumentArrayMultipleAttributeType'
    ) -> List[List[Any]]:
        ...

    def __getitem__(
        self, index: 'DocumentArrayIndexType'
    ) -> Union['Document', 'DocumentArray']:
        """Return the element(s) or attribute value(s) selected by ``index``.

        Supported index forms:

        - ``int`` / NumPy scalar (but not ``bool`` or ``str``): one Document
          by offset.
        - ``str`` starting with ``'@'``: the matching entry of
          ``self._subindices`` if there is one, otherwise the result of
          ``traverse_flat`` on the path without the leading ``'@'``.
        - any other ``str``: one Document by id.
        - ``slice``: a DocumentArray built from ``_get_docs_by_slice``.
        - ``Ellipsis``: the result of ``flatten()``.
        - 2-tuple ``(selector, attribute(s))``: attribute values of the
          selected Document(s); when both items are ``str``/``int`` and the
          second one is contained in ``self`` it is treated as a second
          element selector instead.
        - sequence of ``bool``: mask; sequence of ``int``: offsets;
          sequence of ``str``: ids. An empty sequence selects nothing.
        - ``np.ndarray``: squeezed to one dimension and handled as a list.

        :param index: the index, in one of the forms listed above
        :return: a Document, a DocumentArray, or attribute value(s),
            depending on the index form
        :raises IndexError: if the index form is not supported, or an
            ndarray index does not reduce to exactly one dimension
        """
        # `np.str_` is both a `str` and an `np.generic`; without excluding `str`
        # here a NumPy string id would be pushed through `int()` and crash.
        if isinstance(index, (int, np.generic)) and not isinstance(
            index, (bool, str)
        ):
            return self._get_doc_by_offset(int(index))
        elif isinstance(index, str):
            is_access_path = index.startswith('@')
            if (
                is_access_path
                and getattr(self, '_subindices', None) is not None
                and index in self._subindices
            ):
                return self._subindices[index]
            elif is_access_path:
                return self.traverse_flat(index[1:])
            else:
                return self._get_doc_by_id(index)
        elif isinstance(index, slice):
            from docarray import DocumentArray

            return DocumentArray(self._get_docs_by_slice(index))
        elif index is Ellipsis:
            return self.flatten()
        elif isinstance(index, Sequence):
            from docarray import DocumentArray

            if len(index) == 0:
                # Nothing selected. Every branch below inspects `index[0]`,
                # which would otherwise fail with "index out of range".
                return DocumentArray()

            if (
                isinstance(index, tuple)
                and len(index) == 2
                and (
                    isinstance(index[0], (slice, Sequence, str, int))
                    or index[0] is Ellipsis
                )
                and isinstance(index[1], (str, Sequence))
            ):
                # TODO: add support for cases such as da[1, ['text', 'id']]?
                if isinstance(index[0], (str, int)) and isinstance(index[1], str):
                    # ambiguity only comes from the second string
                    if index[1] in self:
                        return DocumentArray([self[index[0]], self[index[1]]])
                    else:
                        _docs = self[index[0]]
                        if not _docs:
                            return []
                        if isinstance(_docs, Document):
                            return getattr(_docs, index[1])
                        return _docs._get_attributes(index[1])
                elif isinstance(index[0], (slice, Sequence)) or index[0] is Ellipsis:
                    _docs = self[index[0]]
                    _attrs = index[1]
                    if isinstance(_attrs, str):
                        _attrs = (index[1],)
                    return _docs._get_attributes(*_attrs)
                # Any other 2-tuple combination falls through to the final
                # "unsupported" error below.
            elif isinstance(index[0], bool):
                return DocumentArray(itertools.compress(self, index))
            elif isinstance(index[0], int):
                return DocumentArray(self._get_docs_by_offsets(index))
            elif isinstance(index[0], str):
                return DocumentArray(self._get_docs_by_ids(index))
        elif isinstance(index, np.ndarray):
            squeezed = index.squeeze()
            if squeezed.ndim == 0 and index.ndim > 0:
                # A one-element array such as `np.array([3])` squeezes all the
                # way down to 0-d; bring it back to 1-d so it is still treated
                # as a one-element selection. A genuinely 0-d input is left
                # alone and rejected below, as before.
                squeezed = squeezed.reshape(1)
            if squeezed.ndim == 1:
                return self[squeezed.tolist()]
            raise IndexError(
                f'When using np.ndarray as index, its `ndim` must =1. However, receiving ndim={squeezed.ndim}'
            )
        raise IndexError(f'Unsupported index type {typename(index)}: {index}')