from contextlib import ExitStack
from typing import Optional, overload, TYPE_CHECKING, Dict, Union
from docarray.array.base import BaseDocumentArray
from docarray.array.mixins import AllMixins
# These names are only needed by static type checkers (they appear as string
# annotations in the ``DocumentArray.__new__`` overloads), so they are imported
# under ``TYPE_CHECKING`` and never executed at runtime.
if TYPE_CHECKING:  # pragma: no cover
    from docarray.typing import DocumentArraySourceType
    from docarray.array.memory import DocumentArrayInMemory
    from docarray.array.sqlite import DocumentArraySqlite
    from docarray.array.annlite import DocumentArrayAnnlite
    from docarray.array.weaviate import DocumentArrayWeaviate
    from docarray.array.elastic import DocumentArrayElastic
    from docarray.array.redis import DocumentArrayRedis
    from docarray.array.milvus import DocumentArrayMilvus
    from docarray.array.opensearch import DocumentArrayOpenSearch
    from docarray.array.storage.sqlite import SqliteConfig
    from docarray.array.storage.annlite import AnnliteConfig
    from docarray.array.storage.weaviate import WeaviateConfig
    from docarray.array.storage.elastic import ElasticConfig
    from docarray.array.storage.redis import RedisConfig
    from docarray.array.storage.milvus import MilvusConfig
    from docarray.array.storage.opensearch import OpenSearchConfig
class DocumentArray(AllMixins, BaseDocumentArray):
    """
    DocumentArray is a list-like container of :class:`~docarray.Document` objects.

    A DocumentArray can be used to store, embed, and retrieve :class:`~docarray.Document` objects.

    .. code-block:: python

        from docarray import Document, DocumentArray

        da = DocumentArray(
            [Document(text='The cake is a lie'), Document(text='Do a barrel roll!')]
        )
        da.apply(Document.embed_feature_hashing)

        query = Document(text='Can i have some cake?').embed_feature_hashing()
        query.match(da, metric='jaccard', use_scipy=True)

        print(query.matches[:, ('text', 'scores__jaccard__value')])

    .. code-block:: bash

        [['The cake is a lie', 'Do a barrel roll!'], [0.9, 1.0]]

    A DocumentArray can also :ref:`embed its contents using a neural network <embed-via-model>`,
    process them using an :ref:`external Flow or Executor <da-post>`, and persist Documents in a :ref:`Document Store <doc-store>` for
    fast vector search:

    .. code-block:: python

        from docarray import Document, DocumentArray
        import numpy as np

        n_dim = 3
        metric = 'Euclidean'

        # initialize a DocumentArray with AnnLite Document Store
        da = DocumentArray(
            storage='annlite',
            config={'n_dim': n_dim, 'columns': [('price', 'float')], 'metric': metric},
        )

        # add Documents to the DocumentArray
        with da:
            da.extend(
                [
                    Document(id=f'r{i}', embedding=i * np.ones(n_dim), tags={'price': i})
                    for i in range(10)
                ]
            )

        # perform vector search
        np_query = np.ones(n_dim) * 8
        results = da.find(np_query)

    .. seealso::
        For further details, see our :ref:`user guide <documentarray>`.
    """

    # The overloads below exist only for static type checkers: each one maps a
    # ``storage`` value to the concrete class returned by ``__new__``.
    # NOTE(review): ``__new__`` also accepts ``storage='qdrant'`` but no
    # overload is declared for it here.

    @overload
    def __new__(
        cls,
        _docs: Optional['DocumentArraySourceType'] = None,
        copy: bool = False,
        subindex_configs: Optional[Dict[str, 'None']] = None,
    ) -> 'DocumentArrayInMemory':
        """Create an in-memory DocumentArray object."""
        ...

    @overload
    def __new__(
        cls,
        _docs: Optional['DocumentArraySourceType'] = None,
        storage: str = 'sqlite',
        config: Optional[Union['SqliteConfig', Dict]] = None,
        subindex_configs: Optional[Dict[str, Dict]] = None,
    ) -> 'DocumentArraySqlite':
        """Create a SQLite-powered DocumentArray object."""
        ...

    @overload
    def __new__(
        cls,
        _docs: Optional['DocumentArraySourceType'] = None,
        storage: str = 'weaviate',
        config: Optional[Union['WeaviateConfig', Dict]] = None,
        subindex_configs: Optional[Dict[str, Dict]] = None,
    ) -> 'DocumentArrayWeaviate':
        """Create a Weaviate-powered DocumentArray object."""
        ...

    @overload
    def __new__(
        cls,
        _docs: Optional['DocumentArraySourceType'] = None,
        storage: str = 'annlite',
        config: Optional[Union['AnnliteConfig', Dict]] = None,
        subindex_configs: Optional[Dict[str, Dict]] = None,
    ) -> 'DocumentArrayAnnlite':
        """Create an AnnLite-powered DocumentArray object."""
        ...

    @overload
    def __new__(
        cls,
        _docs: Optional['DocumentArraySourceType'] = None,
        storage: str = 'elasticsearch',
        config: Optional[Union['ElasticConfig', Dict]] = None,
        subindex_configs: Optional[Dict[str, Dict]] = None,
    ) -> 'DocumentArrayElastic':
        """Create an Elastic-powered DocumentArray object."""
        ...

    @overload
    def __new__(
        cls,
        _docs: Optional['DocumentArraySourceType'] = None,
        storage: str = 'redis',
        config: Optional[Union['RedisConfig', Dict]] = None,
    ) -> 'DocumentArrayRedis':
        """Create a Redis-powered DocumentArray object."""
        ...

    @overload
    def __new__(
        cls,
        _docs: Optional['DocumentArraySourceType'] = None,
        storage: str = 'milvus',
        config: Optional[Union['MilvusConfig', Dict]] = None,
    ) -> 'DocumentArrayMilvus':
        """Create a Milvus-powered DocumentArray object."""
        ...

    @overload
    def __new__(
        cls,
        _docs: Optional['DocumentArraySourceType'] = None,
        storage: str = 'opensearch',
        config: Optional[Union['OpenSearchConfig', Dict]] = None,
    ) -> 'DocumentArrayOpenSearch':
        """Create an OpenSearch-powered DocumentArray object."""
        ...

    def __new__(cls, *args, storage: str = 'memory', **kwargs):
        """Allocate the storage-specific class selected by ``storage``.

        When called on ``DocumentArray`` itself, ``storage`` picks the backend
        class to allocate. When called on any other class (``cls is not
        DocumentArray``), that class is allocated directly and ``storage`` is
        not inspected.

        Backend modules are imported lazily inside the matching branch, so only
        the selected backend's module is imported by this call.

        :param args: accepted for signature compatibility; not used here.
        :param storage: keyword-only backend name. One of ``'memory'``,
            ``'sqlite'``, ``'annlite'``, ``'weaviate'``, ``'qdrant'``,
            ``'elasticsearch'``, ``'redis'``, ``'milvus'``, ``'opensearch'``.
        :param kwargs: accepted for signature compatibility; not used here.
        :return: a new, uninitialised instance of the selected class.
        :raises ValueError: if ``cls`` is ``DocumentArray`` and ``storage`` is
            not one of the supported names.
        """
        if cls is not DocumentArray:
            # Explicit subclass requested: no dispatch on ``storage``.
            return super().__new__(cls)

        if storage == 'memory':
            from docarray.array.memory import DocumentArrayInMemory as backend_cls
        elif storage == 'sqlite':
            from docarray.array.sqlite import DocumentArraySqlite as backend_cls
        elif storage == 'annlite':
            from docarray.array.annlite import DocumentArrayAnnlite as backend_cls
        elif storage == 'weaviate':
            from docarray.array.weaviate import DocumentArrayWeaviate as backend_cls
        elif storage == 'qdrant':
            from docarray.array.qdrant import DocumentArrayQdrant as backend_cls
        elif storage == 'elasticsearch':
            from docarray.array.elastic import DocumentArrayElastic as backend_cls
        elif storage == 'redis':
            from docarray.array.redis import DocumentArrayRedis as backend_cls
        elif storage == 'milvus':
            from docarray.array.milvus import DocumentArrayMilvus as backend_cls
        elif storage == 'opensearch':
            from docarray.array.opensearch import DocumentArrayOpenSearch as backend_cls
        else:
            raise ValueError(f'storage=`{storage}` is not supported.')

        return super().__new__(backend_cls)

    def __enter__(self):
        """Enter the context: schedule a sync on exit and enter all subindices.

        A fresh :class:`contextlib.ExitStack` is stored on ``self._exit_stack``.
        ``self.sync`` is registered first, so, because the stack unwinds in
        reverse order of registration, it runs last, after every subindex
        context has been exited.

        :return: ``self``, so that ``with da as x`` binds the array itself.
        """
        self._exit_stack = ExitStack()
        # Ensure that we sync the data to the storage backend when exiting the context manager
        self._exit_stack.callback(self.sync)
        # Enter (and then exit) context of all subindices.
        # ``_subindices`` may be absent or empty; both cases are skipped.
        subindices = getattr(self, '_subindices', None)
        if subindices:
            for _selector, sub_da in subindices.items():
                self._exit_stack.enter_context(sub_da)
        return self

    def __exit__(self, *args, **kwargs):
        """Leave the context by unwinding everything registered in ``__enter__``.

        The exception details passed by the ``with`` statement are not
        forwarded: ``ExitStack.close()`` unwinds the stack as if no exception
        had occurred. This method returns ``None``, so an exception raised
        inside the ``with`` body is never suppressed here.
        """
        # Trigger all __exit__()s and callbacks added in self.__enter__()
        self._exit_stack.close()