Source code for docarray.array.mixins.reduce
from typing import List, TYPE_CHECKING
if TYPE_CHECKING: # pragma: no cover
from docarray.typing import T, Document
def _reduce_doc_props(doc1: 'Document', doc2: 'Document'):
    """
    Fill the empty fields of ``doc1`` with the values found in ``doc2``, in-place.

    A field is copied only when it is listed in ``doc2.non_empty_fields`` and is absent from
    ``doc1.non_empty_fields``; fields that ``doc1`` already has are never overwritten.
    The fields ``matches``, ``chunks``, ``id`` and ``parent_id`` are never copied by this function.

    :param doc1: Document that receives the missing fields (modified in-place)
    :param doc2: Document that provides the values
    """
    # Never transferred here; `matches` and `chunks` are merged separately by `ReduceMixin._reduce_doc`.
    excluded = {'matches', 'chunks', 'id', 'parent_id'}

    left_set = set(doc1.non_empty_fields)
    right_set = set(doc2.non_empty_fields)

    # keep what doc2 has and doc1 lacks, then drop the excluded names
    to_copy = (right_set - left_set) - excluded
    for name in to_copy:
        setattr(doc1, name, getattr(doc2, name))
class ReduceMixin:
    """
    Mixin adding reduce (merge) operations to :class:`DocumentArray`.

    Reducing two or more DocumentArrays gathers all of their Documents into a single DocumentArray.
    A Document that appears in several DocumentArrays is kept once; its data attributes are merged,
    and whenever an attribute is set on both sides the value from the left-most DocumentArray wins.
    Matches and chunks are reduced with the same rule, so the reduction is applied at every level,
    from root Documents down to all their chunk and match children.
    """

    def reduce(self: 'T', other: 'T') -> 'T':
        """
        Merge ``other`` into this DocumentArray in-place and return this DocumentArray.

        Every Document of ``other`` whose id is not yet present in this DocumentArray is appended.
        When the id is already present, the two Documents are merged: data properties that are
        empty on this DocumentArray's Document are taken from the other one (priority to the
        current DocumentArray's Document), and their matches and chunks are reduced the same way.

        :param other: DocumentArray to merge into this one
        :return: this DocumentArray, after the merge
        """
        for incoming in other:
            if incoming.id not in self:
                # unknown id: simply add the Document
                self.append(incoming)
                continue
            # known id: merge into the Document this array already holds
            self._reduce_doc(self[incoming.id], incoming)
        return self

    @staticmethod
    def _reduce_doc(doc1: 'Document', doc2: 'Document'):
        """
        Merge ``doc2`` into ``doc1`` in-place.

        Data properties that are empty on ``doc1`` are filled from ``doc2`` (priority to the
        left-most Document); then the matches and the chunks of both Documents are reduced.
        Non-data properties are ignored.
        Matches and chunks are reduced with :meth:`reduce` of their DocumentArray.

        :param doc1: first Document, receives the changes
        :param doc2: second Document, source of the missing data
        """
        _reduce_doc_props(doc1, doc2)

        # only recurse when doc2 actually carries children of that kind
        incoming_matches = doc2.matches
        if len(incoming_matches) > 0:
            doc1.matches.reduce(incoming_matches)

        incoming_chunks = doc2.chunks
        if len(incoming_chunks) > 0:
            doc1.chunks.reduce(incoming_chunks)

    def reduce_all(self: 'T', others: List['T']) -> 'T':
        """
        Merge every DocumentArray of ``others`` into this DocumentArray in-place.

        The DocumentArrays are applied one after the other, in list order, with :meth:`reduce`.
        The result contains the Documents of all DocumentArrays.
        For a Document present in several DocumentArrays, data properties are merged with priority
        to the left-most DocumentArray (a data attribute that is set in several of them keeps the
        value of the left-most one). Its matches and chunks are reduced in the same way.
        Other non-data properties are ignored.

        .. note::
            - Matches are not kept in a sorted order when they are reduced. You might want to
              re-sort them in a later step.
            - The final result depends on the order of DocumentArrays when applying reduction.

        :param others: List of DocumentArrays to be reduced
        :return: the resulting DocumentArray (this DocumentArray)
        """
        for other in others:
            self.reduce(other)
        return self