Source code for docarray.document.mixins.pydantic
import base64
from collections import defaultdict
from typing import TYPE_CHECKING, Type
import numpy as np
if TYPE_CHECKING: # pragma: no cover
from pydantic import BaseModel
from docarray.typing import T
from docarray.document.pydantic_model import PydanticDocument
class PydanticMixin:
    """Provide helper functions to convert to/from a Pydantic model"""

    @classmethod
    def get_json_schema(cls, indent: int = 2) -> str:
        """Return a JSON Schema of Document class.

        :param indent: indentation value forwarded to pydantic's ``schema_json_of``
        :return: the JSON Schema of ``PydanticDocument`` as a string
        """
        # Imported at call time rather than at module level
        # (presumably so that pydantic stays an optional dependency -- confirm).
        from docarray.document.pydantic_model import PydanticDocument as DP
        from pydantic import schema_json_of

        return schema_json_of(DP, title='Document Schema', indent=indent)

    def to_pydantic_model(self) -> 'PydanticDocument':
        """Convert a Document object into a Pydantic model.

        Only the fields listed in ``self.non_empty_fields`` are exported.

        :return: a ``PydanticDocument`` built from those fields
        """
        from docarray.document.pydantic_model import PydanticDocument as DP

        _p_dict = {}
        for f in self.non_empty_fields:
            v = getattr(self, f)
            if f in ('matches', 'chunks'):
                # Nested containers convert themselves.
                _p_dict[f] = v.to_pydantic_model()
            elif f in ('scores', 'evaluations'):
                # Each named score is exported through its own ``to_dict()``.
                _p_dict[f] = {name: score.to_dict() for name, score in v.items()}
            elif f == 'blob':
                # Raw bytes are exported as a base64 text string.
                _p_dict[f] = base64.b64encode(v).decode('utf8')
            else:
                _p_dict[f] = v
        return DP(**_p_dict)

    @classmethod
    def from_pydantic_model(cls: Type['T'], model: 'BaseModel') -> 'T':
        """Build a Document object from a Pydantic model

        :param model: the pydantic data model object that represents a Document
        :return: a Document object
        """
        from docarray import Document

        # NOTE: nested documents and the result are always built with
        # ``docarray.Document``; ``cls`` itself is not used for construction.
        fields = {}
        _field_chunks, _field_matches = None, None
        if model.chunks:
            _field_chunks = [Document.from_pydantic_model(d) for d in model.chunks]
        if model.matches:
            _field_matches = [Document.from_pydantic_model(d) for d in model.matches]

        # ``chunks`` and ``matches`` were converted recursively above, so they
        # are excluded from the flat dict export.
        for f_name, value in model.dict(
            exclude_none=True, exclude={'chunks', 'matches'}
        ).items():
            if f_name in ('scores', 'evaluations'):
                from docarray.score import NamedScore

                fields[f_name] = defaultdict(NamedScore)
                for k, v in value.items():
                    fields[f_name][k] = NamedScore(v)
            elif f_name in ('embedding', 'tensor'):
                fields[f_name] = np.array(value)
            elif f_name == 'blob':
                # Reverse of the base64 encoding done in ``to_pydantic_model``.
                fields[f_name] = base64.b64decode(value)
            else:
                fields[f_name] = value

        d = Document(**fields)
        # Nested documents are attached after construction, and only when present.
        if _field_chunks:
            d.chunks = _field_chunks
        if _field_matches:
            d.matches = _field_matches
        return d