from typing import TYPE_CHECKING
from docarray.math.ndarray import get_array_type
if TYPE_CHECKING: # pragma: no cover
from docarray.typing import ArrayType
import numpy as np
def pdist(
    x_mat: 'ArrayType',
    metric: str,
) -> 'np.ndarray':
    """Compute the distances between all pairs of rows of a single matrix.

    This is a convenience wrapper that compares ``x_mat`` against itself by
    calling :func:`cdist` with the same array on both sides; the ``device``
    argument of :func:`cdist` is left at its default.

    :param x_mat: array of ndim 2 whose rows are the observations
    :param metric: string describing the metric type, forwarded to :func:`cdist`
    :return: whatever :func:`cdist` returns for ``(x_mat, x_mat)``, i.e. a
        matrix of ndim 2 holding the distance for every pair of rows
    """
    return cdist(x_mat=x_mat, y_mat=x_mat, metric=metric)
def cdist(
    x_mat: 'ArrayType', y_mat: 'ArrayType', metric: str, device: str = 'cpu'
) -> 'np.ndarray':
    """Computes the pairwise distance between each row of X and each row on Y according to `metric`.

    - Let `n_x = x_mat.shape[0]`
    - Let `n_y = y_mat.shape[0]`
    - Returns a matrix `dist` of shape `(n_x, n_y)` with `dist[i,j] = metric(x_mat[i], y_mat[j])`.

    The work is delegated to a backend module chosen from the
    ``(framework, is_sparse)`` pair reported by ``get_array_type``:
    sparse scipy matrices use the ``sparse_<metric>`` functions of the numpy
    backend, while numpy, tensorflow, torch and paddle arrays use the
    ``<metric>`` function of their own backend. Backends are imported lazily
    so that only the framework actually in use has to be installed.

    :param x_mat: array of ndim 2 (left-hand side)
    :param y_mat: array of ndim 2 (right-hand side); must be reported as the
        same array type as ``x_mat``
    :param metric: string describing the metric type, one of `cosine`,
        `sqeuclidean` or `euclidean`
    :param device: the computational device, can be either `cpu` or `cuda`.
        Only forwarded to the tensorflow, torch and paddle backends; it is
        not used for numpy and scipy inputs.
    :return: the distance matrix of ndim 2 produced by the selected backend
    :raises ValueError: if ``x_mat`` and ``y_mat`` are not of the same array type
    :raises NotImplementedError: if ``metric`` is not supported, or if no
        backend exists for the detected array type
    """
    from importlib import import_module

    x_type = get_array_type(x_mat)
    y_type = get_array_type(y_mat)
    if x_type != y_type:
        raise ValueError(
            f'The type of your left-hand side is {x_type}, whereas your right-hand side is {y_type}. '
            f'`.cdist()` requires left must be the same type as right.'
        )

    # Reject unknown metrics before touching any backend, so the error
    # reported for a bad metric does not depend on the array framework.
    if metric not in ('cosine', 'sqeuclidean', 'euclidean'):
        raise NotImplementedError(f'metric `{metric}` is not supported')

    # Both operands share one type, so reuse the result computed above
    # instead of inspecting `x_mat` a second time.
    framework, is_sparse = x_type

    dists = None
    if framework == 'scipy' and is_sparse:
        # Sparse scipy matrices are served by the numpy backend's
        # dedicated `sparse_*` implementations.
        backend = import_module('docarray.math.distance.numpy')
        dists = getattr(backend, f'sparse_{metric}')(x_mat, y_mat)
    elif framework == 'numpy':
        backend = import_module('docarray.math.distance.numpy')
        dists = getattr(backend, metric)(x_mat, y_mat)
    elif framework in ('tensorflow', 'torch', 'paddle'):
        # These backends accept the target device; numpy/scipy do not.
        backend = import_module(f'docarray.math.distance.{framework}')
        dists = getattr(backend, metric)(x_mat, y_mat, device=device)

    # No branch matched (e.g. a dense scipy input or an unknown framework).
    if dists is None:
        raise NotImplementedError(
            f'{framework} sparse={is_sparse} array is not supported'
        )
    return dists