from typing import TYPE_CHECKING
import numpy as np
if TYPE_CHECKING: # pragma: no cover
from docarray.typing import ArrayType
[docs]def cosine(x_mat: 'np.ndarray', y_mat: 'np.ndarray', eps: float = 1e-7) -> 'np.ndarray':
"""Cosine distance between each row in x_mat and each row in y_mat.
:param x_mat: np.ndarray with ndim=2
:param y_mat: np.ndarray with ndim=2
:param eps: a small jitter to avoid divde by zero
:return: np.ndarray with ndim=2
"""
return 1 - np.clip(
(np.dot(x_mat, y_mat.T) + eps)
/ (
np.outer(np.linalg.norm(x_mat, axis=1), np.linalg.norm(y_mat, axis=1)) + eps
),
-1,
1,
)
[docs]def sqeuclidean(x_mat: 'np.ndarray', y_mat: 'np.ndarray') -> 'np.ndarray':
"""Squared Euclidean distance between each row in x_mat and each row in y_mat.
:param x_mat: np.ndarray with ndim=2
:param y_mat: np.ndarray with ndim=2
:return: np.ndarray with ndim=2
"""
return (
np.sum(y_mat**2, axis=1)
+ np.sum(x_mat**2, axis=1)[:, np.newaxis]
- 2 * np.dot(x_mat, y_mat.T)
)
[docs]def sparse_cosine(x_mat: 'ArrayType', y_mat: 'ArrayType') -> 'np.ndarray':
"""Cosine distance between each row in x_mat and each row in y_mat.
:param x_mat: scipy.sparse like array with ndim=2
:param y_mat: scipy.sparse like array with ndim=2
:return: np.ndarray with ndim=2
"""
from scipy.sparse.linalg import norm
# we need the np.asarray otherwise we get a np.matrix object that iterates differently
return 1 - np.clip(
np.asarray(
x_mat.dot(y_mat.T) / (np.outer(norm(x_mat, axis=1), norm(y_mat, axis=1)))
),
-1,
1,
)
[docs]def sparse_sqeuclidean(x_mat: 'ArrayType', y_mat: 'ArrayType') -> 'np.ndarray':
"""Cosine distance between each row in x_mat and each row in y_mat.
:param x_mat: scipy.sparse like array with ndim=2
:param y_mat: scipy.sparse like array with ndim=2
:return: np.ndarray with ndim=2
"""
# we need the np.asarray otherwise we get a np.matrix object that iterates differently
return np.asarray(
y_mat.power(2).sum(axis=1).flatten()
+ x_mat.power(2).sum(axis=1)
- 2 * x_mat.dot(y_mat.T)
)
[docs]def sparse_euclidean(x_mat: 'ArrayType', y_mat: 'ArrayType') -> 'np.ndarray':
"""Sparse euclidean distance between each row in x_mat and each row in y_mat.
:param x_mat: scipy.sparse like array with ndim=2
:param y_mat: scipy.sparse like array with ndim=2
:return: np.ndarray with ndim=2
"""
return np.sqrt(sparse_sqeuclidean(x_mat, y_mat))
[docs]def euclidean(x_mat: 'ArrayType', y_mat: 'ArrayType') -> 'np.ndarray':
"""Euclidean distance between each row in x_mat and each row in y_mat.
:param x_mat: scipy.sparse like array with ndim=2
:param y_mat: scipy.sparse like array with ndim=2
:return: np.ndarray with ndim=2
"""
return np.sqrt(sqeuclidean(x_mat, y_mat))