from typing import Tuple, Optional
import numpy as np
def minmax_normalize(
    x: 'np.ndarray',
    t_range: Tuple = (0, 1),
    x_range: Optional[Tuple] = None,
    eps: float = 1e-7,
) -> 'np.ndarray':
    """Normalize values in `x` into `t_range`.

    `x` can be a 1D array or a 2D array. When `x` is a 2D array, then
    normalization is row-based (min and max are taken over the last axis).

    Inputs that are not :class:`numpy.ndarray` are expected to expose
    ``.min(axis=-1)`` / ``.max(axis=-1)`` whose results have ``.toarray()``
    (presumably scipy sparse matrices -- confirm against callers).

    .. note::
        - with `t_range=(0, 1)` will normalize the min-value of the data to 0, max to 1;
        - with `t_range=(1, 0)` will normalize the min-value of the data to 1, max value of the data to 0.

    :param x: the data to be normalized
    :param t_range: a tuple represents the target range.
    :param x_range: a tuple represents x range. When given (and non-empty), its
        first two items are used as min and max instead of the per-row min/max
        of `x`; results falling outside `t_range` are clipped.
    :param eps: a small jitter to avoid divide by zero
    :return: normalized data in `t_range`
    """
    a, b = t_range

    # Resolve the source range: an explicit `x_range` wins, otherwise take
    # the per-row extrema using the API matching the container type.
    if x_range:
        min_d = x_range[0]
        max_d = x_range[1]
    elif isinstance(x, np.ndarray):
        min_d = np.min(x, axis=-1, keepdims=True)
        max_d = np.max(x, axis=-1, keepdims=True)
    else:
        min_d = x.min(axis=-1).toarray()
        max_d = x.max(axis=-1).toarray()

    # `eps` keeps the denominator non-zero when a row is constant.
    r = (b - a) * (x - min_d) / (max_d - min_d + eps) + a

    # np.clip needs (low, high); `t_range` may be given reversed.
    if a < b:
        low, high = a, b
    else:
        low, high = b, a
    return np.clip(r, low, high)
def top_k(
    values: 'np.ndarray', k: int, descending: bool = False
) -> Tuple['np.ndarray', 'np.ndarray']:
    """Find the k best entries of each row, sorted, together with their column indices.

    By default the k *smallest* entries per row are returned in ascending
    order; with ``descending=True`` the k *largest* entries are returned in
    descending order. The selection runs along ``axis=1``, so `values` must
    have at least two dimensions.

    :param values: array of distances
    :param k: number of values to retrieve; when `k` is at least the number of
        columns, every column is returned (fully sorted)
    :param descending: find top k biggest values
    :return: a tuple ``(values, indices)`` -- the selected values and the
        column positions they were taken from
    """
    if descending:
        # Negate so that "largest" becomes "smallest"; undone before returning.
        values = -values

    if k >= values.shape[1]:
        # All columns requested: argpartition would reject kth == n_cols,
        # and a plain full sort is all that is needed anyway.
        idx = values.argsort(axis=1)[:, :k]
        values = np.take_along_axis(values, idx, axis=1)
    else:
        # Partition first to isolate the k smallest candidates (unordered) ...
        idx_ps = values.argpartition(kth=k, axis=1)[:, :k]
        values = np.take_along_axis(values, idx_ps, axis=1)
        # ... then sort only those k and map back to original column indices.
        idx_fs = values.argsort(axis=1)
        idx = np.take_along_axis(idx_ps, idx_fs, axis=1)
        values = np.take_along_axis(values, idx_fs, axis=1)

    if descending:
        values = -values
    return values, idx
def update_rows_x_mat_best(
    x_mat_best: 'np.ndarray',
    x_inds_best: 'np.ndarray',
    x_mat: 'np.ndarray',
    x_inds: 'np.ndarray',
    k: int,
) -> Tuple['np.ndarray', 'np.ndarray']:
    """
    Select, per row, the k smallest values (and their indices) from `x_mat` union `x_mat_best`.

    The inputs are not modified; the new "best" arrays are returned. The k
    selected entries of each row are NOT sorted -- only membership in the
    k smallest is guaranteed.

    :param x_mat_best: numpy array of the current best values
    :param x_inds_best: numpy array of the indices belonging to `x_mat_best`
    :param x_mat: numpy array of the new candidate values
    :param x_inds: numpy array of the indices belonging to `x_mat`
    :param k: number of values to retrieve; when the combined candidates have
        `k` columns or fewer, all of them are returned
    :return: a tuple ``(values, indices)`` with the k best values per row and
        their matching indices
    """
    # Candidates are laid out as [new | previous best] column-wise; values and
    # indices are stacked identically so positions stay aligned.
    all_dists = np.hstack((x_mat, x_mat_best))
    all_inds = np.hstack((x_inds, x_inds_best))

    if k >= all_dists.shape[1]:
        # np.argpartition raises ValueError when kth is not a valid column
        # index; with k or fewer candidates everything is kept anyway.
        return all_dists, all_inds

    best_inds = np.argpartition(all_dists, kth=k, axis=1)[:, :k]
    x_mat_best = np.take_along_axis(all_dists, best_inds, axis=1)
    x_inds_best = np.take_along_axis(all_inds, best_inds, axis=1)
    return x_mat_best, x_inds_best
# Small positive module-level constant. Its consumers are not visible in this
# section -- presumably a numerical tolerance / divide-by-zero guard (confirm
# against the code that references it).
EPSILON = 1.0e-9