Source code for gower_metric.utils.ranges

import numpy as np


[docs] def scale_method(valid: np.ndarray, method: str) -> float: """ Compute the scaling span for a 1D array of valid (non-NaN) values. Args: valid (np.ndarray): 1D array of floats (no NaNs). method (str): 'range' or 'iqr'. Returns: float: - span: max-min for 'range', Q3-Q1 for 'iqr', or 0.0 if constant/empty. Raises: ValueError: if method is unknown. """ if valid.size == 0: return 0.0 if method == "range": span = valid.max() - valid.min() elif method == "iqr": q75, q25 = np.percentile(valid, [75, 25]) span = q75 - q25 else: raise ValueError(f"Unknown method '{method}' for scaling.") return span if span > 0 else 0.0
[docs] def get_numeric_ranges( X: np.ndarray, indices: list[int], method: str = "range", ) -> np.ndarray: """ Compute the range for each numeric column in X based on selected scale method. Applied only to ratio-scale and internal-scale data types. Args: X (float): array of shape (n_samples, n_features). indices (list[int]): list of column indices to treat as numeric. method (str): method for scaling, either 'range' or 'iqr'. Returns: np.ndarray: - 1D array of length len(indices), where each entry is max(X[:, idx]) - min(X[:, idx]). For now we ignore NaNs. If all values are NaN or constant, range is set to 0.0. """ ranges = np.empty(len(indices), dtype=float) for pos, j in enumerate(indices): col = X[:, j].astype(float) valid = col[~np.isnan(col)] ranges[pos] = scale_method(valid, method) return ranges