Source code for gower_metric.utils.cat_ord_ut
import math
from collections import Counter
from collections.abc import Sequence
from typing import Any
import numpy as np
[docs]
def map_ordered_values(
ordered_values: Sequence[Any] | np.ndarray,
) -> tuple[dict[Any, int], int | None, int | None]:
"""
Map consequtive integers to passed ordered values.
Args:
ordered_values (Sequence[Any] | np.ndarray): A defined sequence of categorical values.
Returns:
tuple[dict[Any, int], int | None, int | None]:
- ranks_mapping: A dictionary mapping each unique value to its rank.
- min_rank: The minimum rank (or None if no categories).
- max_rank: The maximum rank (or None if no categories).
"""
ranks_mapping = {value: rank for rank, value in enumerate(ordered_values)}
min_rank: int | None = 0
max_rank: int | None = len(ordered_values) - 1
return ranks_mapping, min_rank, max_rank
[docs]
def get_cardinalities_mapping(
column: Sequence[Any] | np.ndarray,
) -> tuple[dict[Any, int], list[int]]:
"""
Count occurrences of each category value in an ordinal column.
Args:
column (Sequence[Any] | np.ndarray): A sequence of ordinal values (may include NaN).
NaN values are ignored in counting.
Returns:
tuple[dict[Any, int], list[int]]:
- counts_map: Mapping from each unique category value (excluding NaN) to its count.
- counts_list: List of counts corresponding to each category value, ordered by sorted category values.
"""
cleaned = [v for v in column if not (isinstance(v, float) and math.isnan(v))]
counts_map: dict[Any, int] = Counter(cleaned)
unique_vals = sorted(counts_map.keys())
counts_list = [counts_map[val] for val in unique_vals]
return counts_map, counts_list
[docs]
def collect_ordinal_cardinalities(data: np.ndarray) -> list[np.ndarray]:
"""
Process a 2D array of ordinal columns to get counts per level for each column.
Args:
data (np.ndarray): Two-dimensional array with shape (n_samples, n_ordinal_columns).
Each column may contain NaN and ordinal categorical values.
Returns:
list[np.ndarray]):
- ordinals_cardinality:
A list where each element is a 1D NumPy array of integer counts. Counts[i] is the number of occurrences of the i-th sorted category in that column.
"""
ordinals_cardinality: list[np.ndarray] = []
for i in range(data.shape[1]):
column = data[:, i]
_, counts_list = get_cardinalities_mapping(column)
ordinals_cardinality.append(np.array(counts_list, dtype=int))
return ordinals_cardinality