Source code for hypercluster.additional_metrics

from typing import Iterable, Optional
from collections import Counter
from pandas import DataFrame
from scipy.cluster.hierarchy import linkage, cophenet
from scipy.spatial.distance import pdist

__doc__ = (
    "More functions for evaluating clustering results. Additional metric evaluations can "
    "be added here, as long as the second argument is the labels to evaluate"
)


[docs]def number_clustered(_, labels: Iterable) -> float: """Returns the number of clustered samples. Args: _: Dummy, pass anything or None. labels (Iterable): Vector of sample labels. Returns (int): The number of clustered labels. """ return (labels != -1).sum()
[docs]def smallest_largest_clusters_ratio(_, labels: Iterable) -> float: """Number in the smallest cluster over the number in the largest cluster. Args: _: Dummy, pass anything or None. labels (Iterable): Vector of sample labels. Returns (float): Ratio of number of members in smallest over largest cluster. """ counts = Counter(labels) counts.pop(-1, None) return min(counts.values()) / max(counts.values())
[docs]def smallest_cluster_ratio(_, labels: Iterable) -> float: """Number in the smallest cluster over the total samples. Args: _: Dummy, pass anything or None. labels (Iterable): Vector of sample labels. Returns (float): Ratio of number of members in smallest over all samples. """ counts = Counter(labels) counts.pop(-1, None) return min(counts.values()) / len(labels)
[docs]def number_of_clusters(_, labels: Iterable) -> float: """Number of total clusters. Args: _: Dummy, pass anything or None labels (Iterable): Vector of sample labels. Returns (int): Number of clusters. """ return len(Counter(labels))
[docs]def smallest_cluster_size(_, labels: Iterable) -> float: """Number in smallest cluster Args: _: Dummy, pass anything or None labels (Iterable): Vector of sample labels. Returns (int): Number of samples in smallest cluster. """ return min(Counter(labels).values())
[docs]def largest_cluster_size(_, labels: Iterable) -> float: """Number in largest cluster Args: _: Dummy, pass anything or None labels (Iterable): Vector of sample labels. Returns (int): Number of samples in largest cluster. """ return max(Counter(labels).values())