Source code for py_research.ranking

"""Functions for ranking entities by multiple criteria."""

from typing import Literal

import numpy as np
import pandas as pd

from py_research.intl import get_localization

RankMode = Literal["ascending", "descending"]


[docs] def create_rank_series( rank_by: pd.Series | list[pd.Series] | pd.DataFrame, rank_mode: RankMode = "descending", exclude: pd.Series | None = None, name: str = "rank", ) -> pd.Series: """Create a series of ranks given a series of values to rank by. Args: rank_by: Series of values to rank by. rank_mode: Whether to rank ascending or descending. exclude: Values to exclude from ranking. name: Name of the resulting series. Returns: Series of ranks. """ data_table = pd.DataFrame(rank_by) filtered_data = data_table.loc[~exclude] if exclude is not None else data_table ranks = pd.Series( np.arange(1, len(filtered_data) + 1), index=filtered_data.sort_values( by=list(filtered_data.columns), ascending=(rank_mode == "ascending") ).index, name=name, ) if ranks.empty: return ranks rank_by_cols = list(filtered_data.columns) for _, g in filtered_data.groupby( by=(rank_by_cols[0] if len(rank_by_cols) == 1 else rank_by_cols) ): ranks.loc[g.index] = min(ranks.loc[g.index]) return data_table.join(ranks, how="left")[name]
[docs] def create_ranking_filter( rank_by: pd.Series, cutoff_rank: int, sort_order: Literal["ascending", "descending"] = "descending", rank_only: pd.Series | None = None, show_only: pd.Series | None = None, show_always: pd.Series | None = None, count_always_shown: bool = False, ) -> pd.Series: """Create a filter based on ranking. Args: rank_by: Series of values to rank by. cutoff_rank: Cutoff rank for the filter. sort_order: Whether to rank ascending or descending. rank_only: Only rank the given values. show_only: Only show the given values. show_always: Always show the given values. count_always_shown: Whether to count the values in show_always as ranks. Returns: Boolean series representing the filter. The series name contains a description of the filter. """ loc = get_localization() def _filter_explanation( cutoff: int, rank_by: str, sort_order: str, rank_only: str | None = None, show_only: str | None = None, show_always: str | None = None, ) -> str: return "; ".join( s for s in [ ( f"{cutoff} highest-ranked" + ( f" {rank_only}" if rank_only is not None and len(rank_only) <= 20 else "" ) + f" according to '{rank_by}'" + f" ({sort_order})" + ( f", only ranking {rank_only}" if rank_only is not None and len(rank_only) > 20 else "" ) ), (f"only showing {show_only}" if show_only is not None else ""), (f"always showing {show_always}" if show_always is not None else ""), ] if s ) desc = loc.text( "filter_explanation", _filter_explanation( cutoff_rank, str(rank_by.name), sort_order, rank_only=( str(rank_only.name) if rank_only is not None and rank_only.name is not None else None ), show_only=( str(show_only.name) if show_only is not None and show_only.name is not None else None ), show_always=( str(show_always.name) if show_always is not None and show_always.name is not None else None ), ), ) rank_by_filter = np.full(len(rank_by), True) if rank_only is not None: # Apply a custom filter, if given rank_by_filter &= rank_only rank_series = create_rank_series( rank_by.loc[rank_by_filter], sort_order, exclude=(show_always if not count_always_shown else None), ) rank_by_filter &= rank_series <= cutoff_rank if show_always is not None: # Apply a custom filter, if given rank_by_filter |= show_always if show_only is not None: # Apply a custom filter, if given rank_by_filter &= show_only return pd.Series(rank_by_filter, index=rank_by.index, name=desc)