Source code for rics.performance._util

from typing import Any, Literal, Tuple, Union

import pandas as pd

from ._multi_case_timer import ResultsDict


[docs]def to_dataframe(run_results: ResultsDict) -> pd.DataFrame:
    """Create a DataFrame from performance run output, adding derived values.

    Args:
        run_results: Output from :meth:`rics.performance.MultiCaseTimer.run`.

    Returns:
        The `run_result` input wrapped in a DataFrame.
    """
    ans = []
    for candidate_label, candidate_results in run_results.items():
        for data_label, data_results in candidate_results.items():
            ans.append(
                pd.DataFrame(
                    {
                        "Candidate": candidate_label,
                        "Test data": data_label,
                        "Run no": range(len(data_results)),
                        "Time [s]": data_results,
                    }
                )
            )

    df = pd.concat(ans, ignore_index=True)
    df["Time [ms]"] = df["Time [s]"] * 1000
    df["Time [μs]"] = df["Time [ms]"] * 1000

    groupby = df.groupby("Test data")["Time [s]"]
    df["Times min"] = df["Time [s]"] / df["Test data"].map(groupby.min())
    df["Times mean"] = df["Time [s]"] / df["Test data"].map(groupby.mean())

    return df


[docs]def get_best(run_results: Union[ResultsDict, pd.DataFrame], per_candidate: bool = False) -> pd.DataFrame:
    """Get a summarized view of the best run results for each candidate/data pair.

    Args:
        run_results: Output of :meth:`rics.performance.MultiCaseTimer.run`.
        per_candidate: If ``True``, show the best times for all candidate/data pairs. Otherwise, just show the best
            candidate per data label.

    Returns:
        The best (lowest) times for each candidate/data pair.
    """
    df = run_results if isinstance(run_results, pd.DataFrame) else to_dataframe(run_results)
    return df.sort_values("Time [s]").groupby(["Candidate", "Test data"] if per_candidate else "Test data").head(1)


[docs]def plot_run(
    run_results: Union[ResultsDict, pd.DataFrame],
    x: Literal["candidate", "data"] = None,
    unit: Literal["s", "ms", "μs", "us"] = "ms",
    **figure_kwargs: Any,
) -> None:
    """Plot the results of a performance test.

    .. figure:: ../_images/perf_plot.png

       Comparison of ``time.sleep(t)`` and ``time.sleep(5*t)``.

    Args:
        run_results: Output of :meth:`rics.performance.MultiCaseTimer.run`.
        x: The value to plot on the X-axis. Default=derive.
        unit: Time unit to plot on the Y-axis.
        **figure_kwargs: Keyword arguments for the barplot.

    Raises:
        ModuleNotFoundError: If Seaborn isn't installed.
        TypeError: For unknown `unit` arguments.
    """
    import matplotlib.pyplot as plt
    from seaborn import barplot, move_legend

    data = to_dataframe(run_results) if isinstance(run_results, dict) else run_results.copy()
    data[["Test data", "Candidate"]] = data[["Test data", "Candidate"]].astype("category")

    x_arg, hue = (
        _smaller_as_hue(data)
        if x is None
        else (("Test data", "Candidate") if x == "data" else ("Candidate", "Test data"))
    )
    y = f"Time [{unit.replace('us', 'μs')}]"
    if y not in data:
        # Unit is not one of the literals, but we still check 'data' in case someone added more units themselves.
        raise TypeError(f"Bad {unit=}; column '{y}' not present in data.")

    fig, (left, right) = plt.subplots(
        ncols=2, tight_layout=True, figsize=(8 + 4 * data.Candidate.nunique(), 7), sharey=True
    )
    left.set_title("Average")
    right.set_title("Best")
    fig.suptitle("Performance", size=24)

    barplot(ax=left, data=data, x=x_arg, y=y, hue=hue, errorbar="sd", **figure_kwargs)
    best = data.groupby(["Test data", "Candidate"]).min().reset_index()
    barplot(ax=right, data=best, x=x_arg, y=y, hue=hue, errorbar=None, **figure_kwargs)

    move_legend(right, "upper left", bbox_to_anchor=(1, 1))
    left.get_legend().remove()


def _smaller_as_hue(data: pd.DataFrame) -> Tuple[str, str]:
    unique = data.nunique()
    return ("Test data", "Candidate") if unique["Test data"] < unique["Candidate"] else ("Candidate", "Test data")