# Source code for optuna.visualization._edf

from typing import Callable
from typing import cast
from typing import List
from typing import NamedTuple
from typing import Optional
from typing import Sequence
from typing import Union

import numpy as np

from optuna.logging import get_logger
from optuna.study import Study
from optuna.trial import FrozenTrial
from optuna.trial import TrialState
from optuna.visualization._plotly_imports import _imports
from optuna.visualization._utils import _check_plot_args
from optuna.visualization._utils import _filter_nonfinite


# ``go`` is bound only when ``_imports.is_successful()`` is true; ``plot_edf``
# calls ``_imports.check()`` before touching ``go``.
if _imports.is_successful():
    from optuna.visualization._plotly_imports import go

# Module-level logger, used for the "no studies" / "no complete trials" warnings.
_logger = get_logger(__name__)


# Number of evenly spaced x positions (passed to ``np.linspace``) at which every
# EDF curve is evaluated.
NUM_SAMPLES_X_AXIS = 100


class _EDFLineInfo(NamedTuple):
    """One EDF curve, i.e. the data for a single study's line in the plot."""

    # Name of the study this curve belongs to (used as the trace name).
    study_name: str
    # Cumulative probability at each shared x position: the fraction of the
    # study's target values that are <= that x.
    y_values: np.ndarray


class _EDFInfo(NamedTuple):
    """All data needed to draw an EDF plot for one or more studies."""

    # One entry per study; empty when there is nothing to plot.
    lines: List[_EDFLineInfo]
    # x positions shared by every line; an empty array when ``lines`` is empty.
    x_values: np.ndarray


def plot_edf(
    study: Union[Study, Sequence[Study]],
    *,
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> "go.Figure":
    """Plot the objective value EDF (empirical distribution function) of a study.

    Note that only the complete trials are considered when plotting the EDF.

    .. note::

        EDF is useful to analyze and improve search spaces.
        For instance, you can see a practical use case of EDF in the paper
        `Designing Network Design Spaces <https://arxiv.org/abs/2003.13678>`_.

    .. note::

        The plotted EDF assumes that the value of the objective function is in
        accordance with the uniform distribution over the objective space.

    Example:

        The following code snippet shows how to plot EDF.

        .. plotly::

            import math

            import optuna


            def ackley(x, y):
                a = 20 * math.exp(-0.2 * math.sqrt(0.5 * (x ** 2 + y ** 2)))
                b = math.exp(0.5 * (math.cos(2 * math.pi * x) + math.cos(2 * math.pi * y)))
                return -a - b + math.e + 20


            def objective(trial, low, high):
                x = trial.suggest_float("x", low, high)
                y = trial.suggest_float("y", low, high)
                return ackley(x, y)


            sampler = optuna.samplers.RandomSampler(seed=10)

            # Widest search space.
            study0 = optuna.create_study(study_name="x=[0,5), y=[0,5)", sampler=sampler)
            study0.optimize(lambda t: objective(t, 0, 5), n_trials=500)

            # Narrower search space.
            study1 = optuna.create_study(study_name="x=[0,4), y=[0,4)", sampler=sampler)
            study1.optimize(lambda t: objective(t, 0, 4), n_trials=500)

            # Narrowest search space but it doesn't include the global optimum point.
            study2 = optuna.create_study(study_name="x=[1,3), y=[1,3)", sampler=sampler)
            study2.optimize(lambda t: objective(t, 1, 3), n_trials=500)

            fig = optuna.visualization.plot_edf([study0, study1, study2])
            fig.show()

    Args:
        study:
            A target :class:`~optuna.study.Study` object.
            You can pass multiple studies if you want to compare those EDFs.
        target:
            A function to specify the value to display. If it is :obj:`None` and ``study`` is being
            used for single-objective optimization, the objective values are plotted.

            .. note::
                Specify this argument if ``study`` is being used for multi-objective optimization.
        target_name:
            Target's name to display on the axis label.

    Returns:
        A :class:`plotly.graph_objs.Figure` object. When there is nothing to plot (no studies,
        or no complete trials in any study), the figure has the layout but no traces.
    """

    _imports.check()

    layout = go.Layout(
        title="Empirical Distribution Function Plot",
        xaxis={"title": target_name},
        yaxis={"title": "Cumulative Probability"},
    )

    info = _get_edf_info(study, target, target_name)

    # Nothing to draw: return an empty figure that still carries the titles.
    if not info.lines:
        return go.Figure(data=[], layout=layout)

    # One line trace per study; all traces share the same x grid.
    traces = [
        go.Scatter(x=info.x_values, y=line.y_values, name=line.study_name, mode="lines")
        for line in info.lines
    ]

    figure = go.Figure(data=traces, layout=layout)
    # Cumulative probabilities always lie in [0, 1]; pin the axis so that
    # figures for different studies are visually comparable.
    figure.update_yaxes(range=[0, 1])

    return figure


def _get_edf_info(
    study: Union[Study, Sequence[Study]],
    target: Optional[Callable[[FrozenTrial], float]] = None,
    target_name: str = "Objective Value",
) -> _EDFInfo:
    """Compute the EDF curve of each study on a shared x grid.

    Args:
        study:
            A single study or a sequence of studies.
        target:
            Maps a trial to the value whose distribution is plotted. When it is
            :obj:`None`, ``trial.value`` is used.
        target_name:
            Name of the target; only forwarded to ``_check_plot_args``.

    Returns:
        An ``_EDFInfo`` with one line per study and ``NUM_SAMPLES_X_AXIS`` x positions
        spanning the minimum to the maximum target value over all studies. If there are
        no studies, or no study has any usable complete trial, ``lines`` is empty and
        ``x_values`` is an empty array. A study without usable trials, plotted next to
        studies that have some, gets an all-NaN line.
    """

    studies = [study] if isinstance(study, Study) else list(study)

    _check_plot_args(studies, target, target_name)

    if not studies:
        _logger.warning("There are no studies.")
        return _EDFInfo(lines=[], x_values=np.array([]))

    if target is None:

        def _target(t: FrozenTrial) -> float:
            return cast(float, t.value)

        target = _target

    study_names: List[str] = []
    all_values: List[np.ndarray] = []
    # Use a distinct loop name so the ``study`` parameter is not rebound.
    for single_study in studies:
        # Only COMPLETE trials are considered; ``_filter_nonfinite`` presumably drops
        # trials whose target value is not finite (see that helper to confirm).
        trials = _filter_nonfinite(
            single_study.get_trials(deepcopy=False, states=(TrialState.COMPLETE,)),
            target=target,
        )

        all_values.append(np.array([target(trial) for trial in trials]))
        study_names.append(single_study.study_name)

    if all(len(values) == 0 for values in all_values):
        _logger.warning("There are no complete trials.")
        return _EDFInfo(lines=[], x_values=np.array([]))

    # Concatenate once and reuse it for both ends of the shared x range.
    merged_values = np.concatenate(all_values)
    x_values = np.linspace(np.min(merged_values), np.max(merged_values), NUM_SAMPLES_X_AXIS)

    edf_line_info_list = []
    for study_name, values in zip(study_names, all_values):
        if values.size == 0:
            # The EDF of an empty sample is undefined. Emit NaN explicitly (the same
            # values ``0 / 0`` would produce) without triggering a RuntimeWarning.
            y_values = np.full(x_values.shape, np.nan)
        else:
            # Broadcast (n_values, 1) against (n_x,) and count, for every x, how many
            # values are <= x; dividing by the sample size gives the EDF.
            y_values = np.sum(values[:, np.newaxis] <= x_values, axis=0) / values.size
        edf_line_info_list.append(_EDFLineInfo(study_name=study_name, y_values=y_values))

    return _EDFInfo(lines=edf_line_info_list, x_values=x_values)