Source code for optuna.samplers._grid

import collections
import itertools
import random

from optuna.logging import get_logger
from optuna.samplers import BaseSampler
from optuna import type_checking

if type_checking.TYPE_CHECKING:
    from typing import Any  # NOQA
    from typing import Dict  # NOQA
    from typing import List  # NOQA
    from typing import Mapping  # NOQA
    from typing import Sequence  # NOQA
    from typing import Union  # NOQA

    from optuna.distributions import BaseDistribution  # NOQA
    from optuna.study import Study  # NOQA
    from optuna.trial import FrozenTrial  # NOQA

    GridValueType = Union[str, float, int, bool, None]


_logger = get_logger(__name__)


class GridSampler(BaseSampler):
    """Sampler using grid search.

    With :class:`~optuna.samplers.GridSampler`, the trials suggest all combinations of parameters
    in the given search space during the study.

    Example:

        .. testcode::

            import optuna


            def objective(trial):
                x = trial.suggest_uniform('x', -100, 100)
                y = trial.suggest_int('y', -100, 100)
                return x ** 2 + y ** 2


            search_space = {
                'x': [-50, 0, 50],
                'y': [-99, 0, 99]
            }
            study = optuna.create_study(sampler=optuna.samplers.GridSampler(search_space))
            study.optimize(objective, n_trials=3*3)

    Note:

        :class:`~optuna.samplers.GridSampler` automatically stops the optimization if all
        combinations in the passed ``search_space`` have already been evaluated, internally
        invoking the :func:`~optuna.study.Study.stop` method.

    Note:

        :class:`~optuna.samplers.GridSampler` does not take care of a parameter's quantization
        specified by discrete suggest methods but just samples one of the values specified in the
        search space. E.g., in the following code snippet, either of ``-0.5`` or ``0.5`` is
        sampled as ``x`` instead of an integer point.

        .. testcode::

            import optuna


            def objective(trial):
                # The following suggest method specifies integer points between -5 and 5.
                x = trial.suggest_discrete_uniform('x', -5, 5, 1)
                return x ** 2


            # Non-int points are specified in the grid.
            search_space = {'x': [-0.5, 0.5]}
            study = optuna.create_study(sampler=optuna.samplers.GridSampler(search_space))
            study.optimize(objective, n_trials=2)

    Args:
        search_space:
            A dictionary whose key and value are a parameter name and the corresponding candidates
            of values, respectively.
    """

    def __init__(self, search_space):
        # type: (Mapping[str, Sequence[GridValueType]]) -> None

        for param_name, param_values in search_space.items():
            for value in param_values:
                self._check_value(param_name, value)

        self._search_space = collections.OrderedDict()
        for param_name, param_values in sorted(search_space.items(), key=lambda x: x[0]):
            self._search_space[param_name] = sorted(param_values)

        self._all_grids = list(itertools.product(*self._search_space.values()))
        self._param_names = sorted(search_space.keys())
        self._n_min_trials = len(self._all_grids)
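
    # Worked example (illustrative; not part of the original source): given
    # search_space = {'y': [0, 1], 'x': ['a', 'b']}, the constructor sorts both the
    # keys and the candidate values, so self._param_names == ['x', 'y'] and
    # self._all_grids == [('a', 0), ('a', 1), ('b', 0), ('b', 1)], and therefore
    # self._n_min_trials == 4.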

    def infer_relative_search_space(self, study, trial):
        # type: (Study, FrozenTrial) -> Dict[str, BaseDistribution]

        return {}

    def sample_relative(self, study, trial, search_space):
        # type: (Study, FrozenTrial, Dict[str, BaseDistribution]) -> Dict[str, Any]

        # Instead of returning param values, GridSampler puts the target grid id as a system
        # attr, and the values are returned from `sample_independent`. This is because the
        # distribution object is hard to obtain at the beginning of a trial, whereas we need
        # access to it to validate the sampled value.

        target_grids = self._get_unvisited_grid_ids(study)

        if len(target_grids) == 0:
            # This case may occur with distributed optimization or a trial queue. If there is no
            # target grid, `GridSampler` evaluates a visited, duplicated point with the current
            # trial. After that, the optimization stops.
            _logger.warning(
                "`GridSampler` is re-evaluating a configuration because the grid has been "
                "exhausted. This may happen due to a timing issue during distributed optimization "
                "or when re-running optimizations on already finished studies."
            )

            # In this case, one of the grids is picked up randomly.
            target_grids = list(range(len(self._all_grids)))
            study.stop()
        elif len(target_grids) == 1:
            # When there is only one target grid, the optimization stops after the current trial
            # finishes.
            study.stop()

        # In distributed optimization, multiple workers may simultaneously pick up the same grid.
        # To make such conflicts less frequent, the grid is chosen randomly.
        grid_id = random.choice(target_grids)

        study._storage.set_trial_system_attr(trial._trial_id, "search_space", self._search_space)
        study._storage.set_trial_system_attr(trial._trial_id, "grid_id", grid_id)

        return {}
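
    # Illustrative sketch (an assumption, not in the original source): after a study
    # that used `GridSampler` finishes, each trial's system attributes record which
    # grid point it evaluated, and `sample_independent` below reads them back, e.g.:
    #
    #     trial = study.trials[0]
    #     trial.system_attrs['grid_id']       # e.g. 2; an index into `_all_grids`
    #     trial.system_attrs['search_space']  # the sampler's sorted search space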

    def sample_independent(self, study, trial, param_name, param_distribution):
        # type: (Study, FrozenTrial, str, BaseDistribution) -> Any

        if param_name not in self._search_space:
            message = "The parameter name, {}, is not found in the given grid.".format(param_name)
            raise ValueError(message)

        # TODO(c-bata): Reduce the number of duplicated evaluations on multiple workers.
        # Current selection logic may evaluate the same parameters multiple times.
        # See https://gist.github.com/c-bata/f759f64becb24eea2040f4b2e3afce8f for details.
        grid_id = trial.system_attrs["grid_id"]
        param_value = self._all_grids[grid_id][self._param_names.index(param_name)]
        contains = param_distribution._contains(param_distribution.to_internal_repr(param_value))
        if not contains:
            raise ValueError(
                "The value `{}` is out of range of the parameter `{}`. Please make "
                "sure the search space of the `GridSampler` only contains values "
                "consistent with the distribution specified in the objective "
                "function. The distribution is: `{}`.".format(
                    param_value, param_name, param_distribution
                )
            )

        return param_value

    @staticmethod
    def _check_value(param_name, param_value):
        # type: (str, Any) -> None

        if param_value is None or isinstance(param_value, (str, int, float, bool)):
            return

        raise ValueError(
            "{} contains a value with the type of {}, which is not supported by "
            "`GridSampler`. Please make sure a value is `str`, `int`, `float`, `bool`"
            " or `None`.".format(param_name, type(param_value))
        )

    def _get_unvisited_grid_ids(self, study):
        # type: (Study) -> List[int]

        # List unvisited grids based on the trials that have already finished.
        visited_grids = []
        for t in study.trials:
            if (
                t.state.is_finished()
                and "grid_id" in t.system_attrs
                and self._same_search_space(t.system_attrs["search_space"])
            ):
                visited_grids.append(t.system_attrs["grid_id"])

        unvisited_grids = set(range(self._n_min_trials)) - set(visited_grids)

        return list(unvisited_grids)

    def _same_search_space(self, search_space):
        # type: (Mapping[str, Sequence[GridValueType]]) -> bool

        if set(search_space.keys()) != set(self._search_space.keys()):
            return False

        for param_name in search_space.keys():
            if len(search_space[param_name]) != len(self._search_space[param_name]):
                return False

            for i, param_value in enumerate(sorted(search_space[param_name])):
                if param_value != self._search_space[param_name][i]:
                    return False

        return True
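

# Minimal usage sketch (illustrative; not part of the original module). It
# demonstrates the first note in the class docstring: `GridSampler` internally
# calls `study.stop()` once every grid point has been evaluated, so asking for
# more trials than there are grid points still runs at most `len(grid)` trials.
# The `objective` function below is hypothetical and exists only for this example.
#
#     import optuna
#
#     def objective(trial):
#         x = trial.suggest_uniform('x', -1, 1)
#         return x ** 2
#
#     sampler = optuna.samplers.GridSampler({'x': [-1.0, 0.0, 1.0]})
#     study = optuna.create_study(sampler=sampler)
#     study.optimize(objective, n_trials=100)  # Stops after the 3 grid points.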