# Source code for optuna.samplers._qmc

```
import sys
from typing import Any
from typing import Dict
from typing import Optional
from typing import Sequence
import numpy as np
import optuna
from optuna import logging
from optuna._experimental import experimental
from optuna._imports import _LazyImport
from optuna._transform import _SearchSpaceTransform
from optuna.distributions import BaseDistribution
from optuna.distributions import CategoricalDistribution
from optuna.samplers import BaseSampler
from optuna.study import Study
from optuna.trial import FrozenTrial
from optuna.trial import TrialState
_logger = logging.get_logger(__name__)
_SUGGESTED_STATES = (TrialState.COMPLETE, TrialState.PRUNED)
[docs]@experimental("3.0.0")
class QMCSampler(BaseSampler):
"""A Quasi Monte Carlo Sampler that generates low-discrepancy sequences.
Quasi Monte Carlo (QMC) sequences are designed to have lower discrepancies than
standard random seqeunces. They are known to perform better than the standard
randam sequences in hyperparameter optimization.
For further information about the use of QMC sequences for hyperparameter optimization,
please refer to the following paper:
- `Bergstra, James, and Yoshua Bengio. Random search for hyper-parameter optimization.
Journal of machine learning research 13.2, 2012.
<https://jmlr.org/papers/v13/bergstra12a.html>`_
We use the QMC implementations in Scipy. For the details of the QMC algorithm,
see the Scipy API references on `scipy.stats.qmc
<https://scipy.github.io/devdocs/reference/stats.qmc.html>`_.
.. note:
If your search space contains categorical parameters, it samples the catagorical
parameters by its `independent_sampler` without using QMC algorithm.
.. note::
The search space of the sampler is determined by either previous trials in the study or
the first trial that this sampler samples.
If there are previous trials in the study, :class:`~optuna.samplers.QMCSamper` infers its
search space using the trial which was created first in the study.
Otherwise (if the study has no previous trials), :class:`~optuna.samplers.QMCSampler`
samples the first trial using its `independent_sampler` and then infers the search space
in the second trial.
As mentioned above, the search space of the :class:`~optuna.sampler.QMCSampler` is
determined by the first trial of the study. Once the search space is determined, it cannot
be changed afterwards.
.. note:
`QMCSampler` is not supported for Python 3.6 as it depends on `scipy.stat.qmc` module which
only supports Python 3.7 or the later versions.
Args:
qmc_type:
The type of QMC sequence to be sampled. This must be one of
`"halton"` and `"sobol"`. Default is `"sobol"`.
.. note::
Sobol' sequence is designed to have low-discrepancy property when the number of
samples is :math:`n=2^m` for each positive integer :math:`m`. When it is possible
to pre-specify the number of trials suggested by `QMCSampler`, it is recommended
that the number of trials should be set as power of two.
scramble:
If this option is :obj:`True`, scrambling (randomization) is applied to the QMC
sequences.
seed:
A seed for `QMCSampler`. This argument is used only when `scramble` is :obj:`True`.
If this is :obj:`None`, the seed is initialized randomly. Default is :obj:`None`.
.. note::
When using multiple :class:`~optuna.samplers.QMCSampler`'s in parallel and/or
distributed optimization, all the samplers must share the same seed when the
`scrambling` is enabled. Otherwise, the low-discrepancy property of the samples
will be degraded.
independent_sampler:
A :class:`~optuna.samplers.BaseSampler` instance that is used for independent
sampling. The first trial of the study and the parameters not contained in the
relative search space are sampled by this sampler.
If :obj:`None` is specified, :class:`~optuna.samplers.RandomSampler` is used
as the default.
.. seealso::
:class:`~optuna.samplers` module provides built-in independent samplers
such as :class:`~optuna.samplers.RandomSampler` and
:class:`~optuna.samplers.TPESampler`.
warn_independent_sampling:
If this is :obj:`True`, a warning message is emitted when
the value of a parameter is sampled by using an independent sampler.
Note that the parameters of the first trial in a study are sampled via an
independent sampler in most cases, so no warning messages are emitted in such cases.
warn_asyncronous_seeding:
If this is :obj:`True`, a warning message is emitted when the scrambling
(randomization) is applied to the QMC sequence and the random seed of the sampler is
not set manually.
.. note::
When using parallel and/or distributed optimization without manually
setting the seed, the seed is set randomly for each instances of
:class:`~optuna.samplers.QMCSampler` for different workers, which ends up
asyncronous seeding for multiple samplers used in the optimization.
.. seealso::
See parameter ``seed`` in :class:`~optuna.samplers.QMCSampler`.
Example:
Optimize a simple quadratic function by using :class:`~optuna.samplers.QMCSampler`.
.. testcode::
import optuna
def objective(trial):
x = trial.suggest_float("x", -1, 1)
y = trial.suggest_int("y", -1, 1)
return x**2 + y
sampler = optuna.samplers.QMCSampler()
study = optuna.create_study(sampler=sampler)
study.optimize(objective, n_trials=8)
"""
def __init__(
self,
*,
qmc_type: str = "sobol",
scramble: bool = False, # default is False for simplicity in distributed environment.
seed: Optional[int] = None,
independent_sampler: Optional[BaseSampler] = None,
warn_asyncronous_seeding: bool = True,
warn_independent_sampling: bool = True,
) -> None:
version = sys.version_info
if version < (3, 7, 0):
version_txt = str(version[0]) + "." + str(version[1]) + "." + str(version[2])
message = (
f"`QMCSampler` is not supported for Python {version_txt}. "
"Consider using Python 3.7 or later."
)
raise ValueError(message)
self._scramble = scramble
self._seed = seed or np.random.PCG64().random_raw()
self._independent_sampler = independent_sampler or optuna.samplers.RandomSampler(seed=seed)
self._initial_search_space: Optional[Dict[str, BaseDistribution]] = None
self._warn_independent_sampling = warn_independent_sampling
if qmc_type in ("halton", "sobol"):
self._qmc_type = qmc_type
else:
message = (
f'The `qmc_type`, "{qmc_type}", is not a valid. '
'It must be one of "halton" and "sobol".'
)
raise ValueError(message)
if seed is None and scramble and warn_asyncronous_seeding:
# Sobol/Halton sequences without scrambling do not use seed.
self._log_asyncronous_seeding()
[docs] def reseed_rng(self) -> None:
# We must not reseed the `self._seed` like below. Otherwise, workers will have different
# seed under parallel execution because `self.reseed_rng()` is called when starting each
# parallel executor.
# >>> self._seed = np.random.MT19937().random_raw()
self._independent_sampler.reseed_rng()
[docs] def infer_relative_search_space(
self, study: Study, trial: FrozenTrial
) -> Dict[str, BaseDistribution]:
if self._initial_search_space is not None:
return self._initial_search_space
past_trials = study.get_trials(deepcopy=False, states=_SUGGESTED_STATES)
# The initial trial is sampled by the independent sampler.
if len(past_trials) == 0:
return {}
# If an initial trial was already made,
# construct search_space of this sampler from the initial trial.
first_trial = min(past_trials, key=lambda t: t.number)
self._initial_search_space = self._infer_initial_search_space(first_trial)
return self._initial_search_space
def _infer_initial_search_space(self, trial: FrozenTrial) -> Dict[str, BaseDistribution]:
search_space: Dict[str, BaseDistribution] = {}
for param_name, distribution in trial.distributions.items():
if isinstance(distribution, CategoricalDistribution):
continue
search_space[param_name] = distribution
return search_space
@staticmethod
def _log_asyncronous_seeding() -> None:
_logger.warning(
"No seed is provided for `QMCSampler` and the seed is set randomly. "
"If you are running multiple `QMCSampler`s in parallel and/or distributed "
" environment, the same seed must be used in all samplers to ensure that resulting "
"samples are taken from the same QMC sequence. "
)
def _log_independent_sampling(self, trial: FrozenTrial, param_name: str) -> None:
_logger.warning(
f"The parameter '{param_name}' in trial#{trial.number} is sampled independently "
f"by using `{self._independent_sampler.__class__.__name__}` instead of `QMCSampler` "
"(optimization performance may be degraded). "
"`QMCSampler` does not support dynamic search space or `CategoricalDistribution`. "
"You can suppress this warning by setting `warn_independent_sampling` "
"to `False` in the constructor of `QMCSampler`, "
"if this independent sampling is intended behavior."
)
[docs] def sample_independent(
self,
study: Study,
trial: FrozenTrial,
param_name: str,
param_distribution: BaseDistribution,
) -> Any:
if self._initial_search_space is not None:
if self._warn_independent_sampling:
self._log_independent_sampling(trial, param_name)
return self._independent_sampler.sample_independent(
study, trial, param_name, param_distribution
)
[docs] def sample_relative(
self, study: Study, trial: FrozenTrial, search_space: Dict[str, BaseDistribution]
) -> Dict[str, Any]:
if search_space == {}:
return {}
sample = self._sample_qmc(study, search_space)
trans = _SearchSpaceTransform(search_space)
sample = trans.bounds[:, 0] + sample * (trans.bounds[:, 1] - trans.bounds[:, 0])
return trans.untransform(sample[0, :])
[docs] def after_trial(
self,
study: "optuna.Study",
trial: "optuna.trial.FrozenTrial",
state: TrialState,
values: Optional[Sequence[float]],
) -> None:
self._independent_sampler.after_trial(study, trial, state, values)
def _sample_qmc(self, study: Study, search_space: Dict[str, BaseDistribution]) -> np.ndarray:
# Lazy import because the `scipy.stats.qmc` is slow to import.
qmc_module = _LazyImport("scipy.stats.qmc")
sample_id = self._find_sample_id(study, search_space)
d = len(search_space)
if self._qmc_type == "halton":
qmc_engine = qmc_module.Halton(d, seed=self._seed, scramble=self._scramble)
elif self._qmc_type == "sobol":
qmc_engine = qmc_module.Sobol(d, seed=self._seed, scramble=self._scramble)
else:
raise ValueError("Invalid `qmc_type`")
forward_size = sample_id # `sample_id` starts from 0.
qmc_engine.fast_forward(forward_size)
sample = qmc_engine.random(1)
return sample
def _find_sample_id(self, study: Study, search_space: Dict[str, BaseDistribution]) -> int:
qmc_id = ""
qmc_id += self._qmc_type
# Sobol/Halton sequences without scrambling do not use seed.
if self._scramble:
qmc_id += f" (scramble=True, seed={self._seed})"
else:
qmc_id += " (scramble=False)"
key_qmc_id = qmc_id + "'s last sample id"
# TODO(kstoneriv3): Here, we ideally assume that the following block is
# an atomic transaction. Without such an assumption, the current implementation
# only ensures that each `sample_id` is sampled at least once.
system_attrs = study._storage.get_study_system_attrs(study._study_id)
if key_qmc_id in system_attrs.keys():
sample_id = system_attrs[key_qmc_id]
sample_id += 1
else:
sample_id = 0
study._storage.set_study_system_attr(study._study_id, key_qmc_id, sample_id)
return sample_id
```