Source code for orion.algo.robo.randomforest

"""
Wrapper for RoBO with Random Forest
"""
from __future__ import annotations

from typing import Sequence

import numpy
import pyrfr.regression as reg
from orion.algo.space import Space
from robo.models.random_forest import RandomForest

from orion.algo.robo.base import AcquisitionFnName, MaximizerName, RoBO, build_bounds


class RoBO_RandomForest(RoBO):
    """
    Wrapper for RoBO with Random Forest

    Parameters
    ----------
    space: ``orion.algo.space.Space``
        Optimisation space with priors for each dimension.
    seed: None, int or sequence of int
        Seed to sample initial points and candidate points.
        Default: 0.
    n_initial_points: int
        Number of initial points randomly sampled. If new points are requested and less than
        `n_initial_points` are observed, the next points will also be sampled randomly
        instead of being sampled from the model.
        Default: ``20``
    maximizer: str
        The optimizer for the acquisition function.
        Can be one of ``{"random", "scipy", "differential_evolution"}``.
        Defaults to ``"random"``.
    acquisition_func: str
        Name of the acquisition function. Can be one of ``['ei', 'log_ei', 'pi', 'lcb']``.
    num_trees: int
        The number of trees in the random forest.
        Defaults to 30.
    do_bootstrapping: bool
        Turns on / off bootstrapping in the random forest.
        Defaults to ``True``.
    n_points_per_tree: int
        Number of data points per tree. If set to 0, all data points are used in each tree.
        Defaults to 0.
    compute_oob_error: bool
        Turns on / off calculation of out-of-bag error.
        Defaults to ``False``.
    return_total_variance: bool
        Return law of total variance (mean of variances + variance of means, if True)
        or explained variance (variance of means, if False).
        Defaults to ``True``.

    """

    def __init__(
        self,
        space: Space,
        seed: int | Sequence[int] | None = 0,
        n_initial_points=20,
        maximizer: MaximizerName = "random",
        acquisition_func: AcquisitionFnName = "log_ei",
        num_trees: int = 30,
        do_bootstrapping: bool = True,
        n_points_per_tree: int = 0,
        compute_oob_error: bool = False,
        return_total_variance: bool = True,
    ):
        super().__init__(
            space,
            maximizer=maximizer,
            acquisition_func=acquisition_func,
            n_initial_points=n_initial_points,
            seed=seed,
        )
        self.num_trees = num_trees
        self.do_bootstrapping = do_bootstrapping
        self.n_points_per_tree = n_points_per_tree
        self.compute_oob_error = compute_oob_error
        self.return_total_variance = return_total_variance

    def build_model(self):
        """Build the wrapped RoBO random forest model, bounded by the optimisation space."""
        lower, upper = build_bounds(self.space)
        return OrionRandomForestWrapper(
            rng=None,
            num_trees=self.num_trees,
            do_bootstrapping=self.do_bootstrapping,
            n_points_per_tree=self.n_points_per_tree,
            compute_oob_error=self.compute_oob_error,
            return_total_variance=self.return_total_variance,
            lower=lower,
            upper=upper,
        )
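
A hypothetical configuration sketch (not part of the module): it shows how this algorithm
might be selected through Orion's Python client. The experiment name and search space are
invented for illustration, the algorithm key is assumed to match the class name, and the
``algorithms`` argument may differ across Orion versions; the options mirror the
constructor parameters documented above.

from orion.client import build_experiment

experiment = build_experiment(
    name="rf_example",                      # hypothetical experiment name
    space={"x": "uniform(0, 10)"},          # hypothetical 1-D search space
    algorithms={
        "RoBO_RandomForest": {              # key assumed to match the class name
            "seed": 1,
            "n_initial_points": 10,
            "maximizer": "random",
            "acquisition_func": "log_ei",
            "num_trees": 30,
            "do_bootstrapping": True,
        }
    },
)
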
class OrionRandomForestWrapper(RandomForest):
    """
    Wrapper for RoBO's RandomForest model

    Parameters
    ----------
    lower: np.array(D,)
        Lower bound of the input space which is used for the input space normalization.
    upper: np.array(D,)
        Upper bound of the input space which is used for the input space normalization.
    num_trees: int
        The number of trees in the random forest.
    do_bootstrapping: bool
        Turns on / off bootstrapping in the random forest.
    n_points_per_tree: int
        Number of data points per tree. If set to 0, all data points are used in each tree.
    compute_oob_error: bool
        Turns on / off calculation of out-of-bag error.
        Default: False
    return_total_variance: bool
        Return law of total variance (mean of variances + variance of means, if True)
        or explained variance (variance of means, if False).
        Default: True
    rng: np.random.RandomState
        Random number generator

    """

    def __init__(
        self,
        lower,
        upper,
        num_trees=30,
        do_bootstrapping=True,
        n_points_per_tree=0,
        compute_oob_error=False,
        return_total_variance=True,
        rng=None,
    ):
        super().__init__(
            num_trees=num_trees,
            do_bootstrapping=do_bootstrapping,
            n_points_per_tree=n_points_per_tree,
            compute_oob_error=compute_oob_error,
            return_total_variance=return_total_variance,
            rng=rng,
        )
        self.lower = lower
        self.upper = upper

    def train(self, X: numpy.ndarray, y: numpy.ndarray, **kwargs):
        """Seed the RNG of the random forest before calling the parent's ``train()``."""
        # NOTE: We cannot save `reg_rng` state so instead we control it
        # with random integers sampled from `rng` and keep track of `rng` state.
        self.reg_rng = reg.default_random_engine(int(self.rng.randint(int(10e8))))
        super().train(X, y, **kwargs)

    def predict(self, X_test: numpy.ndarray, **kwargs):
        """Seed the RNG of the random forest before calling the parent's ``predict()``."""
        # NOTE: We cannot save `reg_rng` state so instead we control it
        # with random integers sampled from `rng` and keep track of `rng` state.
        self.reg_rng = reg.default_random_engine(int(self.rng.randint(int(10e8))))
        return super().predict(X_test, **kwargs)

    def set_state(self, state_dict: dict) -> None:
        """Restore the state of the optimizer"""
        self.rng.set_state(state_dict["model_rng_state"])

    def state_dict(self):
        """Return the current state of the optimizer so that it can be restored"""
        return {
            "model_rng_state": self.rng.get_state(),
        }

    def seed(self, seed):
        """Seed all internal RNGs"""
        self.rng.seed(seed)
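
A minimal reproducibility sketch (not part of the module), assuming RoBO and ``pyrfr`` are
installed and that ``predict`` returns a ``(mean, variance)`` pair as in RoBO's base model;
the bounds and toy data below are invented. It illustrates why ``train()`` and ``predict()``
reseed ``reg_rng`` from ``rng``: restoring ``rng`` with ``set_state()`` makes the forest's
randomness repeat.

import numpy

lower = numpy.zeros(2)                     # hypothetical normalized bounds
upper = numpy.ones(2)
model = OrionRandomForestWrapper(lower=lower, upper=upper, num_trees=10)
model.seed(42)

X = numpy.random.RandomState(0).uniform(lower, upper, size=(15, 2))
y = (X ** 2).sum(axis=1)                   # toy objective values

state = model.state_dict()                 # snapshot of `rng` before training
model.train(X, y)
mean_a, var_a = model.predict(X[:3])

model.set_state(state)                     # rewind `rng` to the snapshot
model.train(X, y)
mean_b, var_b = model.predict(X[:3])
# Both runs seed `reg_rng` with the same integers, so the predictions
# (mean_a, var_a) and (mean_b, var_b) are expected to match.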