From 3d20dc09f297c13902081c24ad14cd99e9fa1c5c Mon Sep 17 00:00:00 2001
From: Sam Daulton
Date: Fri, 18 Jun 2021 19:16:53 -0700
Subject: [PATCH] make reference point optional (#601)

Summary:
Pull Request resolved: https://github.com/facebook/Ax/pull/601

This diff does two things:
1) If a user creates a `MultiObjectiveTorchModelBridge` and calls `gen` without specifying objective thresholds, we infer the objective thresholds within the `MultiObjectiveBotorchModel`, generate candidates, and return the inferred objective thresholds in `gen_metadata`.
2) It adds an `infer_objective_thresholds` method to the `MultiObjectiveTorchModelBridge`, which can be used to infer objective thresholds without generating candidates.

This refactors the base `ModelBridge.gen` and `ArrayModelBridge._gen` methods to apply transformations within utility functions (`_get_transformed_gen_args` and `_get_transformed_model_gen_args`).

Note that `infer_objective_thresholds` returns `ObservationData`. If the user wants to plot outcomes together with objective thresholds, they have to create `ObjectiveThreshold` objects from the returned values and set them on the optimization config.

Reviewed By: Balandat

Differential Revision: D28163744

fbshipit-source-id: c74908290bf6b7162771c0768e06bf8181fd4185
---
 ax/modelbridge/array.py                       |  88 +++++---
 ax/modelbridge/base.py                        |  89 +++++---
 ax/modelbridge/multi_objective_torch.py       | 111 +++++++++-
 .../test_multi_objective_torch_modelbridge.py | 190 +++++++++++++++++-
 ax/modelbridge/transforms/trial_as_task.py    |   2 +-
 ax/models/tests/test_botorch_moo_model.py     | 184 ++++++++++++++++-
 ax/models/tests/test_torch.py                 |   6 +
 ax/models/torch/botorch_moo.py                | 105 +++++++++-
 ax/models/torch_base.py                       |  37 +++-
 9 files changed, 748 insertions(+), 64 deletions(-)

diff --git a/ax/modelbridge/array.py b/ax/modelbridge/array.py
index 9d9db904b30..9189fca638e 100644
--- a/ax/modelbridge/array.py
+++ b/ax/modelbridge/array.py
@@ -4,6 +4,7 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 
+from dataclasses import dataclass
 from typing import Any, Callable, Dict, List, Optional, Set, Tuple
 
 import numpy as np
@@ -35,6 +36,18 @@
 FIT_MODEL_ERROR = "Model must be fit before {action}."
 
 
+@dataclass
+class ArrayModelGenArgs:
+    search_space_digest: SearchSpaceDigest
+    objective_weights: np.ndarray
+    outcome_constraints: Optional[Tuple[np.ndarray, np.ndarray]]
+    linear_constraints: Optional[Tuple[np.ndarray, np.ndarray]]
+    fixed_features: Optional[Dict[int, float]]
+    pending_observations: Optional[List[np.ndarray]]
+    rounding_func: Callable[[np.ndarray], np.ndarray]
+    extra_model_gen_kwargs: Dict[str, Any]
+
+
 # pyre-fixme[13]: Attribute `model` is never initialized.
 # pyre-fixme[13]: Attribute `outcomes` is never initialized.
 # pyre-fixme[13]: Attribute `parameters` is never initialized.
@@ -177,25 +190,14 @@ def _get_extra_model_gen_kwargs(
     ) -> Dict[str, Any]:
         return {}
 
-    def _gen(
+    def _get_transformed_model_gen_args(
         self,
-        n: int,
         search_space: SearchSpace,
         pending_observations: Dict[str, List[ObservationFeatures]],
         fixed_features: ObservationFeatures,
         model_gen_options: Optional[TConfig] = None,
         optimization_config: Optional[OptimizationConfig] = None,
-    ) -> Tuple[
-        List[ObservationFeatures],
-        List[float],
-        Optional[ObservationFeatures],
-        TGenMetadata,
-    ]:
-        """Generate new candidates according to search_space and
-        optimization_config.
-
-        The outcome constraints should be transformed to no longer be relative.
-        """
+    ) -> ArrayModelGenArgs:
         # Validation
         if not self.parameters:  # pragma: no cover
             raise ValueError(FIT_MODEL_ERROR.format(action="_gen"))
@@ -228,19 +230,59 @@ def _gen(
         pending_array = pending_observations_as_array(
             pending_observations, self.outcomes, self.parameters
         )
-        # Generate the candidates
-        X, w, gen_metadata, candidate_metadata = self._model_gen(
-            n=n,
-            bounds=search_space_digest.bounds,
+        return ArrayModelGenArgs(
+            search_space_digest=search_space_digest,
             objective_weights=objective_weights,
             outcome_constraints=outcome_constraints,
             linear_constraints=linear_constraints,
             fixed_features=fixed_features_dict,
             pending_observations=pending_array,
-            model_gen_options=model_gen_options,
             rounding_func=transform_callback(self.parameters, self.transforms),
+            extra_model_gen_kwargs=extra_model_gen_kwargs,
+        )
+
+    def _gen(
+        self,
+        n: int,
+        search_space: SearchSpace,
+        pending_observations: Dict[str, List[ObservationFeatures]],
+        fixed_features: ObservationFeatures,
+        model_gen_options: Optional[TConfig] = None,
+        optimization_config: Optional[OptimizationConfig] = None,
+    ) -> Tuple[
+        List[ObservationFeatures],
+        List[float],
+        Optional[ObservationFeatures],
+        TGenMetadata,
+    ]:
+        """Generate new candidates according to search_space and
+        optimization_config.
+
+        The outcome constraints should be transformed to no longer be relative.
+        """
+        array_model_gen_args = self._get_transformed_model_gen_args(
+            search_space=search_space,
+            pending_observations=pending_observations,
+            fixed_features=fixed_features,
+            model_gen_options=model_gen_options,
+            optimization_config=optimization_config,
+        )
+
+        # Generate the candidates
+        search_space_digest = array_model_gen_args.search_space_digest
+        # TODO: pass array_model_gen_args to _model_gen
+        X, w, gen_metadata, candidate_metadata = self._model_gen(
+            n=n,
+            bounds=search_space_digest.bounds,
+            objective_weights=array_model_gen_args.objective_weights,
+            outcome_constraints=array_model_gen_args.outcome_constraints,
+            linear_constraints=array_model_gen_args.linear_constraints,
+            fixed_features=array_model_gen_args.fixed_features,
+            pending_observations=array_model_gen_args.pending_observations,
+            model_gen_options=model_gen_options,
+            rounding_func=array_model_gen_args.rounding_func,
             target_fidelities=search_space_digest.target_fidelities,
-            **extra_model_gen_kwargs,
+            **array_model_gen_args.extra_model_gen_kwargs,
         )
         # Transform array to observations
         observation_features = parse_observation_features(
@@ -248,10 +290,10 @@ def _gen(
         )
         xbest = self._model_best_point(
             bounds=search_space_digest.bounds,
-            objective_weights=objective_weights,
-            outcome_constraints=outcome_constraints,
-            linear_constraints=linear_constraints,
-            fixed_features=fixed_features_dict,
+            objective_weights=array_model_gen_args.objective_weights,
+            outcome_constraints=array_model_gen_args.outcome_constraints,
+            linear_constraints=array_model_gen_args.linear_constraints,
+            fixed_features=array_model_gen_args.fixed_features,
             model_gen_options=model_gen_options,
             target_fidelities=search_space_digest.target_fidelities,
         )
diff --git a/ax/modelbridge/base.py b/ax/modelbridge/base.py
index 8f94231c942..c299d1f1501 100644
--- a/ax/modelbridge/base.py
+++ b/ax/modelbridge/base.py
@@ -8,6 +8,7 @@
 from abc import ABC
 from collections import OrderedDict
 from copy import deepcopy
+from dataclasses import dataclass
 from typing import Any, Dict, List, MutableMapping, Optional, Set, Tuple, Type
 
 from ax.core.arm import Arm
@@ -43,6 +44,14 @@
 logger = get_logger(__name__)
 
 
+@dataclass
+class BaseGenArgs:
+    search_space: SearchSpace
+    optimization_config: OptimizationConfig
+    pending_observations: Dict[str, List[ObservationFeatures]]
+    fixed_features: ObservationFeatures
+
+
 class ModelBridge(ABC):
     """The main object for using models in Ax.
 
@@ -576,40 +585,18 @@ def _update(
         """
         raise NotImplementedError  # pragma: no cover
 
-    def gen(
+    def _get_transformed_gen_args(
         self,
-        n: int,
-        search_space: Optional[SearchSpace] = None,
+        search_space: SearchSpace,
         optimization_config: Optional[OptimizationConfig] = None,
         pending_observations: Optional[Dict[str, List[ObservationFeatures]]] = None,
         fixed_features: Optional[ObservationFeatures] = None,
-        model_gen_options: Optional[TConfig] = None,
-    ) -> GeneratorRun:
-        """
-        Args:
-            n: Number of points to generate
-            search_space: Search space
-            optimization_config: Optimization config
-            pending_observations: A map from metric name to pending
-                observations for that metric.
-            fixed_features: An ObservationFeatures object containing any
-                features that should be fixed at specified values during
-                generation.
-            model_gen_options: A config dictionary that is passed along to the
-                model.
-        """
-        t_gen_start = time.time()
+    ) -> BaseGenArgs:
         if pending_observations is None:
             pending_observations = {}
         if fixed_features is None:
             fixed_features = ObservationFeatures({})
 
-        # Get modifiable versions
-        if search_space is None:
-            search_space = self._model_space
-        orig_search_space = search_space
-        search_space = search_space.clone()
-
         if optimization_config is None:
             optimization_config = (
                 # pyre-fixme[16]: `Optional` has no attribute `clone`.
@@ -636,14 +623,55 @@ def gen(
         for metric, po in pending_observations.items():
             pending_observations[metric] = t.transform_observation_features(po)
         fixed_features = t.transform_observation_features([fixed_features])[0]
+        return BaseGenArgs(
+            search_space=search_space,
+            optimization_config=optimization_config,
+            pending_observations=pending_observations,
+            fixed_features=fixed_features,
+        )
 
-        # Apply terminal transform and gen
-        observation_features, weights, best_obsf, gen_metadata = self._gen(
-            n=n,
+    def gen(
+        self,
+        n: int,
+        search_space: Optional[SearchSpace] = None,
+        optimization_config: Optional[OptimizationConfig] = None,
+        pending_observations: Optional[Dict[str, List[ObservationFeatures]]] = None,
+        fixed_features: Optional[ObservationFeatures] = None,
+        model_gen_options: Optional[TConfig] = None,
+    ) -> GeneratorRun:
+        """
+        Args:
+            n: Number of points to generate
+            search_space: Search space
+            optimization_config: Optimization config
+            pending_observations: A map from metric name to pending
+                observations for that metric.
+            fixed_features: An ObservationFeatures object containing any
+                features that should be fixed at specified values during
+                generation.
+            model_gen_options: A config dictionary that is passed along to the
+                model.
+        """
+        t_gen_start = time.time()
+        # Get modifiable versions
+        if search_space is None:
+            search_space = self._model_space
+        orig_search_space = search_space
+        search_space = search_space.clone()
+        base_gen_args = self._get_transformed_gen_args(
             search_space=search_space,
             optimization_config=optimization_config,
             pending_observations=pending_observations,
             fixed_features=fixed_features,
+        )
+
+        # Apply terminal transform and gen
+        observation_features, weights, best_obsf, gen_metadata = self._gen(
+            n=n,
+            search_space=base_gen_args.search_space,
+            optimization_config=base_gen_args.optimization_config,
+            pending_observations=base_gen_args.pending_observations,
+            fixed_features=base_gen_args.fixed_features,
             model_gen_options=model_gen_options,
         )
         # Apply reverse transforms
@@ -692,11 +720,12 @@ def gen(
         immutable = getattr(
             self, "_experiment_has_immutable_search_space_and_opt_config", False
         )
+        optimization_config = None if immutable else base_gen_args.optimization_config
         gr = GeneratorRun(
             arms=arms,
             weights=weights,
-            optimization_config=None if immutable else optimization_config,
-            search_space=None if immutable else search_space,
+            optimization_config=optimization_config,
+            search_space=None if immutable else base_gen_args.search_space,
             model_predictions=model_predictions,
             best_arm_predictions=None
             if best_arm is None
diff --git a/ax/modelbridge/multi_objective_torch.py b/ax/modelbridge/multi_objective_torch.py
index 3302f268e93..1422e8daf8e 100644
--- a/ax/modelbridge/multi_objective_torch.py
+++ b/ax/modelbridge/multi_objective_torch.py
@@ -4,7 +4,7 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 
-from typing import Any, Callable, Dict, List, Optional, Tuple, Type
+from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
 
 import numpy as np
 import torch
@@ -22,6 +22,7 @@
 from ax.modelbridge.array import FIT_MODEL_ERROR
 from ax.modelbridge.modelbridge_utils import (
     extract_objective_thresholds,
+    parse_observation_features,
     validate_and_apply_final_transform,
 )
 from ax.modelbridge.torch import TorchModelBridge
@@ -34,6 +35,7 @@
 from ax.utils.common.docutils import copy_doc
 from ax.utils.common.logger import get_logger
 from ax.utils.common.typeutils import checked_cast_optional, not_none
+from torch import Tensor
 
 logger = get_logger("MultiObjectiveTorchModelBridge")
 
@@ -168,6 +170,18 @@ def _model_gen(
             rounding_func=tensor_rounding_func,
             target_fidelities=target_fidelities,
         )
+        # if objective_thresholds are supplied by the user, then the
+        # transformed user-specified objective thresholds are in
+        # gen_metadata. Otherwise, inferred objective thresholds are
+        # in gen_metadata.
+        objective_thresholds = gen_metadata["objective_thresholds"]
+        obj_thlds = self.untransform_objective_thresholds(
+            objective_thresholds=objective_thresholds,
+            objective_weights=obj_w,
+            bounds=bounds,
+            fixed_features=fixed_features,
+        )
+        gen_metadata["objective_thresholds"] = obj_thlds
         return (
             X.detach().cpu().clone().numpy(),
             w.detach().cpu().clone().numpy(),
@@ -225,3 +239,98 @@ def _get_frontier_evaluator(self) -> TFrontierEvaluator:
             if hasattr(self.model, "frontier_evaluator")
             else get_default_frontier_evaluator()
         )
+
+    def infer_objective_thresholds(
+        self,
+        search_space: Optional[SearchSpace] = None,
+        optimization_config: Optional[OptimizationConfig] = None,
+        fixed_features: Optional[ObservationFeatures] = None,
+    ) -> ObservationData:
+        """Infer objective thresholds.
+
+        This method uses the model-estimated Pareto frontier over the in-sample
+        points to infer absolute (not relativized) objective thresholds.
+
+        This uses a heuristic that sets the objective threshold to be a scaled
+        nadir point, where the nadir point is scaled back based on the range of
+        each objective across the current in-sample Pareto frontier.
+        """
+        if search_space is None:
+            search_space = self._model_space
+        search_space = search_space.clone()
+        base_gen_args = self._get_transformed_gen_args(
+            search_space=search_space,
+            optimization_config=optimization_config,
+            fixed_features=fixed_features,
+        )
+        # get transformed args from ArrayModelbridge
+        array_model_gen_args = self._get_transformed_model_gen_args(
+            search_space=base_gen_args.search_space,
+            fixed_features=base_gen_args.fixed_features,
+            pending_observations={},
+            optimization_config=base_gen_args.optimization_config,
+        )
+        # get transformed args from TorchModelbridge
+        obj_w, oc_c, l_c, pend_obs, _ = validate_and_apply_final_transform(
+            objective_weights=array_model_gen_args.objective_weights,
+            outcome_constraints=array_model_gen_args.outcome_constraints,
+            pending_observations=None,
+            linear_constraints=array_model_gen_args.linear_constraints,
+            final_transform=self._array_to_tensor,
+        )
+        # infer objective thresholds
+        objective_thresholds = not_none(self.model).infer_objective_thresholds(
+            objective_weights=obj_w,
+            bounds=array_model_gen_args.search_space_digest.bounds,
+            outcome_constraints=oc_c,
+            linear_constraints=l_c,
+            fixed_features=array_model_gen_args.fixed_features,
+        )
+        return self.untransform_objective_thresholds(
+            objective_thresholds=objective_thresholds,
+            objective_weights=obj_w,
+            bounds=array_model_gen_args.search_space_digest.bounds,
+            fixed_features=array_model_gen_args.fixed_features,
+        )
+
+    def untransform_objective_thresholds(
+        self,
+        objective_thresholds: Tensor,
+        objective_weights: Tensor,
+        bounds: List[Tuple[Union[int, float], Union[int, float]]],
+        fixed_features: Optional[Dict[int, float]],
+    ) -> ObservationData:
+        objective_thresholds_np = objective_thresholds.cpu().numpy()
+        # pyre-ignore [16]
+        objective_indices = objective_weights.nonzero().view(-1).tolist()
+        objective_names = [self.outcomes[i] for i in objective_indices]
+        # create an ObservationData object for untransforming the objective
+        # thresholds
+        observation_data = [
+            ObservationData(
+                metric_names=objective_names,
+                means=objective_thresholds_np[objective_indices].copy(),
+                covariance=np.zeros((len(objective_indices), len(objective_indices))),
+            )
+        ]
+        # Untransform objective thresholds. Note: there is one objective threshold
+        # for every outcome.
+        # Construct dummy observation features
+        X = [bound[0] for bound in bounds]
+        fixed_features = fixed_features or {}
+        for i, val in fixed_features.items():
+            X[i] = val
+        observation_features = parse_observation_features(
+            X=np.array([X]),
+            param_names=self.parameters,
+        )
+        # Apply reverse transforms, in reverse order
+        for t in reversed(self.transforms.values()):
+            observation_data = t.untransform_observation_data(
+                observation_data=observation_data,
+                observation_features=observation_features,
+            )
+            observation_features = t.untransform_observation_features(
+                observation_features=observation_features,
+            )
+        observation_data = observation_data[0]
+        return observation_data
diff --git a/ax/modelbridge/tests/test_multi_objective_torch_modelbridge.py b/ax/modelbridge/tests/test_multi_objective_torch_modelbridge.py
index 8c8789c34de..a0c1fab6e96 100644
--- a/ax/modelbridge/tests/test_multi_objective_torch_modelbridge.py
+++ b/ax/modelbridge/tests/test_multi_objective_torch_modelbridge.py
@@ -4,12 +4,19 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 
+from contextlib import ExitStack
 from unittest.mock import patch
 
 import numpy as np
 import torch
 from ax.core.observation import ObservationFeatures, ObservationData
-from ax.core.outcome_constraint import ComparisonOp, ObjectiveThreshold
+from ax.core.outcome_constraint import (
+    ComparisonOp,
+    ObjectiveThreshold,
+    OutcomeConstraint,
+)
+from ax.core.parameter_constraint import ParameterConstraint
+from ax.modelbridge.factory import get_sobol
 from ax.modelbridge.modelbridge_utils import (
     get_pareto_frontier_and_transformed_configs,
     pareto_frontier,
@@ -19,15 +26,17 @@
     observed_pareto_frontier,
 )
 from ax.modelbridge.multi_objective_torch import MultiObjectiveTorchModelBridge
+from ax.modelbridge.registry import Cont_X_trans, Y_trans, ST_MTGP_trans
 from ax.modelbridge.transforms.base import Transform
 from ax.models.torch.botorch_moo import MultiObjectiveBotorchModel
 from ax.models.torch.botorch_moo_defaults import pareto_frontier_evaluator
+from ax.service.utils.report_utils import exp_to_df
 from ax.utils.common.testutils import TestCase
 from ax.utils.testing.core_stubs import (
     get_branin_data_multi_objective,
     get_branin_experiment_with_multi_objective,
 )
-
+from botorch.utils.multi_objective.pareto import is_non_dominated
 
 PARETO_FRONTIER_EVALUATOR_PATH = (
     f"{pareto_frontier_evaluator.__module__}.pareto_frontier_evaluator"
@@ -341,3 +350,180 @@ def test_hypervolume(self, _):
             observation_features=observation_features,
         )
         self.assertTrue(predicted_hv >= 0)
+
+    @patch(
+        # Mocking `BraninMetric` as not available while running, so it will
+        # be grabbed from cache during `fetch_data`.
+        f"{STUBS_PATH}.BraninMetric.is_available_while_running",
+        return_value=False,
+    )
+    def test_infer_objective_thresholds(self, _):
+        # lightweight test
+        exp = get_branin_experiment_with_multi_objective(
+            has_optimization_config=True,
+            with_batch=True,
+            with_status_quo=True,
+        )
+        for trial in exp.trials.values():
+            trial.mark_running(no_runner_required=True).mark_completed()
+        exp.attach_data(
+            get_branin_data_multi_objective(trial_indices=exp.trials.keys())
+        )
+        data = exp.fetch_data()
+        modelbridge = MultiObjectiveTorchModelBridge(
+            search_space=exp.search_space,
+            model=MultiObjectiveBotorchModel(),
+            optimization_config=exp.optimization_config,
+            transforms=Cont_X_trans + Y_trans,
+            experiment=exp,
+            data=data,
+        )
+        fixed_features = ObservationFeatures(parameters={"x1": 0.0})
+        search_space = exp.search_space.clone()
+        param_constraints = [
+            ParameterConstraint(constraint_dict={"x1": 1.0}, bound=10.0)
+        ]
+        outcome_constraints = [
+            OutcomeConstraint(
+                metric=exp.metrics["branin_a"],
+                op=ComparisonOp.GEQ,
+                bound=-40.0,
+                relative=False,
+            )
+        ]
+        search_space.add_parameter_constraints(param_constraints)
+        exp.optimization_config.outcome_constraints = outcome_constraints
+        expected_base_gen_args = modelbridge._get_transformed_gen_args(
+            search_space=search_space.clone(),
+            optimization_config=exp.optimization_config,
+            fixed_features=fixed_features,
+        )
+        with ExitStack() as es:
+            mock_model_infer_obj_t = es.enter_context(
+                patch.object(
+                    modelbridge.model,
+                    "infer_objective_thresholds",
+                    wraps=modelbridge.model.infer_objective_thresholds,
+                )
+            )
+            mock_get_transformed_gen_args = es.enter_context(
+                patch.object(
+                    modelbridge,
+                    "_get_transformed_gen_args",
+                    wraps=modelbridge._get_transformed_gen_args,
+                )
+            )
+            mock_get_transformed_model_gen_args = es.enter_context(
+                patch.object(
+                    modelbridge,
+                    "_get_transformed_model_gen_args",
+                    wraps=modelbridge._get_transformed_model_gen_args,
+                )
+            )
+            mock_untransform_objective_thresholds = es.enter_context(
+                patch.object(
+                    modelbridge,
+                    "untransform_objective_thresholds",
+                    wraps=modelbridge.untransform_objective_thresholds,
+                )
+            )
+            obs_data = modelbridge.infer_objective_thresholds(
+                search_space=search_space,
+                optimization_config=exp.optimization_config,
+                fixed_features=fixed_features,
+            )
+            ckwargs = mock_model_infer_obj_t.call_args[1]
+            self.assertTrue(torch.equal(ckwargs["objective_weights"], torch.ones(2)))
+            # check that transforms have been applied (at least UnitX)
+            self.assertEqual(ckwargs["bounds"], [(0.0, 1.0), (0.0, 1.0)])
+            oc = ckwargs["outcome_constraints"]
+            self.assertTrue(torch.equal(oc[0], torch.tensor([[-1.0, 0.0]])))
+            self.assertTrue(torch.equal(oc[1], torch.tensor([[45.0]])))
+            lc = ckwargs["linear_constraints"]
+            self.assertTrue(torch.equal(lc[0], torch.tensor([[15.0, 0.0]])))
+            self.assertTrue(torch.equal(lc[1], torch.tensor([[15.0]])))
+            self.assertEqual(ckwargs["fixed_features"], {0: 1.0 / 3.0})
+            mock_get_transformed_gen_args.assert_called_once()
+            mock_get_transformed_model_gen_args.assert_called_once_with(
+                search_space=expected_base_gen_args.search_space,
+                fixed_features=expected_base_gen_args.fixed_features,
+                pending_observations=expected_base_gen_args.pending_observations,
+                optimization_config=expected_base_gen_args.optimization_config,
+            )
+            mock_untransform_objective_thresholds.assert_called_once()
+            ckwargs = mock_untransform_objective_thresholds.call_args[1]
+
+            self.assertTrue(torch.equal(ckwargs["objective_weights"], torch.ones(2)))
+            self.assertEqual(ckwargs["bounds"], [(0.0, 1.0), (0.0, 1.0)])
+            self.assertEqual(ckwargs["fixed_features"], {0: 1.0 / 3.0})
+            self.assertEqual(obs_data.metric_names, ["branin_a", "branin_b"])
+            df = exp_to_df(exp)
+            Y = np.stack([df.branin_a.values, df.branin_b.values]).T
+            Y = torch.from_numpy(Y)
+            pareto_Y = Y[is_non_dominated(Y)]
+            nadir = pareto_Y.min(dim=0).values
+            self.assertTrue(np.all(np.array(obs_data.means) < nadir.numpy()))
+            self.assertTrue(np.all(np.array(obs_data.covariance) == 0.0))
+        # test using MTGP
+        sobol_generator = get_sobol(search_space=exp.search_space)
+        sobol_run = sobol_generator.gen(n=5)
+        trial = exp.new_batch_trial(optimize_for_power=True)
+        trial.add_generator_run(sobol_run)
+        trial.mark_running(no_runner_required=True).mark_completed()
+        data = exp.fetch_data()
+        modelbridge = MultiObjectiveTorchModelBridge(
+            search_space=exp.search_space,
+            model=MultiObjectiveBotorchModel(),
+            optimization_config=exp.optimization_config,
+            transforms=ST_MTGP_trans,
+            experiment=exp,
+            data=data,
+        )
+        fixed_features = ObservationFeatures(parameters={}, trial_index=1)
+        expected_base_gen_args = modelbridge._get_transformed_gen_args(
+            search_space=search_space.clone(),
+            optimization_config=exp.optimization_config,
+            fixed_features=fixed_features,
+        )
+        with self.assertRaises(ValueError):
+            # Check that a ValueError is raised when MTGP is being used
+            # and trial_index is not specified as a fixed feature.
+            # Note: this error is raised by StratifiedStandardizeY
+            modelbridge.infer_objective_thresholds(
+                search_space=search_space,
+                optimization_config=exp.optimization_config,
+            )
+        with ExitStack() as es:
+            mock_model_infer_obj_t = es.enter_context(
+                patch.object(
+                    modelbridge.model,
+                    "infer_objective_thresholds",
+                    wraps=modelbridge.model.infer_objective_thresholds,
+                )
+            )
+            mock_untransform_objective_thresholds = es.enter_context(
+                patch.object(
+                    modelbridge,
+                    "untransform_objective_thresholds",
+                    wraps=modelbridge.untransform_objective_thresholds,
+                )
+            )
+            obs_data = modelbridge.infer_objective_thresholds(
+                search_space=search_space,
+                optimization_config=exp.optimization_config,
+                fixed_features=fixed_features,
+            )
+            ckwargs = mock_model_infer_obj_t.call_args[1]
+            self.assertEqual(ckwargs["fixed_features"], {2: 1.0})
+            mock_untransform_objective_thresholds.assert_called_once()
+            ckwargs = mock_untransform_objective_thresholds.call_args[1]
+            self.assertEqual(ckwargs["fixed_features"], {2: 1.0})
+            self.assertEqual(obs_data.metric_names, ["branin_a", "branin_b"])
+            df = exp_to_df(exp)
+            trial_mask = df.trial_index == 1
+            Y = np.stack(
+                [df.branin_a.values[trial_mask], df.branin_b.values[trial_mask]]
+            ).T
+            Y = torch.from_numpy(Y)
+            pareto_Y = Y[is_non_dominated(Y)]
+            nadir = pareto_Y.min(dim=0).values
+            self.assertTrue(np.all(np.array(obs_data.means) < nadir.numpy()))
+            self.assertTrue(np.all(np.array(obs_data.covariance) == 0.0))
diff --git a/ax/modelbridge/transforms/trial_as_task.py b/ax/modelbridge/transforms/trial_as_task.py
index 1dd60bee70f..c576d3ac451 100644
--- a/ax/modelbridge/transforms/trial_as_task.py
+++ b/ax/modelbridge/transforms/trial_as_task.py
@@ -97,7 +97,7 @@ def transform_observation_features(
 
     def transform_search_space(self, search_space: SearchSpace) -> SearchSpace:
         for p_name, level_dict in self.trial_level_map.items():
-            level_values = list(set(level_dict.values()))
+            level_values = sorted(set(level_dict.values()))
             if len(level_values) == 1:
                 raise ValueError(
                     "TrialAsTask transform expects 2+ task params, "
diff --git a/ax/models/tests/test_botorch_moo_model.py b/ax/models/tests/test_botorch_moo_model.py
index b8b496b9c0a..4949d13b957 100644
--- a/ax/models/tests/test_botorch_moo_model.py
+++ b/ax/models/tests/test_botorch_moo_model.py
@@ -4,21 +4,25 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 
+from contextlib import ExitStack
 from typing import Dict
 from unittest import mock
 
 import ax.models.torch.botorch_moo as botorch_moo
+import numpy as np
 import torch
 from ax.core.search_space import SearchSpaceDigest
 from ax.exceptions.core import AxError
 from ax.models.torch.botorch_defaults import get_NEI
 from ax.models.torch.botorch_moo import MultiObjectiveBotorchModel
-from ax.models.torch.utils import HYPERSPHERE
+from ax.models.torch.utils import HYPERSPHERE, _get_X_pending_and_observed
 from ax.utils.common.testutils import TestCase
 from botorch.acquisition.multi_objective import monte_carlo as moo_monte_carlo
-from botorch.models import ModelListGP
+from botorch.models import ModelListGP, FixedNoiseGP
 from botorch.models.transforms.input import Warp
+from botorch.utils.multi_objective.hypervolume import infer_reference_point
 from botorch.utils.multi_objective.scalarization import get_chebyshev_scalarization
+from botorch.utils.testing import MockPosterior
 
 FIT_MODEL_MO_PATH = "ax.models.torch.botorch_defaults.fit_gpytorch_model"
 SAMPLE_SIMPLEX_UTIL_PATH = "ax.models.torch.utils.sample_simplex"
@@ -286,7 +290,7 @@ def test_BotorchMOOModel_with_ehvi(self, dtype=torch.float, cuda=False):
         ) as _, mock.patch(
             PARTITIONING_PATH, wraps=moo_monte_carlo.NondominatedPartitioning
         ) as _mock_partitioning:
-            model.gen(
+            _, _, gen_metadata, _ = model.gen(
                 n,
                 bounds,
                 objective_weights,
@@ -297,6 +301,7 @@
             self.assertEqual(1, _mock_ehvi_acqf.call_count)
             # check partitioning strategy
             self.assertEqual(_mock_partitioning.call_args[1]["alpha"], 0.0)
+            self.assertTrue(torch.equal(gen_metadata["objective_thresholds"], obj_t))
 
         # 3 objective
         with mock.patch(FIT_MODEL_MO_PATH) as _mock_fit_model:
@@ -330,6 +335,179 @@
             # check partitioning strategy
             self.assertEqual(_mock_partitioning.call_args[1]["alpha"], 1e-5)
 
+        # test inferred objective thresholds in gen()
+        with mock.patch(FIT_MODEL_MO_PATH) as _mock_fit_model:
+            # create several data points
+            Xs1 = [torch.cat([Xs1[0], Xs1[0] - 0.1], dim=0)]
+            Ys1 = [torch.cat([Ys1[0], Ys1[0] - 0.5], dim=0)]
+            Ys2 = [torch.cat([Ys2[0], Ys2[0] + 0.5], dim=0)]
+            Yvars1 = [torch.cat([Yvars1[0], Yvars1[0] + 0.2], dim=0)]
+            Yvars2 = [torch.cat([Yvars2[0], Yvars2[0] + 0.1], dim=0)]
+            model.fit(
+                Xs=Xs1 + Xs1,
+                Ys=Ys1 + Ys2,
+                Yvars=Yvars1 + Yvars2,
+                search_space_digest=SearchSpaceDigest(
+                    feature_names=fns,
+                    bounds=bounds,
+                    task_features=tfs,
+                ),
+                metric_names=mns + ["dummy_metric"],
+            )
+            with ExitStack() as es:
+                _mock_ehvi_acqf = es.enter_context(
+                    mock.patch(
+                        EHVI_ACQF_PATH,
+                        wraps=moo_monte_carlo.qExpectedHypervolumeImprovement,
+                    )
+                )
+                es.enter_context(
+                    mock.patch(
+                        "ax.models.torch.botorch_defaults.optimize_acqf",
+                        return_value=(X_dummy, acqfv_dummy),
+                    )
+                )
+                _mock_partitioning = es.enter_context(
+                    mock.patch(
+                        PARTITIONING_PATH,
+                        wraps=moo_monte_carlo.NondominatedPartitioning,
+                    )
+                )
+                _mock_model_infer_objective_thresholds = es.enter_context(
+                    mock.patch.object(
+                        model,
+                        "infer_objective_thresholds",
+                        wraps=model.infer_objective_thresholds,
+                    )
+                )
+                _mock_infer_reference_point = es.enter_context(
+                    mock.patch(
+                        "ax.models.torch.botorch_moo.infer_reference_point",
+                        wraps=infer_reference_point,
+                    )
+                )
+                es.enter_context(
+                    mock.patch.object(
+                        model.model,
+                        "posterior",
+                        return_value=MockPosterior(
+                            mean=torch.tensor(
+                                [
+                                    [11.0, 2.0, 0.0],
+                                    [9.0, 3.0, 0.0],
+                                ]
+                            )
+                        ),
+                    )
+                )
+                outcome_constraints = (
+                    torch.tensor([[1.0, 0.0, 0.0]]),
+                    torch.tensor([[10.0]]),
+                )
+                _, _, gen_metadata, _ = model.gen(
+                    n,
+                    bounds,
+                    objective_weights=torch.tensor([-1.0, -1.0, 0.0]),
+                    outcome_constraints=outcome_constraints,
+                    model_gen_options={"optimizer_kwargs": _get_optimizer_kwargs()},
+                )
+                # the EHVI acquisition function should be created only once.
+                self.assertEqual(1, _mock_ehvi_acqf.call_count)
+                ckwargs = _mock_model_infer_objective_thresholds.call_args[1]
+                X_observed = ckwargs["X_observed"]
+                sorted_idcs = X_observed[:, 0].argsort()
+                expected_X_observed = torch.tensor([[1.0, 2.0, 3.0], [0.9, 1.9, 2.9]])
+                sorted_idcs2 = expected_X_observed[:, 0].argsort()
+                self.assertTrue(
+                    torch.equal(
+                        X_observed[sorted_idcs],
+                        expected_X_observed[sorted_idcs2],
+                    )
+                )
+                self.assertTrue(
+                    torch.equal(
+                        ckwargs["objective_weights"], torch.tensor([-1.0, -1.0, 0.0])
+                    )
+                )
+                oc = ckwargs["outcome_constraints"]
+                self.assertTrue(torch.equal(oc[0], outcome_constraints[0]))
+                self.assertTrue(torch.equal(oc[1], outcome_constraints[1]))
+                self.assertIsInstance(ckwargs["model"], FixedNoiseGP)
+                self.assertTrue(
+                    torch.equal(ckwargs["subset_idcs"], torch.tensor([0, 1]))
+                )
+                _mock_infer_reference_point.assert_called_once()
+                ckwargs = _mock_infer_reference_point.call_args[1]
+                self.assertEqual(ckwargs["scale"], 0.1)
+                self.assertTrue(
+                    torch.equal(ckwargs["pareto_Y"], torch.tensor([[-9.0, -3.0]]))
+                )
+                self.assertIn("objective_thresholds", gen_metadata)
+                obj_t = gen_metadata["objective_thresholds"]
+                self.assertTrue(torch.equal(obj_t[:2], torch.tensor([9.9, 3.3])))
+                self.assertTrue(np.isnan(obj_t[2]))
+
+        # test infer objective thresholds alone
+        # include an extra 3rd outcome
+        outcome_constraints = (torch.tensor([[1.0, 0.0, 0.0]]), torch.tensor([[10.0]]))
+        with ExitStack() as es:
+            _mock_infer_reference_point = es.enter_context(
+                mock.patch(
+                    "ax.models.torch.botorch_moo.infer_reference_point",
+                    wraps=infer_reference_point,
+                )
+            )
+            _mock_get_X_pending_and_observed = es.enter_context(
+                mock.patch(
+                    "ax.models.torch.botorch_moo._get_X_pending_and_observed",
+                    wraps=_get_X_pending_and_observed,
+                )
+            )
+            es.enter_context(
+                mock.patch.object(
+                    model.model,
+                    "posterior",
+                    return_value=MockPosterior(
+                        mean=torch.tensor(
+                            [
+                                [11.0, 2.0, 0.0],
+                                [9.0, 3.0, 0.0],
+                            ]
+                        )
+                    ),
+                )
+            )
+            linear_constraints = (torch.tensor([1.0, 0.0, 0.0]), torch.tensor([2.0]))
+            objective_weights = torch.tensor([-1.0, -1.0, 0.0])
+            obj_thresholds = model.infer_objective_thresholds(
+                bounds=bounds,
+                objective_weights=objective_weights,
+                outcome_constraints=outcome_constraints,
+                fixed_features={},
+                linear_constraints=linear_constraints,
+            )
+            _mock_get_X_pending_and_observed.assert_called_once()
+            ckwargs = _mock_get_X_pending_and_observed.call_args[1]
+            actual_Xs = ckwargs["Xs"]
+            for X in actual_Xs:
+                self.assertTrue(torch.equal(X, Xs1[0]))
+            self.assertEqual(ckwargs["bounds"], bounds)
+            self.assertTrue(
+                torch.equal(ckwargs["objective_weights"], objective_weights)
+            )
+            oc = ckwargs["outcome_constraints"]
+            self.assertTrue(torch.equal(oc[0], outcome_constraints[0]))
+            self.assertTrue(torch.equal(oc[1], outcome_constraints[1]))
+            self.assertEqual(ckwargs["fixed_features"], {})
+            lc = ckwargs["linear_constraints"]
+            self.assertTrue(torch.equal(lc[0], linear_constraints[0]))
+            self.assertTrue(torch.equal(lc[1], linear_constraints[1]))
+            _mock_infer_reference_point.assert_called_once()
+            ckwargs = _mock_infer_reference_point.call_args[1]
+            self.assertEqual(ckwargs["scale"], 0.1)
+            self.assertTrue(
+                torch.equal(ckwargs["pareto_Y"], torch.tensor([[-9.0, -3.0]]))
+            )
+            self.assertTrue(torch.equal(obj_thresholds[:2], torch.tensor([9.9, 3.3])))
+            self.assertTrue(np.isnan(obj_thresholds[2].item()))
+
     def test_BotorchMOOModel_with_random_scalarization_and_outcome_constraints(
         self, dtype=torch.float, cuda=False
     ):
diff --git a/ax/models/tests/test_torch.py b/ax/models/tests/test_torch.py
index fe94e540c94..285a3199091 100644
--- a/ax/models/tests/test_torch.py
+++ b/ax/models/tests/test_torch.py
@@ -5,6 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import numpy as np
+import torch
 from ax.core.search_space import SearchSpaceDigest
 from ax.models.torch_base import TorchModel
 from ax.utils.common.testutils import TestCase
@@ -64,3 +65,8 @@ def testTorchModelUpdate(self):
             search_space_digest=SearchSpaceDigest(feature_names=[], bounds=[]),
             metric_names=[],
         )
+
+    def testTorchModelInferObjectiveThresholds(self):
+        torch_model = TorchModel()
+        with self.assertRaises(NotImplementedError):
+            torch_model.infer_objective_thresholds(torch.zeros(2))
diff --git a/ax/models/torch/botorch_moo.py b/ax/models/torch/botorch_moo.py
index 8316746632f..93635cd49e9 100644
--- a/ax/models/torch/botorch_moo.py
+++ b/ax/models/torch/botorch_moo.py
@@ -36,6 +36,7 @@
     randomize_objective_weights,
     subset_model,
 )
+from ax.models.torch.utils import get_outcome_constraint_transforms
 from ax.models.torch_base import TorchModel
 from ax.utils.common.constants import Keys
 from ax.utils.common.docutils import copy_doc
@@ -43,6 +44,8 @@
 from ax.utils.common.typeutils import checked_cast, not_none
 from botorch.acquisition.acquisition import AcquisitionFunction
 from botorch.models.model import Model
+from botorch.utils.multi_objective.hypervolume import infer_reference_point
+from botorch.utils.multi_objective.pareto import is_non_dominated
 from torch import Tensor
 
 
@@ -229,6 +232,80 @@ def __init__(
         self.fidelity_features: List[int] = []
         self.metric_names: List[str] = []
 
+    def infer_objective_thresholds(
+        self,
+        objective_weights: Tensor,  # objective_directions
+        bounds: Optional[List[Tuple[float, float]]] = None,
+        outcome_constraints: Optional[Tuple[Tensor, Tensor]] = None,
+        linear_constraints: Optional[Tuple[Tensor, Tensor]] = None,
+        fixed_features: Optional[Dict[int, float]] = None,
+        X_observed: Optional[Tensor] = None,
+        model: Optional[Model] = None,
+        subset_idcs: Optional[Tensor] = None,
+    ) -> Tensor:
+        """Infer objective thresholds.
+
+        Returns:
+            An `m`-dim tensor of objective thresholds, where the objective
+            threshold is `nan` if the outcome is not an objective.
+        """
+        if X_observed is None:
+            if bounds is None:
+                raise ValueError("bounds is required if X_observed is None.")
+            _, X_observed = _get_X_pending_and_observed(
+                Xs=self.Xs,
+                objective_weights=objective_weights,
+                outcome_constraints=outcome_constraints,
+                bounds=bounds,
+                linear_constraints=linear_constraints,
+                fixed_features=fixed_features,
+            )
+        if model is not None:
+            if subset_idcs is None:
+                raise ValueError(
+                    "subset_idcs must be provided if the model is provided."
+                )
+        else:
+            # subset the model
+            subset_model_results = subset_model(
+                model=self.model,  # pyre-ignore [6]
+                objective_weights=objective_weights,
+                outcome_constraints=outcome_constraints,
+            )
+            model = subset_model_results.model
+            objective_weights = subset_model_results.objective_weights
+            outcome_constraints = subset_model_results.outcome_constraints
+            subset_idcs = subset_model_results.indices
+        with torch.no_grad():
+            pred = not_none(model).posterior(not_none(X_observed)).mean
+        if outcome_constraints is not None:
+            cons_tfs = get_outcome_constraint_transforms(outcome_constraints)
+            # pyre-ignore [16]
+            feas = torch.stack([c(pred) <= 0 for c in cons_tfs], dim=-1).all(dim=-1)
+            pred = pred[feas]
+        if pred.shape[0] == 0:
+            raise AxError("There are no feasible observed points.")
+        # pyre-ignore [16]
+        obj_mask = objective_weights.nonzero().view(-1)
+        obj_weights_subset = objective_weights[obj_mask]
+        obj = pred[..., obj_mask] * obj_weights_subset
+        pareto_obj = obj[is_non_dominated(obj)]
+        objective_thresholds = infer_reference_point(
+            pareto_Y=pareto_obj,
+            scale=0.1,
+        )
+        # multiply by objective weights to return objective thresholds in the
+        # unweighted space
+        objective_thresholds = objective_thresholds * obj_weights_subset
+        full_objective_thresholds = torch.full(
+            (len(self.metric_names),),
+            float("nan"),
+            dtype=objective_weights.dtype,
+            device=objective_weights.device,
+        )
+        full_objective_thresholds[subset_idcs] = objective_thresholds.clone()
+        return full_objective_thresholds
+
     @copy_doc(TorchModel.gen)
     def gen(
         self,
@@ -272,7 +349,7 @@ def gen(
         )
 
         model = not_none(self.model)
-
+        full_objective_thresholds = objective_thresholds
         # subset model only to the outcomes we need for the optimization
         if options.get(Keys.SUBSET_MODEL, True):
             subset_model_results = subset_model(
@@ -285,6 +362,26 @@ def gen(
             objective_weights = subset_model_results.objective_weights
             outcome_constraints = subset_model_results.outcome_constraints
             objective_thresholds = subset_model_results.objective_thresholds
+            idcs = subset_model_results.indices
+        else:
+            idcs = torch.arange(
+                objective_weights.shape[0],
+                dtype=objective_weights.dtype,
+                device=objective_weights.device,
+            )
+        if objective_thresholds is None:
+            full_objective_thresholds = self.infer_objective_thresholds(
+                X_observed=not_none(X_observed),
+                objective_weights=objective_weights,
+                outcome_constraints=outcome_constraints,
+                model=model,
+                subset_idcs=idcs,
+            )
+
+            # subset the objective thresholds
+            objective_thresholds = full_objective_thresholds[idcs].clone()
+        else:
+            full_objective_thresholds = objective_thresholds
 
         bounds_ = torch.tensor(bounds, dtype=self.dtype, device=self.device)
         bounds_ = bounds_.transpose(0, 1)
@@ -351,9 +448,13 @@ def gen(
             rounding_func=botorch_rounding_func,
             **optimizer_options,
         )
+        gen_metadata = {
+            "expected_acquisition_value": expected_acquisition_value.tolist(),
+            "objective_thresholds": full_objective_thresholds.cpu(),
+        }
         return (
             candidates.detach().cpu(),
             torch.ones(n, dtype=self.dtype),
-            {"expected_acquisition_value": expected_acquisition_value.tolist()},
+            gen_metadata,
             None,
         )
diff --git a/ax/models/torch_base.py b/ax/models/torch_base.py
index 75da46f9b41..070c9908688 100644
--- a/ax/models/torch_base.py
+++ b/ax/models/torch_base.py
@@ -9,13 +9,14 @@
 import torch
 from ax.core.search_space import SearchSpaceDigest
 from ax.core.types import TCandidateMetadata, TConfig, TGenMetadata
-from ax.models.base import Model
+from ax.models.base import Model as BaseModel
+from botorch.models.model import Model
 from torch import Tensor
 
 
 # pyre-fixme[13]: Attribute `device` is never initialized.
 # pyre-fixme[13]: Attribute `dtype` is never initialized.
-class TorchModel(Model):
+class TorchModel(BaseModel):
     """This class specifies the interface for a torch-based model.
 
     These methods should be implemented to have access to all of the features
@@ -228,3 +229,35 @@ def evaluate_acquisition_function(self, X: Tensor) -> Tensor:
             A single-element tensor with the acquisition value for these points.
         """
         raise NotImplementedError
+
+    def infer_objective_thresholds(
+        self,
+        objective_weights: Tensor,  # objective_directions
+        bounds: Optional[List[Tuple[float, float]]] = None,
+        outcome_constraints: Optional[Tuple[Tensor, Tensor]] = None,
+        linear_constraints: Optional[Tuple[Tensor, Tensor]] = None,
+        fixed_features: Optional[Dict[int, float]] = None,
+        X_observed: Optional[Tensor] = None,
+        model: Optional[Model] = None,
+        subset_idcs: Optional[Tensor] = None,
+    ) -> Tensor:
+        """Infer objective thresholds.
+
+        Args:
+            objective_weights: The objective is to maximize a weighted sum of
+                the columns of f(x). These are the weights.
+            bounds: A list of (lower, upper) tuples for each column of X.
+            outcome_constraints: A tuple of (A, b). For k outcome constraints
+                and m outputs at f(x), A is (k x m) and b is (k x 1) such that
+                A f(x) <= b.
+            linear_constraints: A tuple of (A, b). For k linear constraints on
+                d-dimensional x, A is (k x d) and b is (k x 1) such that
+                A x <= b.
+            fixed_features: A map {feature_index: value} for features that
+                should be fixed to a particular value during generation.
+            X_observed: An `n x d`-dim tensor of observed in-sample points.
+            model: The model to use for inferring the objective thresholds.
+            subset_idcs: The indices of the outcomes that are used in the
+                optimization config (if the model has been subsetted).
+        """
+        raise NotImplementedError
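
A minimal usage sketch of the new behavior, built from the same Branin test stubs used in `test_infer_objective_thresholds` above. This is an illustration of the API added by this diff, not part of the patch itself; the constructor arguments mirror the test, and `GeneratorRun.gen_metadata` is the pass-through of the `gen_metadata` dict returned by `_gen`:

```python
from ax.modelbridge.multi_objective_torch import MultiObjectiveTorchModelBridge
from ax.modelbridge.registry import Cont_X_trans, Y_trans
from ax.models.torch.botorch_moo import MultiObjectiveBotorchModel
from ax.utils.testing.core_stubs import (
    get_branin_data_multi_objective,
    get_branin_experiment_with_multi_objective,
)

# Experiment with two objectives and no objective thresholds specified.
exp = get_branin_experiment_with_multi_objective(
    has_optimization_config=True, with_batch=True
)
for trial in exp.trials.values():
    trial.mark_running(no_runner_required=True).mark_completed()
exp.attach_data(get_branin_data_multi_objective(trial_indices=exp.trials.keys()))

modelbridge = MultiObjectiveTorchModelBridge(
    search_space=exp.search_space,
    model=MultiObjectiveBotorchModel(),
    optimization_config=exp.optimization_config,  # no thresholds set
    transforms=Cont_X_trans + Y_trans,
    experiment=exp,
    data=exp.fetch_data(),
)

# 1) gen() without thresholds: they are inferred inside the model and the
#    untransformed values are surfaced in the generator run's gen_metadata.
generator_run = modelbridge.gen(n=2)
inferred = generator_run.gen_metadata["objective_thresholds"]

# 2) Infer thresholds without generating candidates; returns ObservationData.
obs_data = modelbridge.infer_objective_thresholds(
    search_space=exp.search_space,
    optimization_config=exp.optimization_config,
)
print(obs_data.metric_names, obs_data.means)
```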
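The heuristic itself is delegated to BoTorch's `infer_reference_point(pareto_Y=..., scale=0.1)` inside `MultiObjectiveBotorchModel.infer_objective_thresholds`: predictions at the observed, feasible in-sample points are multiplied by `objective_weights` so every objective is maximized, the predicted Pareto front is extracted with `is_non_dominated`, and the resulting reference point is multiplied by the weights again to land back in the raw outcome space (with `nan` for non-objective outcomes). For a Pareto front with more than one point, the reference-point computation reduces to roughly the following simplified sketch; the real BoTorch function also handles single-point and degenerate fronts:

```python
import torch
from botorch.utils.multi_objective.pareto import is_non_dominated


def scaled_nadir(Y: torch.Tensor, scale: float = 0.1) -> torch.Tensor:
    """Approximate reference point for an `n x m` tensor of maximized objectives."""
    pareto_Y = Y[is_non_dominated(Y)]
    nadir = pareto_Y.min(dim=0).values
    ideal = pareto_Y.max(dim=0).values
    # Back the nadir off by `scale` times the per-objective Pareto-front range,
    # so each threshold sits slightly worse than the worst Pareto-optimal value.
    return nadir - scale * (ideal - nadir)
```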
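As noted in the summary, `MultiObjectiveTorchModelBridge.infer_objective_thresholds` returns `ObservationData`, so wiring the inferred values into plotting still requires creating `ObjectiveThreshold` objects by hand. A sketch of that step, continuing from the usage example above; it assumes maximized objectives (hence `ComparisonOp.GEQ`, with `LEQ` for minimized ones) and an optimization config of type `MultiObjectiveOptimizationConfig`, which exposes an `objective_thresholds` setter:

```python
from ax.core.outcome_constraint import ComparisonOp, ObjectiveThreshold

thresholds = [
    ObjectiveThreshold(
        metric=exp.metrics[name],
        bound=float(mean),
        op=ComparisonOp.GEQ,  # assumes this objective is maximized
        relative=False,  # inferred thresholds are absolute, not relativized
    )
    for name, mean in zip(obs_data.metric_names, obs_data.means)
]
exp.optimization_config.objective_thresholds = thresholds
```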