# Source code for ablator.main.configs
# TODO: fix mypy failing to recognize the custom config types correctly, e.g. Stateless
from ablator.config.main import ConfigBase, configclass
from ablator.config.types import (
Optional,
Stateless,
Literal,
Tuple,
List,
Enum,
Dict,
)
from ablator.modules.optimizer import OptimizerConfig
from ablator.modules.scheduler import SchedulerConfig
from ablator.modules.storage.cloud import GcpConfig
from ablator.modules.storage.remote import RemoteConfig
@configclass
class TrainConfig(ConfigBase):
    """
    Training configuration.

    Attributes
    ----------
    dataset: str
        dataset name. May be used in custom dataset loader functions.
    batch_size: int
        batch size.
    epochs: int
        number of epochs to train.
    optimizer_config: OptimizerConfig
        optimizer configuration. (check ``OptimizerConfig`` for more details)
    scheduler_config: Optional[SchedulerConfig]
        scheduler configuration. (check ``SchedulerConfig`` for more details)
    rand_weights_init: bool = True
        whether to initialize model weights randomly.
    """

    # Fields without a default value carry no class-level default here.
    dataset: str
    batch_size: int
    epochs: int
    optimizer_config: OptimizerConfig
    scheduler_config: Optional[SchedulerConfig]
    rand_weights_init: bool = True
# TODO: provide a dedicated @modelconfig decorator instead of applying @configclass to ModelConfig
@configclass
class ModelConfig(ConfigBase):
    """
    Model configuration.

    When initializing a model, the config is passed to the model constructor.
    This class declares no fields of its own.
    """
@configclass
class RunConfig(ConfigBase):
    """
    Base configuration for running an experiment.

    Attributes
    ----------
    experiment_dir: Optional[str] = None
        location to store experiment artifacts.
    random_seed: Optional[int] = None
        random seed.
    train_config: TrainConfig
        training configuration. (check ``TrainConfig`` for more details)
    model_config: ModelConfig
        model configuration. (check ``ModelConfig`` for more details)
    keep_n_checkpoints: int = 3
        number of latest checkpoints to keep.
    tensorboard: bool = True
        whether to use tensorboardLogger.
    amp: bool = True
        whether to use automatic mixed precision when running on gpu.
    device: str = "cuda"
        device to run on, e.g. ``"cuda"`` or ``"cpu"``.
    verbose: Literal["console", "tqdm", "silent"] = "console"
        verbosity level.
    eval_subsample: float = 1
        fraction of the dataset to use for evaluation.
    metrics_n_batches: int = 32
        max number of batches stored in every tag(train, eval, test) for evaluation.
    metrics_mb_limit: int = 100
        max number of megabytes stored in every tag(train, eval, test) for evaluation.
    early_stopping_iter: Optional[int] = None
        The maximum allowed difference between the current iteration and the last iteration
        with the best metric before applying early stopping.
        Early stopping will be triggered if the difference ``(current_itr - best_itr)`` exceeds ``early_stopping_iter``.
        If set to ``None``, early stopping will not be applied.
    eval_epoch: float = 1
        The epoch interval between two evaluations.
    log_epoch: float = 1
        The epoch interval between two logging events.
    init_chkpt: Optional[str] = None
        path to a checkpoint to initialize the model with.
    warm_up_epochs: float = 1
        number of epochs marked as warm up epochs.
    divergence_factor: float = 100
        if ``cur_loss > best_loss > divergence_factor``, the model is considered to have diverged.
    """

    # location to store experiment artifacts
    experiment_dir: Stateless[Optional[str]] = None
    random_seed: Optional[int] = None
    train_config: TrainConfig
    model_config: ModelConfig
    keep_n_checkpoints: Stateless[int] = 3
    tensorboard: Stateless[bool] = True
    amp: Stateless[bool] = True
    device: Stateless[str] = "cuda"
    verbose: Stateless[Literal["console", "tqdm", "silent"]] = "console"
    eval_subsample: Stateless[float] = 1
    metrics_n_batches: Stateless[int] = 32
    metrics_mb_limit: Stateless[int] = 100
    early_stopping_iter: Stateless[Optional[int]] = None
    eval_epoch: Stateless[float] = 1
    log_epoch: Stateless[float] = 1
    init_chkpt: Stateless[Optional[str]] = None
    warm_up_epochs: Stateless[float] = 1
    # NOTE(review): the documented condition reads as a chained comparison;
    # presumably a ratio of current to best loss is meant -- confirm against
    # the code that consumes this value.
    divergence_factor: Stateless[float] = 100

    @property
    def uid(self) -> str:
        """
        Identifier of this run configuration.

        Returns
        -------
        str
            the ``uid`` of ``train_config`` and the ``uid`` of ``model_config``
            joined with an underscore.
        """
        return f"{self.train_config.uid}_{self.model_config.uid}"
class SearchType(Enum):
    """
    Type of search space.

    Members
    -------
    integer
        stored as the string ``"int"``.
    numerical
        stored as the string ``"float"``.
    """

    integer = "int"
    numerical = "float"
@configclass
class SearchSpace(ConfigBase):
    """
    Search space configuration.

    Attributes
    ----------
    value_range: Optional[Tuple[str, str]]
        two-element range for the parameter, declared as a pair of strings.
    categorical_values: Optional[List[str]]
        list of string values (presumably the discrete candidates to sample from).
    value_type: SearchType = SearchType.numerical
        type of the values in the search space.
    """

    value_range: Optional[Tuple[str, str]]
    categorical_values: Optional[List[str]]
    value_type: SearchType = SearchType.numerical

    def __init__(self, *args, **kwargs) -> None:
        """
        Build the config through the parent constructor, then check that
        ``value_range`` and ``categorical_values`` are not both set.

        Raises
        ------
        AssertionError
            if both ``value_range`` and ``categorical_values`` are not ``None``.
        """
        super().__init__(*args, **kwargs)
        # Only the "both set" case is rejected; leaving both as ``None``
        # passes this check. Being an ``assert``, it is skipped under ``-O``.
        assert (
            self.value_range is None or self.categorical_values is None
        ), "Can not specify value_range and categorical_values for SearchSpace."
class SearchAlgo(Enum):
    """
    Type of search algorithm.

    Members
    -------
    random
        stored as the string ``"random"``.
    tpe
        stored as the string ``"tpe"``.
    """

    random = "random"
    tpe = "tpe"
class Optim(Enum):
    """
    Type of optimization direction.

    Members
    -------
    min
        stored as the string ``"min"``.
    max
        stored as the string ``"max"``.
    """

    # The member names are part of the public interface; they only shadow the
    # ``min``/``max`` builtins inside this class body.
    min = "min"
    max = "max"
@configclass
class ParallelConfig(RunConfig):
    """
    Parallel training configuration.

    Attributes
    ----------
    total_trials: int
        total number of trials.
    concurrent_trials: int
        number of trials to run concurrently.
    search_space: Dict[SearchSpace]
        search space for hyperparameter search,
        eg. ``{"train_config.optimizer_config.arguments.lr": SearchSpace(value_range=[0, 10], value_type="int"),}``
    optim_metrics: Dict[Optim]
        metrics to optimize, eg. ``{"val_loss": "min"}``
    gpu_mb_per_experiment: int
        GPU memory budget for a single experiment (presumably in megabytes, going by the name).
    cpus_per_experiment: float
        CPU budget for a single experiment (presumably a number of CPUs, going by the name).
    search_algo: SearchAlgo = SearchAlgo.tpe
        type of search algorithm.
    ignore_invalid_params: bool = False
        whether to ignore invalid parameters when sampling.
    remote_config: Optional[RemoteConfig] = None
        remote storage configuration.
    gcp_config: Optional[GcpConfig] = None
        gcp configuration.
    """

    total_trials: int
    concurrent_trials: Stateless[int]
    search_space: Dict[SearchSpace]
    optim_metrics: Stateless[Dict[Optim]]
    gpu_mb_per_experiment: Stateless[int]
    cpus_per_experiment: Stateless[float]
    search_algo: Stateless[SearchAlgo] = SearchAlgo.tpe
    ignore_invalid_params: Stateless[bool] = False
    remote_config: Stateless[Optional[RemoteConfig]] = None
    gcp_config: Stateless[Optional[GcpConfig]] = None