# Source code for ablator.main.configs

# TODO: fix mypy, which does not correctly recognize the custom config types (e.g. Stateless)
from ablator.config.main import ConfigBase, configclass
from ablator.config.types import (
    Optional,
    Stateless,
    Literal,
    Tuple,
    List,
    Enum,
    Dict,
)
from ablator.modules.optimizer import OptimizerConfig
from ablator.modules.scheduler import SchedulerConfig
from ablator.modules.storage.cloud import GcpConfig
from ablator.modules.storage.remote import RemoteConfig


@configclass
class TrainConfig(ConfigBase):
    """
    Training configuration.

    Attributes
    ----------
    dataset: str
        dataset name. May be used in custom dataset loader functions.
    batch_size: int
        batch size.
    epochs: int
        number of epochs to train.
    optimizer_config: OptimizerConfig
        optimizer configuration. (check ``OptimizerConfig`` for more details)
    scheduler_config: Optional[SchedulerConfig]
        scheduler configuration, ``None`` is an accepted value.
        (check ``SchedulerConfig`` for more details)
    rand_weights_init: bool = True
        whether to initialize model weights randomly.
    """

    dataset: str
    batch_size: int
    epochs: int
    optimizer_config: OptimizerConfig
    scheduler_config: Optional[SchedulerConfig]
    rand_weights_init: bool = True


# TODO: provide a dedicated @modelconfig decorator instead of applying @configclass to ModelConfig
@configclass
class ModelConfig(ConfigBase):
    """
    Model configuration.

    Declares no attributes of its own.
    When initializing a model, the config is passed to the model constructor.
    """


@configclass
class RunConfig(ConfigBase):
    """
    Base configuration for running an experiment.

    The types listed below are the inner types; in the class body several of
    the attributes are additionally wrapped in ``Stateless[...]``.

    Attributes
    ----------
    experiment_dir: Optional[str] = None
        location to store experiment artifacts.
    random_seed: Optional[int] = None
        random seed.
    train_config: TrainConfig
        training configuration. (check ``TrainConfig`` for more details)
    model_config: ModelConfig
        model configuration. (check ``ModelConfig`` for more details)
    keep_n_checkpoints: int = 3
        number of latest checkpoints to keep.
    tensorboard: bool = True
        whether to use tensorboardLogger.
    amp: bool = True
        whether to use automatic mixed precision when running on gpu.
    device: str = "cuda"
        device to run on, e.g. ``"cuda"`` or ``"cpu"``.
    verbose: Literal["console", "tqdm", "silent"] = "console"
        verbosity level.
    eval_subsample: float = 1
        fraction of the dataset to use for evaluation.
    metrics_n_batches: int = 32
        max number of batches stored in every tag(train, eval, test) for evaluation.
    metrics_mb_limit: int = 100
        max number of megabytes stored in every tag(train, eval, test) for evaluation.
    early_stopping_iter: Optional[int] = None
        The maximum allowed difference between the current iteration and the last iteration
        with the best metric before applying early stopping.
        Early stopping will be triggered if the difference ``(current_itr - best_itr)`` exceeds ``early_stopping_iter``.
        If set to ``None``, early stopping will not be applied.
    eval_epoch: float = 1
        The epoch interval between two evaluations.
    log_epoch: float = 1
        The epoch interval between two logging.
    init_chkpt: Optional[str] = None
        path to a checkpoint to initialize the model with.
    warm_up_epochs: float = 1
        number of epochs marked as warm up epochs.
    divergence_factor: float = 100
        if ``cur_loss > best_loss > divergence_factor``, the model is considered to have diverged.

    """

    # location to store experiment artifacts
    experiment_dir: Stateless[Optional[str]] = None
    random_seed: Optional[int] = None
    train_config: TrainConfig
    model_config: ModelConfig
    keep_n_checkpoints: Stateless[int] = 3
    tensorboard: Stateless[bool] = True
    amp: Stateless[bool] = True
    device: Stateless[str] = "cuda"
    verbose: Stateless[Literal["console", "tqdm", "silent"]] = "console"
    eval_subsample: Stateless[float] = 1
    metrics_n_batches: Stateless[int] = 32
    metrics_mb_limit: Stateless[int] = 100
    early_stopping_iter: Stateless[Optional[int]] = None
    eval_epoch: Stateless[float] = 1
    log_epoch: Stateless[float] = 1
    init_chkpt: Stateless[Optional[str]] = None
    warm_up_epochs: Stateless[float] = 1
    divergence_factor: Stateless[float] = 100

    @property
    def uid(self) -> str:
        """
        Identifier of this run configuration.

        Returns
        -------
        str
            the ``uid`` of ``train_config`` and the ``uid`` of ``model_config``
            joined by an underscore.
        """
        return f"{self.train_config.uid}_{self.model_config.uid}"


class SearchType(Enum):
    """
    Type of search space.

    Members map a descriptive name to the string used as the value type:
    ``integer`` -> ``"int"`` and ``numerical`` -> ``"float"``.
    """

    integer = "int"
    numerical = "float"


@configclass
class SearchSpace(ConfigBase):
    """
    Search space configuration.

    Attributes
    ----------
    value_range: Optional[Tuple[str, str]]
        pair of values describing the range of the search space.
    categorical_values: Optional[List[str]]
        list of categorical values of the search space.
    value_type: SearchType = SearchType.numerical
        type of the values in the search space.

    Raises
    ------
    AssertionError
        if both ``value_range`` and ``categorical_values`` are specified.
    """

    value_range: Optional[Tuple[str, str]]
    categorical_values: Optional[List[str]]
    value_type: SearchType = SearchType.numerical

    def __init__(self, *args, **kwargs) -> None:
        """
        Initialize the search space and validate that at most one of
        ``value_range`` and ``categorical_values`` is set.

        All arguments are forwarded unchanged to the parent initializer.
        """
        super().__init__(*args, **kwargs)
        # Raised explicitly (instead of using an ``assert`` statement) so the
        # validation is not stripped when Python runs with ``-O``. The
        # exception type and message are unchanged for existing callers.
        if self.value_range is not None and self.categorical_values is not None:
            raise AssertionError(
                "Can not specify value_range and categorical_values for SearchSpace."
            )


class SearchAlgo(Enum):
    """
    Type of search algorithm.

    Each member's value is the lowercase string form of its name
    (``"random"`` or ``"tpe"``).
    """

    random = "random"
    tpe = "tpe"


class Optim(Enum):
    """
    Type of optimization direction.

    ``min`` has the value ``"min"`` and ``max`` has the value ``"max"``.
    """

    min = "min"
    max = "max"


@configclass
class ParallelConfig(RunConfig):
    """
    Parallel training configuration.

    Extends ``RunConfig`` with the attributes below. The types listed are the
    inner types; in the class body several of the attributes are additionally
    wrapped in ``Stateless[...]``.

    Attributes
    ----------
    total_trials: int
        total number of trials.
    concurrent_trials: int
        number of trials to run concurrently.
    search_space: Dict[SearchSpace]
        search space for hyperparameter search,
        eg. ``{"train_config.optimizer_config.arguments.lr": SearchSpace(value_range=[0, 10], value_type="int"),}``
    optim_metrics: Dict[Optim]
        metrics to optimize, eg. ``{"val_loss": "min"}``
    gpu_mb_per_experiment: int
        presumably the GPU memory, in megabytes, allotted to each experiment
        (TODO confirm against the code that consumes this value).
    cpus_per_experiment: float
        presumably the number of CPUs allotted to each experiment
        (TODO confirm against the code that consumes this value).
    search_algo: SearchAlgo = SearchAlgo.tpe
        type of search algorithm.
    ignore_invalid_params: bool = False
        whether to ignore invalid parameters when sampling.
    remote_config: Optional[RemoteConfig] = None
        remote storage configuration.
    gcp_config: Optional[GcpConfig] = None
        gcp configuration.

    """

    total_trials: int
    concurrent_trials: Stateless[int]
    search_space: Dict[SearchSpace]
    optim_metrics: Stateless[Dict[Optim]]
    gpu_mb_per_experiment: Stateless[int]
    cpus_per_experiment: Stateless[float]
    search_algo: Stateless[SearchAlgo] = SearchAlgo.tpe
    ignore_invalid_params: Stateless[bool] = False
    remote_config: Stateless[Optional[RemoteConfig]] = None
    gcp_config: Stateless[Optional[GcpConfig]] = None