
job

zeus._legacy.job

Defines the Job specification dataclass.

Job dataclass

Job specification tuple.

Attributes:

Name           Type              Description
dataset        str               Name of the dataset.
network        str               Name of the DNN model.
optimizer      str               Name of the optimizer, e.g. Adam.
target_metric  float             Target validation metric.
max_epochs     int               Maximum number of epochs to train before terminating.
default_bs     int | None        Initial batch size (b0) provided by the user.
default_lr     float | None      Learning rate corresponding to the default batch size.
workdir        str | None        Working directory in which to launch the job command.
command        list[str] | None  Job command template. See gen_command.

Source code in zeus/_legacy/job.py
@dataclass(frozen=True, unsafe_hash=True)
class Job:
    """Job specification tuple.

    Attributes:
        dataset: Name of the dataset.
        network: Name of the DNN model.
        optimizer: Name of the optimizer, e.g. Adam.
        target_metric: Target validation metric.
        max_epochs: Maximum number of epochs to train before terminating.
        default_bs: Initial batch size (b0) provided by the user.
        default_lr: Learning rate corresponding to the default batch size.
        workdir: Working directory in which to launch the job command.
        command: Job command template. See [`gen_command`][zeus._legacy.job.Job.gen_command].
    """

    dataset: str
    network: str
    optimizer: str
    target_metric: float
    max_epochs: int
    default_bs: int | None = None
    default_lr: float | None = None
    workdir: str | None = None
    command: list[str] | None = field(default=None, hash=False, compare=False)

    def __str__(self) -> str:
        """Generate a more conside representation of the object."""
        return (
            f"Job({self.dataset},{self.network},{self.optimizer},{self.target_metric}"
            f"{f',bs{self.default_bs}' if self.default_bs is not None else ''}~{self.max_epochs})"
        )

    def to_logdir(self) -> str:
        """Generate a logdir name that explains this job."""
        return (
            f"{self.dataset}+{self.network}+bs{self.default_bs}"
            f"+{self.optimizer}+lr{self.default_lr}"
            f"+tm{self.target_metric}+me{self.max_epochs}"
        )

    def filter_df(self, df: pd.DataFrame) -> pd.DataFrame:
        """Pick out the rows corresponding to this job from the DataFrame."""
        return df.loc[
            (df.dataset == self.dataset)
            & (df.network == self.network)
            & (df.optimizer == self.optimizer)
            & (df.target_metric == self.target_metric)
        ]

    def gen_command(
        self,
        batch_size: int,
        learning_rate: float,
        seed: int,
        rec_i: int,
    ) -> list[str]:
        """Format the job command with given arguments.

        Args:
            batch_size: Batch size to use for this job launch.
            learning_rate: Learning rate to use for this job launch.
            seed: Random seed to use for this job launch.
            rec_i: Recurrence number of this job launch.
        """
        assert self.command, "You must provide a command format string for this job."
        command = []
        for piece in self.command:
            if piece in ["{bs}", "{batch_size}"]:
                command.append(str(batch_size))
            elif piece in ["{lr}", "{learning_rate}"]:
                command.append(str(learning_rate))
            elif piece == "{seed}":
                command.append(str(seed))
            elif piece in ["{epoch}", "{epochs}"]:
                command.append(str(self.max_epochs))
            elif piece == "{slice_number}":
                command.append(str(rec_i))
            elif piece == "{target_metric}":
                command.append(str(self.target_metric))
            else:
                command.append(piece)
        return command

    def scale_lr(self, batch_size: int) -> float:
        """Scale the learning rate for the given batch size.

        Assumes that `self.default_bs` and `self.default_lr` were given.
        Then, `self.default_lr` is scaled for the given `batch_size` using
        square root scaling for adaptive optimizers (e.g. Adam, Adadelta,
        AdamW) and linear scaling for others (e.g. SGD).
        """
        assert self.default_bs, "You must provide default_bs to scale LR."
        assert self.default_lr, "You must provide default_lr to scale LR."

        optimizer = self.optimizer.lower()
        if optimizer in ["adam", "adadelta", "adamw"]:
            scaler = SquareRootScaler(bs=self.default_bs, lr=self.default_lr)
            return scaler.compute_lr(batch_size)
        if optimizer in ["sgd"]:
            scaler = LinearScaler(bs=self.default_bs, lr=self.default_lr)
            return scaler.compute_lr(batch_size)
        raise NotImplementedError(f"LR scaling for {self.optimizer} is not supported.")
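
For illustration, a minimal sketch of constructing a Job. All field values here are hypothetical, not from the source:

job = Job(
    dataset="cifar100",
    network="resnet18",
    optimizer="adam",
    target_metric=0.6,
    max_epochs=100,
    default_bs=128,
    default_lr=0.001,
    workdir="/workspace/train",
    command=["python", "train.py", "--bs", "{batch_size}", "--lr", "{lr}"],
)

Because the dataclass is frozen and hashable (with command excluded from hashing and comparison), Job instances can be used as dictionary keys or set members.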

__str__

__str__()

Generate a more concise representation of the object.

Source code in zeus/_legacy/job.py
def __str__(self) -> str:
    """Generate a more conside representation of the object."""
    return (
        f"Job({self.dataset},{self.network},{self.optimizer},{self.target_metric}"
        f"{f',bs{self.default_bs}' if self.default_bs is not None else ''}~{self.max_epochs})"
    )
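
With the hypothetical job defined above, the resulting string follows directly from the f-string:

str(job)  # "Job(cifar100,resnet18,adam,0.6,bs128~100)"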

to_logdir

to_logdir()

Generate a logdir name that explains this job.

Source code in zeus/_legacy/job.py
def to_logdir(self) -> str:
    """Generate a logdir name that explains this job."""
    return (
        f"{self.dataset}+{self.network}+bs{self.default_bs}"
        f"+{self.optimizer}+lr{self.default_lr}"
        f"+tm{self.target_metric}+me{self.max_epochs}"
    )
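
Again with the hypothetical job above, the logdir name concatenates every field with "+":

job.to_logdir()  # "cifar100+resnet18+bs128+adam+lr0.001+tm0.6+me100"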

filter_df

filter_df(df)

Pick out the rows corresponding to this job from the DataFrame.

Source code in zeus/_legacy/job.py
def filter_df(self, df: pd.DataFrame) -> pd.DataFrame:
    """Pick out the rows corresponding to this job from the DataFrame."""
    return df.loc[
        (df.dataset == self.dataset)
        & (df.network == self.network)
        & (df.optimizer == self.optimizer)
        & (df.target_metric == self.target_metric)
    ]
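
A minimal sketch of filtering, assuming a DataFrame that carries at least the four key columns (the table contents are hypothetical):

import pandas as pd

# Hypothetical results table; extra columns such as "energy" pass through untouched.
df = pd.DataFrame(
    {
        "dataset": ["cifar100", "cifar100", "imagenet"],
        "network": ["resnet18", "resnet18", "resnet50"],
        "optimizer": ["adam", "sgd", "sgd"],
        "target_metric": [0.6, 0.6, 0.75],
        "energy": [1.2, 1.5, 9.8],
    }
)

job.filter_df(df)  # keeps only the first row, which matches the job above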

gen_command

gen_command(batch_size, learning_rate, seed, rec_i)

Format the job command with given arguments.

Parameters:

Name           Type   Description                                 Default
batch_size     int    Batch size to use for this job launch.     required
learning_rate  float  Learning rate to use for this job launch.  required
seed           int    Random seed to use for this job launch.    required
rec_i          int    Recurrence number of this job launch.      required
Source code in zeus/_legacy/job.py
def gen_command(
    self,
    batch_size: int,
    learning_rate: float,
    seed: int,
    rec_i: int,
) -> list[str]:
    """Format the job command with given arguments.

    Args:
        batch_size: Batch size to use for this job launch.
        learning_rate: Learning rate to use for this job launch.
        seed: Random seed to use for this job launch.
        rec_i: Recurrence number of this job launch.
    """
    assert self.command, "You must provide a command format string for this job."
    command = []
    for piece in self.command:
        if piece in ["{bs}", "{batch_size}"]:
            command.append(str(batch_size))
        elif piece in ["{lr}", "{learning_rate}"]:
            command.append(str(learning_rate))
        elif piece == "{seed}":
            command.append(str(seed))
        elif piece in ["{epoch}", "{epochs}"]:
            command.append(str(self.max_epochs))
        elif piece == "{slice_number}":
            command.append(str(rec_i))
        elif piece == "{target_metric}":
            command.append(str(self.target_metric))
        else:
            command.append(piece)
    return command
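
A sketch of template substitution, reusing the hypothetical job above (its command template is an assumption for illustration):

job.gen_command(batch_size=256, learning_rate=0.001, seed=42, rec_i=0)
# ["python", "train.py", "--bs", "256", "--lr", "0.001"]
# Other recognized placeholders: {seed}, {epoch}/{epochs}, {slice_number}, {target_metric}.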

scale_lr

scale_lr(batch_size)

Scale the learning rate for the given batch size.

Assumes that self.default_bs and self.default_lr were given. Then, self.default_lr is scaled for the given batch_size using square root scaling for adaptive optimizers (e.g. Adam, Adadelta, AdamW) and linear scaling for others (e.g. SGD).

Source code in zeus/_legacy/job.py
def scale_lr(self, batch_size: int) -> float:
    """Scale the learning rate for the given batch size.

    Assumes that `self.default_bs` and `self.default_lr` were given.
    Then, `self.default_lr` is scaled for the given `batch_size` using
    square root scaling for adaptive optimizers (e.g. Adam, Adadelta,
    AdamW) and linear scaling for others (e.g. SGD).
    """
    assert self.default_bs, "You must provide default_bs to scale LR."
    assert self.default_lr, "You must provide default_lr to scale LR."

    optimizer = self.optimizer.lower()
    if optimizer in ["adam", "adadelta", "adamw"]:
        scaler = SquareRootScaler(bs=self.default_bs, lr=self.default_lr)
        return scaler.compute_lr(batch_size)
    if optimizer in ["sgd"]:
        scaler = LinearScaler(bs=self.default_bs, lr=self.default_lr)
        return scaler.compute_lr(batch_size)
    raise NotImplementedError(f"LR scaling for {self.optimizer} is not supported.")
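
The SquareRootScaler and LinearScaler implementations are not shown on this page; assuming they apply the standard scaling rules (square root: lr' = lr * sqrt(new_bs / default_bs); linear: lr' = lr * new_bs / default_bs), a worked sketch with the hypothetical job above:

import math

default_bs, default_lr = 128, 0.001

# Square root scaling (adaptive optimizers such as Adam, Adadelta, AdamW):
default_lr * math.sqrt(256 / default_bs)  # ~0.0014

# Linear scaling (SGD):
default_lr * (256 / default_bs)  # 0.002

# job.scale_lr(256) would take the square root branch, since job.optimizer is "adam".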