interface

zeus.policy.interface

Abstract classes for implementing custom optimization policies.

BatchSizeOptimizer

Bases: ABC

Finds out the best batch size to use for the job.

Source code in zeus/policy/interface.py

class BatchSizeOptimizer(ABC):
    """Interface for policies that choose the best batch size for a job."""

    @property
    @abstractmethod
    def name(self) -> str:
        """Name identifying this batch size optimizer."""

    @abstractmethod
    def register_job(self, job: Job, batch_sizes: list[int]) -> None:
        """Set up internal state so that the given job can be handled.

        Implementations are expected to manage the state of each
        [`Job`][zeus.job.Job] separately. Because [`Job`][zeus.job.Job]
        is hashable, it can be used directly as a dictionary key.

        Args:
            job: The new job to register.
            batch_sizes: Candidate batch sizes to consider.
        """

    @abstractmethod
    def predict(self, job: Job) -> int:
        """Pick the best batch size to use for the job.

        Args:
            job: The job for which a batch size should be chosen.

        Returns:
            The batch size to use.
        """

    @abstractmethod
    def observe(
        self, job: Job, batch_size: int, cost: float, converged: bool | None = None
    ) -> None:
        """Record the cost of running the job with the given batch size.

        Args:
            job: The job that produced this cost observation.
            batch_size: The batch size used for this run of the job.
            cost: The energy-time cost of running the job.
            converged: Whether the job reached its target metric. It may not
                have reached its target if the job was early stopped based on
                cost or the maximum epoch was reached. BSOs that do not take
                this into account can be passed `None`.
        """

    def _log(self, message: str) -> None:
        """Print the message prefixed with this optimizer's name in brackets."""
        line = f"[{self.name}] {message}"
        print(line)

name `abstractmethod` `property`

name

Name of the batch size optimizer.

register_job `abstractmethod`

register_job(job, batch_sizes)

Prepare internal state so that it can handle the given job.

It is assumed that the state of each Job will be managed separately. Note that Job is hashable, and thus can be used as dictionary keys.

Parameters:

Name	Type	Description	Default
`job`	`Job`	New job to register.	required
`batch_sizes`	`list[int]`	Batch sizes to consider.	required

Source code in zeus/policy/interface.py

@abstractmethod
def register_job(self, job: Job, batch_sizes: list[int]) -> None:
    """Set up internal state so that the given job can be handled.

    Implementations are expected to manage the state of each
    [`Job`][zeus.job.Job] separately. Because [`Job`][zeus.job.Job]
    is hashable, it can be used directly as a dictionary key.

    Args:
        job: The new job to register.
        batch_sizes: Candidate batch sizes to consider.
    """

predict `abstractmethod`

predict(job)

Return the best batch size to use for the job.

Parameters:

Name	Type	Description	Default
`job`	`Job`	The job to pick the best batch size for.	required

Source code in zeus/policy/interface.py

@abstractmethod
def predict(self, job: Job) -> int:
    """Pick the best batch size to use for the job.

    Args:
        job: The job for which a batch size should be chosen.

    Returns:
        The batch size to use.
    """

observe `abstractmethod`

observe(job, batch_size, cost, converged=None)

Observe the cost of using the given batch size for the job.

Parameters:

Name	Type	Description	Default
`job`	`Job`	The job from which this cost observation resulted.	required
`batch_size`	`int`	The batch size used for this run of the job.	required
`cost`	`float`	The energy-time cost of running the job.	required
`converged`	`bool \| None`	Whether the job reached its target metric. It may not have reached its target if the job was early stopped based on cost or the maximum epoch was reached. For BSOs that do not take this into account, `None` can be passed.	`None`

Source code in zeus/policy/interface.py

@abstractmethod
def observe(
    self, job: Job, batch_size: int, cost: float, converged: bool | None = None
) -> None:
    """Record the cost of running the job with the given batch size.

    Args:
        job: The job that produced this cost observation.
        batch_size: The batch size used for this run of the job.
        cost: The energy-time cost of running the job.
        converged: Whether the job reached its target metric. It may not
            have reached its target if the job was early stopped based on
            cost or the maximum epoch was reached. BSOs that do not take
            this into account can be passed `None`.
    """

_log

_log(message)

Log message with object name.

Source code in zeus/policy/interface.py

def _log(self, message: str) -> None:
    """Print the message prefixed with this object's name in brackets."""
    line = f"[{self.name}] {message}"
    print(line)

PowerLimitOptimizer

Bases: ABC

Finds out the best power limit to use for the job and batch size.

Source code in zeus/policy/interface.py

class PowerLimitOptimizer(ABC):
    """Interface for policies that choose the best power limit for a job and batch size."""

    @property
    @abstractmethod
    def name(self) -> str:
        """Name identifying this power limit optimizer."""

    @abstractmethod
    def predict(self, job: Job, batch_size: int) -> int | None:
        """Pick the best power limit for the job and batch size.

        Args:
            job: The job for which a power limit should be chosen.
            batch_size: The batch size that the
                [`BatchSizeOptimizer`][zeus.policy.BatchSizeOptimizer] chose
                for this job.

        Returns:
            The best power limit, or `None` when profiling results delivered via
            [`observe`][zeus.policy.interface.PowerLimitOptimizer.observe] are
            needed first.
        """

    @abstractmethod
    def observe(self, job: Job, batch_size: int, power_limit: int, cost: float) -> None:
        """Record the cost of running the job with a batch size and power limit.

        Args:
            job: The job that produced this cost observation.
            batch_size: The batch size used for this run of the job.
            power_limit: The power limit used for this run of the job.
            cost: The cost of running the job.
        """

    def _log(self, message: str) -> None:
        """Print the message prefixed with this optimizer's name in brackets."""
        line = f"[{self.name}] {message}"
        print(line)

name `abstractmethod` `property`

name

Name of the power limit optimizer.

predict `abstractmethod`

predict(job, batch_size)

Return the best power limit for the job and batch size.

Parameters:

Name	Type	Description	Default
`job`	`Job`	The job to pick the best power limit for.	required
`batch_size`	`int`	The batch size chosen by the `BatchSizeOptimizer` for this job.	required

Returns:

Type	Description
`int \| None`	The best power limit, or `None` if profiling results via `observe` are needed.

Source code in zeus/policy/interface.py

@abstractmethod
def predict(self, job: Job, batch_size: int) -> int | None:
    """Pick the best power limit for the job and batch size.

    Args:
        job: The job for which a power limit should be chosen.
        batch_size: The batch size that the
            [`BatchSizeOptimizer`][zeus.policy.BatchSizeOptimizer] chose
            for this job.

    Returns:
        The best power limit, or `None` when profiling results delivered via
        [`observe`][zeus.policy.interface.PowerLimitOptimizer.observe] are
        needed first.
    """

observe `abstractmethod`

observe(job, batch_size, power_limit, cost)

Observe the cost of using the given batch size and power limit for the job.

Parameters:

Name	Type	Description	Default
`job`	`Job`	The job from which this cost observation resulted.	required
`batch_size`	`int`	The batch size used for this run of the job.	required
`power_limit`	`int`	The power limit used for this run of the job.	required
`cost`	`float`	The cost of running the job.	required

Source code in zeus/policy/interface.py

@abstractmethod
def observe(self, job: Job, batch_size: int, power_limit: int, cost: float) -> None:
    """Record the cost of running the job with a batch size and power limit.

    Args:
        job: The job that produced this cost observation.
        batch_size: The batch size used for this run of the job.
        power_limit: The power limit used for this run of the job.
        cost: The cost of running the job.
    """

_log

_log(message)

Log message with object name.

Source code in zeus/policy/interface.py

def _log(self, message: str) -> None:
    """Print the message prefixed with this object's name in brackets."""
    line = f"[{self.name}] {message}"
    print(line)

interface

zeus.policy.interface

BatchSizeOptimizer

name abstractmethod property

register_job abstractmethod

predict abstractmethod

observe abstractmethod

_log

PowerLimitOptimizer

name abstractmethod property

predict abstractmethod

observe abstractmethod

_log

name `abstractmethod` `property`

register_job `abstractmethod`

predict `abstractmethod`

observe `abstractmethod`

name `abstractmethod` `property`

predict `abstractmethod`

observe `abstractmethod`