Skip to content

frequency_controller

zeus.optimizer.perseus.frequency_controller

Controller that sets the GPU's frequency in a non-blocking fashion.

FrequencyController

Spawns a separate process that sets the GPU frequency.

Source code in zeus/optimizer/perseus/frequency_controller.py
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
class FrequencyController:
    """Spawns a separate process that sets the GPU frequency.

    Frequency requests are handed to the child process through a
    multiprocessing queue, so the caller never waits on an NVML call. A `None`
    item on the queue tells the child process to restore the clocks and exit.
    """

    def __init__(self, nvml_device_id: int = 0) -> None:
        """Instantiate the frequency controller.

        Creates the request queue, registers `end` as an interpreter-exit
        hook, and starts the controller process.

        Args:
            nvml_device_id: The NVML device ID of the GPU to control.
        """
        self._q: mp.Queue[int | None] = mp.Queue()
        self._proc = mp.Process(target=self._controller_process, args=(nvml_device_id,))

        # Ensure the stop signal is queued at exit even if `end` is never
        # called explicitly.
        atexit.register(self.end)
        self._proc.start()

    def set_frequency(self, frequency: int) -> None:
        """Set the GPU's frequency asynchronously.

        If `frequency` is zero, returns without doing anything.

        Args:
            frequency: Value forwarded to the controller process, which locks
                the GPU clocks to it (presumably MHz, as NVML clock APIs
                expect; the value is not validated here).
        """
        if frequency != 0:
            self._q.put(frequency, block=False)

    def end(self) -> None:
        """Stop the controller process.

        Queues the `None` sentinel and returns immediately; it does not wait
        for the controller process to exit.
        """
        self._q.put(None, block=False)

    def _controller_process(self, device_id: int) -> None:
        """Receive frequency values through a queue and apply them.

        Runs in the child process. Locks the memory and SM clocks to their
        maximum supported values, then applies each frequency received from
        the queue until `None` arrives. The locked clocks are reset and NVML
        is shut down on the way out, including when an exception interrupts
        the loop.

        Args:
            device_id: The NVML device index of the GPU to control.
        """
        pynvml.nvmlInit()
        try:
            handle = pynvml.nvmlDeviceGetHandleByIndex(device_id)

            # Return the power limit to the default.
            pynvml.nvmlDeviceSetPowerManagementLimit(
                handle,
                pynvml.nvmlDeviceGetPowerManagementDefaultLimit(handle),
            )

            max_mem_freq = max(pynvml.nvmlDeviceGetSupportedMemoryClocks(handle))

            # From here on clocks may be locked, so the reset below must run
            # no matter how this section exits; otherwise a failure would
            # leave the GPU pinned to the last applied frequency.
            try:
                # Set the memory frequency to be the highest.
                with contextlib.suppress(pynvml.NVMLError_NotSupported):  # type: ignore
                    pynvml.nvmlDeviceSetMemoryLockedClocks(
                        handle, max_mem_freq, max_mem_freq
                    )

                # Set the SM frequency to be the highest.
                max_freq = max(
                    pynvml.nvmlDeviceGetSupportedGraphicsClocks(handle, max_mem_freq)
                )
                pynvml.nvmlDeviceSetGpuLockedClocks(handle, max_freq, max_freq)
                current_freq = max_freq

                # Wait on the queue for the next frequency to set.
                while True:
                    target_freq = self._q.get(block=True)
                    if target_freq is None:
                        break
                    # Skip the NVML call when the requested value is already
                    # in effect.
                    if current_freq != target_freq:
                        pynvml.nvmlDeviceSetGpuLockedClocks(
                            handle, target_freq, target_freq
                        )
                        current_freq = target_freq
            finally:
                # Reset everything.
                with contextlib.suppress(pynvml.NVMLError_NotSupported):  # type: ignore
                    pynvml.nvmlDeviceResetMemoryLockedClocks(handle)
                pynvml.nvmlDeviceResetGpuLockedClocks(handle)
        finally:
            pynvml.nvmlShutdown()

__init__

__init__(nvml_device_id=0)

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| nvml_device_id | int | The NVML device ID of the GPU to control. | 0 |
Source code in zeus/optimizer/perseus/frequency_controller.py
29
30
31
32
33
34
35
36
37
38
39
def __init__(self, nvml_device_id: int = 0) -> None:
    """Create the request queue and launch the controller process.

    Args:
        nvml_device_id: The NVML device ID of the GPU to control.
    """
    # Channel carrying frequency requests; `None` is the stop signal that
    # `end` puts on it.
    command_queue: mp.Queue[int | None] = mp.Queue()
    self._q = command_queue

    worker = mp.Process(
        target=self._controller_process,
        args=(nvml_device_id,),
    )
    self._proc = worker

    # Hook `end` into interpreter exit before the worker starts running.
    atexit.register(self.end)
    worker.start()

set_frequency

set_frequency(frequency)

Set the GPU's frequency asynchronously.

If frequency is zero, returns without doing anything.

Source code in zeus/optimizer/perseus/frequency_controller.py
41
42
43
44
45
46
47
def set_frequency(self, frequency: int) -> None:
    """Queue a GPU frequency request without blocking the caller.

    A `frequency` of zero is a no-op: nothing is queued.
    """
    if frequency == 0:
        return
    self._q.put_nowait(frequency)

end

end()

Stop the controller process.

Source code in zeus/optimizer/perseus/frequency_controller.py
49
50
51
def end(self) -> None:
    """Signal the controller process to stop by queueing `None`."""
    stop_signal = None
    self._q.put_nowait(stop_signal)

_controller_process

_controller_process(device_id)

Receive frequency values through a queue and apply them.

Source code in zeus/optimizer/perseus/frequency_controller.py
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
def _controller_process(self, device_id: int) -> None:
    """Receive frequency values through a queue and apply them.

    Locks the memory and SM clocks to their maximum supported values, then
    applies each frequency received from `self._q` until `None` arrives. The
    locked clocks are reset and NVML is shut down on the way out, including
    when an exception interrupts the loop.

    Args:
        device_id: The NVML device index of the GPU to control.
    """
    pynvml.nvmlInit()
    try:
        handle = pynvml.nvmlDeviceGetHandleByIndex(device_id)

        # Return the power limit to the default.
        pynvml.nvmlDeviceSetPowerManagementLimit(
            handle,
            pynvml.nvmlDeviceGetPowerManagementDefaultLimit(handle),
        )

        max_mem_freq = max(pynvml.nvmlDeviceGetSupportedMemoryClocks(handle))

        # From here on clocks may be locked, so the reset below must run no
        # matter how this section exits; otherwise a failure would leave the
        # GPU pinned to the last applied frequency.
        try:
            # Set the memory frequency to be the highest.
            with contextlib.suppress(pynvml.NVMLError_NotSupported):  # type: ignore
                pynvml.nvmlDeviceSetMemoryLockedClocks(
                    handle, max_mem_freq, max_mem_freq
                )

            # Set the SM frequency to be the highest.
            max_freq = max(
                pynvml.nvmlDeviceGetSupportedGraphicsClocks(handle, max_mem_freq)
            )
            pynvml.nvmlDeviceSetGpuLockedClocks(handle, max_freq, max_freq)
            current_freq = max_freq

            # Wait on the queue for the next frequency to set.
            while True:
                target_freq = self._q.get(block=True)
                if target_freq is None:
                    break
                # Skip the NVML call when the requested value is already in
                # effect.
                if current_freq != target_freq:
                    pynvml.nvmlDeviceSetGpuLockedClocks(
                        handle, target_freq, target_freq
                    )
                    current_freq = target_freq
        finally:
            # Reset everything.
            with contextlib.suppress(pynvml.NVMLError_NotSupported):  # type: ignore
                pynvml.nvmlDeviceResetMemoryLockedClocks(handle)
            pynvml.nvmlDeviceResetGpuLockedClocks(handle)
    finally:
        pynvml.nvmlShutdown()