Skip to content

rapl

zeus.device.cpu.rapl

RAPL CPUs.

  • RAPL (Running Average Power Limit): RAPL is a technology introduced by Intel that allows for power consumption monitoring and control at the processor and memory subsystem level. It provides mechanisms to enforce power limits and manage thermal conditions effectively.

  • Power Zone: A power zone in the context of RAPL refers to a logical grouping of components within the CPU or system that share a common power domain. Each power zone can be monitored and controlled independently. Typical power zones include the entire package, specific cores, and memory subsystems.

  • Package: The package refers to the physical CPU chip, which may contain multiple cores and integrated components. In RAPL, the package power domain encompasses the power consumption of all the cores and integrated units within the CPU package.

RaplWraparoundTracker

Monitor the wrapping around of RAPL counters.

This class acts as a lower level wrapper around a Python process that polls the wrapping of RAPL counters. This is primarily used by RAPLCPUs.

Warning

Since the monitor spawns a child process, it should not be instantiated as a global variable. Python guards against creating a process at global (import-time) scope. Refer to the "Safe importing of main module" section in the Python documentation for more details.

Attributes:

Name Type Description
rapl_file_path str

File path of rapl file to track wraparounds for.

max_energy_uj float

Max value of the RAPL counter for the rapl_file_path file. Used to determine the sleep period between polls.

Source code in zeus/device/cpu/rapl.py
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
class RaplWraparoundTracker:
    """Monitor the wrapping around of RAPL counters.

    This class acts as a lower-level wrapper around a Python process that polls
    the wrapping of RAPL counters. This is primarily used by
    [`RAPLCPUs`][zeus.device.cpu.rapl.RAPLCPUs].

    !!! Warning
        Since the monitor spawns a child process, **it should not be instantiated as a global variable**.
        Python puts a protection to prevent creating a process in global scope.
        Refer to the "Safe importing of main module" section in the
        [Python documentation](https://docs.python.org/3/library/multiprocessing.html#the-spawn-and-forkserver-start-methods)
        for more details.

    Attributes:
        rapl_file_path (str): File path of rapl file to track wraparounds for.
        max_energy_uj (float): Max value of rapl counter for `rapl_file_path` file. Used to
            determine the sleep period between polls
    """

    def __init__(
        self,
        rapl_file_path: str,
        max_energy_uj: float,
    ) -> None:
        """Initialize the rapl monitor.

        Args:
            rapl_file_path: File path where the RAPL file is located
            max_energy_uj: Max energy range uj value

        Raises:
            ValueError: If `rapl_file_path` does not exist.
        """
        if not os.path.exists(rapl_file_path):
            raise ValueError(f"{rapl_file_path} is not a valid file path")

        # Store the documented attributes so they are actually available.
        self.rapl_file_path = rapl_file_path
        self.max_energy_uj = max_energy_uj

        # Set up logging.
        self.logger = get_logger(type(self).__name__)

        self.logger.info("Monitoring wrap around of %s", rapl_file_path)

        context = mp.get_context("spawn")
        # Shared signed-int counter incremented by the polling process.
        self.wraparound_counter = context.Value("i", 0)
        # Spawn the polling process.
        self.process = context.Process(
            target=_polling_process,
            args=(rapl_file_path, max_energy_uj, self.wraparound_counter),
        )
        self.process.start()
        # Register the exit hook only once the process is running, so the hook
        # never sees a missing or never-started process.
        atexit.register(self._stop)

    def _stop(self) -> None:
        """Stop the polling process. Safe to call more than once."""
        process = getattr(self, "process", None)
        if process is None:
            return
        process.terminate()
        process.join(timeout=1.0)
        if process.is_alive():
            # The child ignored SIGTERM; force it and reap it.
            process.kill()
            process.join(timeout=1.0)
        self.process = None

    def get_num_wraparounds(self) -> int:
        """Get the number of wraparounds detected by the polling process."""
        with self.wraparound_counter.get_lock():
            return self.wraparound_counter.value

__init__

__init__(rapl_file_path, max_energy_uj)

Parameters:

Name Type Description Default
rapl_file_path str

File path where the RAPL file is located

required
max_energy_uj float

Max energy range uj value

required
Source code in zeus/device/cpu/rapl.py
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
def __init__(
    self,
    rapl_file_path: str,
    max_energy_uj: float,
) -> None:
    """Initialize the rapl monitor.

    Args:
        rapl_file_path: File path where the RAPL file is located
        max_energy_uj: Max energy range uj value

    Raises:
        ValueError: If `rapl_file_path` does not exist.
    """
    if not os.path.exists(rapl_file_path):
        raise ValueError(f"{rapl_file_path} is not a valid file path")

    # Store the attributes the class docstring advertises.
    self.rapl_file_path = rapl_file_path
    self.max_energy_uj = max_energy_uj

    # Set up logging.
    self.logger = get_logger(type(self).__name__)

    self.logger.info("Monitoring wrap around of %s", rapl_file_path)

    context = mp.get_context("spawn")
    # Shared signed-int counter incremented by the polling process.
    self.wraparound_counter = context.Value("i", 0)
    # Spawn the polling process.
    self.process = context.Process(
        target=_polling_process,
        args=(rapl_file_path, max_energy_uj, self.wraparound_counter),
    )
    self.process.start()
    # Register the exit hook only after the process has started; registering
    # earlier lets the hook run against a missing or never-started process.
    atexit.register(self._stop)

_stop

_stop()

Stop monitoring power usage.

Source code in zeus/device/cpu/rapl.py
 94
 95
 96
 97
 98
 99
100
def _stop(self) -> None:
    """Stop the polling process. Safe to call more than once.

    Does nothing when no process was ever attached (for example when
    construction failed before the process was created).
    """
    process = getattr(self, "process", None)
    if process is None:
        return
    process.terminate()
    process.join(timeout=1.0)
    if process.is_alive():
        # The child did not exit after terminate(); force it and reap it.
        process.kill()
        process.join(timeout=1.0)
    self.process = None

get_num_wraparounds

get_num_wraparounds()

Get the number of wraparounds detected by the polling process.

Source code in zeus/device/cpu/rapl.py
102
103
104
105
def get_num_wraparounds(self) -> int:
    """Return how many counter wraparounds the polling process has recorded."""
    counter = self.wraparound_counter
    # Hold the counter's lock while reading the shared value.
    with counter.get_lock():
        num_wraparounds = counter.value
    return num_wraparounds

ZeusRAPLNotSupportedError

Bases: ZeusBaseCPUError

Zeus CPU exception class wrapper for RAPL not supported on CPU.

Source code in zeus/device/cpu/rapl.py
140
141
142
143
144
145
class ZeusRAPLNotSupportedError(ZeusBaseCPUError):
    """Zeus CPU exception raised when RAPL is not supported on the CPU."""

    def __init__(self, message: str) -> None:
        """Initialize the exception.

        Args:
            message: Error description, forwarded to the base exception.
        """
        super().__init__(message)

__init__

__init__(message)
Source code in zeus/device/cpu/rapl.py
143
144
145
def __init__(self, message: str) -> None:
    """Initialize the exception.

    Args:
        message: Error description, forwarded to the base exception.
    """
    super().__init__(message)

ZeusRAPLFileInitError

Bases: ZeusBaseCPUError

Zeus CPU exception class wrapper for RAPL file initialization error on CPU.

Source code in zeus/device/cpu/rapl.py
148
149
150
151
152
153
class ZeusRAPLFileInitError(ZeusBaseCPUError):
    """Zeus CPU exception raised when a RAPL file cannot be initialized."""

    def __init__(self, message: str) -> None:
        """Initialize the exception.

        Args:
            message: Error description, forwarded to the base exception.
        """
        super().__init__(message)

__init__

__init__(message)
Source code in zeus/device/cpu/rapl.py
151
152
153
def __init__(self, message: str) -> None:
    """Initialize the exception.

    Args:
        message: Error description, forwarded to the base exception.
    """
    super().__init__(message)

ZeusRAPLPermissionError

Bases: ZeusBaseCPUError

Zeus CPU exception that wraps No Permission to perform a RAPL operation.

Source code in zeus/device/cpu/rapl.py
156
157
158
159
160
161
class ZeusRAPLPermissionError(ZeusBaseCPUError):
    """Zeus CPU exception that wraps No Permission to perform a RAPL operation."""

    # NOTE(review): nothing in the visible part of this module raises this
    # class; confirm where it is meant to be used.
    def __init__(self, message: str) -> None:
        """Initialize the exception object.

        Args:
            message: Error description, forwarded to the base exception.
        """
        super().__init__(message)

__init__

__init__(message)
Source code in zeus/device/cpu/rapl.py
159
160
161
def __init__(self, message: str) -> None:
    """Initialize the exception object.

    Args:
        message: Error description, forwarded to the base exception.
    """
    super().__init__(message)

RAPLFile

RAPL File class for each RAPL file.

This class defines the interface for interacting with a RAPL file for a package. A package can be a CPU or DRAM.

Source code in zeus/device/cpu/rapl.py
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
class RAPLFile:
    """RAPL File class for each RAPL file.

    This class defines the interface for interacting with a RAPL file for a package. A package can
    be a CPU or DRAM.
    """

    def __init__(self, path: str) -> None:
        """Initialize RAPL file object. Each RAPL file object manages one energy_uj file.

        Reads the `name`, `energy_uj` and `max_energy_range_uj` files under `path`
        and starts a wraparound tracker for the `energy_uj` counter.

        Args:
            path: Directory containing the RAPL files.

        Raises:
            ZeusRAPLFileInitError: If any of the three files does not exist.
            cpu_common.ZeusCPUNoPermissionError: If `energy_uj` cannot be read
                because of a permission error.
        """
        self.path = path
        self.energy_uj_path = os.path.join(path, "energy_uj")
        try:
            with open(os.path.join(path, "name"), "r") as name_file:
                self.name: str = name_file.read().strip()
        except FileNotFoundError as err:
            raise ZeusRAPLFileInitError("Error reading package name") from err
        try:
            with open(self.energy_uj_path) as energy_file:
                self.last_energy = float(energy_file.read().strip())
        except FileNotFoundError as err:
            raise ZeusRAPLFileInitError("Error reading package energy") from err
        except PermissionError as err:
            raise cpu_common.ZeusCPUNoPermissionError(
                "Can't read file due to permission error"
            ) from err
        try:
            with open(
                os.path.join(path, "max_energy_range_uj"), "r"
            ) as max_energy_file:
                self.max_energy_range_uj = float(max_energy_file.read().strip())
        except FileNotFoundError as err:
            raise ZeusRAPLFileInitError(
                "Error reading package max energy range"
            ) from err

        # Counts counter wraparounds so `read` can return a monotonic value.
        self.wraparound_tracker = RaplWraparoundTracker(
            self.energy_uj_path, self.max_energy_range_uj
        )

    def __str__(self) -> str:
        """Return a string representation of the RAPL file object."""
        # Adjacent literals are used instead of a backslash continuation, which
        # would embed the source indentation into the returned string.
        return (
            f"RAPLFile(Path: {self.path}\n"
            f"Energy_uj_path: {self.energy_uj_path}\n"
            f"Name: {self.name}\n"
            f"Last_energy: {self.last_energy}\n"
            f"Max_energy: {self.max_energy_range_uj})"
        )

    def read(self) -> float:
        """Read the current energy value from the energy_uj file.

        The raw counter is extended by one `max_energy_range_uj` per wraparound
        reported by the tracker.

        Returns:
            The current energy value in millijoules.
        """
        with open(self.energy_uj_path) as energy_file:
            new_energy_uj = float(energy_file.read().strip())
        num_wraparounds = self.wraparound_tracker.get_num_wraparounds()
        # Microjoules to millijoules.
        return (new_energy_uj + num_wraparounds * self.max_energy_range_uj) / 1000.0

__init__

__init__(path)
Source code in zeus/device/cpu/rapl.py
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
def __init__(self, path: str) -> None:
    """Set up a RAPL file object that manages a single energy_uj file.

    Reads `name`, `energy_uj` and `max_energy_range_uj` under `path`, in that
    order, then starts a wraparound tracker for the energy counter.

    Raises:
        ZeusRAPLFileInitError: If any of the three files does not exist.
        cpu_common.ZeusCPUNoPermissionError: If `energy_uj` cannot be read
            because of a permission error.
    """
    self.path = path
    self.energy_uj_path = os.path.join(path, "energy_uj")

    # Zone name (compared against "dram" by callers).
    try:
        with open(os.path.join(path, "name"), "r") as fh:
            raw_name = fh.read()
    except FileNotFoundError as err:
        raise ZeusRAPLFileInitError("Error reading package name") from err
    else:
        self.name: str = raw_name.strip()

    # Initial counter reading.
    try:
        with open(self.energy_uj_path) as fh:
            self.last_energy = float(fh.read().strip())
    except FileNotFoundError as err:
        raise ZeusRAPLFileInitError("Error reading package energy") from err
    except PermissionError as err:
        raise cpu_common.ZeusCPUNoPermissionError(
            "Can't read file due to permission error"
        ) from err

    # Counter range, i.e. the value at which the counter wraps.
    try:
        with open(os.path.join(path, "max_energy_range_uj"), "r") as fh:
            self.max_energy_range_uj = float(fh.read().strip())
    except FileNotFoundError as err:
        raise ZeusRAPLFileInitError("Error reading package max energy range") from err

    self.wraparound_tracker = RaplWraparoundTracker(
        self.energy_uj_path,
        self.max_energy_range_uj,
    )

__str__

__str__()

Return a string representation of the RAPL file object.

Source code in zeus/device/cpu/rapl.py
203
204
205
206
def __str__(self) -> str:
    """Return a string representation of the RAPL file object."""
    # Adjacent literals are used instead of a backslash continuation, which
    # would embed the source indentation into the returned string.
    return (
        f"RAPLFile(Path: {self.path}\n"
        f"Energy_uj_path: {self.energy_uj_path}\n"
        f"Name: {self.name}\n"
        f"Last_energy: {self.last_energy}\n"
        f"Max_energy: {self.max_energy_range_uj})"
    )

read

read()

Read the current energy value from the energy_uj file.

Returns:

Type Description
float

The current energy value in millijoules.

Source code in zeus/device/cpu/rapl.py
208
209
210
211
212
213
214
215
216
217
def read(self) -> float:
    """Return the wraparound-corrected energy counter.

    The raw value of the energy_uj file is extended by one
    `max_energy_range_uj` per wraparound reported by the tracker.

    Returns:
        The current energy value in millijoules.
    """
    with open(self.energy_uj_path) as counter_file:
        raw_text = counter_file.read()
    current_uj = float(raw_text.strip())
    wraps = self.wraparound_tracker.get_num_wraparounds()
    total_uj = current_uj + wraps * self.max_energy_range_uj
    # Microjoules to millijoules.
    return total_uj / 1000.0

RAPLCPU

Bases: CPU

Control a single CPU that supports RAPL.

Source code in zeus/device/cpu/rapl.py
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
class RAPLCPU(cpu_common.CPU):
    """Control a single CPU that supports RAPL."""

    def __init__(self, cpu_index: int, rapl_dir: str) -> None:
        """Initialize the Intel CPU with a specified index.

        Args:
            cpu_index: Index N of the `intel-rapl:N` directory to use.
            rapl_dir: Directory that contains the `intel-rapl:N` directories.
        """
        super().__init__(cpu_index)
        self.rapl_dir = rapl_dir
        self._get_powerzone()

    _exception_map = {
        FileNotFoundError: cpu_common.ZeusCPUNotFoundError,
        PermissionError: cpu_common.ZeusCPUNoPermissionError,
        OSError: cpu_common.ZeusCPUInitError,
    }

    def _get_powerzone(self) -> None:
        """Open the package RAPL file and look for a "dram" sub-zone."""
        self.path = os.path.join(self.rapl_dir, f"intel-rapl:{self.cpu_index}")
        self.rapl_file: RAPLFile = RAPLFile(self.path)
        self.dram: RAPLFile | None = None
        for entry in os.listdir(self.path):
            if "intel-rapl" not in entry:
                continue
            try:
                subzone = RAPLFile(os.path.join(self.path, entry))
            except ZeusRAPLFileInitError as err:
                warnings.warn(
                    f"Failed to initialize subpackage {err}", stacklevel=1
                )
                continue
            if subzone.name == "dram":
                self.dram = subzone
            else:
                # Every RAPLFile starts a wraparound polling process. This
                # sub-zone is discarded, so stop its process now instead of
                # leaving it running until interpreter exit.
                subzone.wraparound_tracker._stop()

    def getTotalEnergyConsumption(self) -> CpuDramMeasurement:
        """Returns the total energy consumption of the specified powerzone. Units: mJ."""
        cpu_mj = self.rapl_file.read()
        dram_mj = None
        if self.dram is not None:
            dram_mj = self.dram.read()
        return CpuDramMeasurement(cpu_mj=cpu_mj, dram_mj=dram_mj)

    def supportsGetDramEnergyConsumption(self) -> bool:
        """Returns True if the specified CPU powerzone supports retrieving the subpackage energy consumption."""
        return self.dram is not None

__init__

__init__(cpu_index, rapl_dir)
Source code in zeus/device/cpu/rapl.py
223
224
225
226
227
def __init__(self, cpu_index: int, rapl_dir: str) -> None:
    """Initialize the Intel CPU with a specified index.

    Args:
        cpu_index: Index of the CPU, forwarded to the base class.
        rapl_dir: Directory stored as `self.rapl_dir` for locating RAPL files.
    """
    super().__init__(cpu_index)
    self.rapl_dir = rapl_dir
    # Opens the RAPL files for this CPU.
    self._get_powerzone()

getTotalEnergyConsumption

getTotalEnergyConsumption()

Returns the total energy consumption of the specified powerzone. Units: mJ.

Source code in zeus/device/cpu/rapl.py
251
252
253
254
255
256
257
def getTotalEnergyConsumption(self) -> CpuDramMeasurement:
    """Read the package energy and, when present, the DRAM energy. Units: mJ.

    `dram_mj` is None when no DRAM RAPL file is attached to this CPU.
    """
    package_reading = self.rapl_file.read()
    dram_reading = self.dram.read() if self.dram is not None else None
    return CpuDramMeasurement(cpu_mj=package_reading, dram_mj=dram_reading)

supportsGetDramEnergyConsumption

supportsGetDramEnergyConsumption()

Returns True if the specified CPU powerzone supports retrieving the subpackage energy consumption.

Source code in zeus/device/cpu/rapl.py
259
260
261
def supportsGetDramEnergyConsumption(self) -> bool:
    """Tell whether a DRAM RAPL file is attached to this CPU powerzone."""
    has_dram_zone = self.dram is not None
    return has_dram_zone

RAPLCPUs

Bases: CPUs

RAPL CPU Manager object, containing individual RAPLCPU objects, abstracting RAPL calls and handling related exceptions.

Source code in zeus/device/cpu/rapl.py
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
class RAPLCPUs(cpu_common.CPUs):
    """RAPL CPU Manager object, containing individual RAPLCPU objects, abstracting RAPL calls and handling related exceptions."""

    def __init__(self) -> None:
        """Instantiates the RAPLCPUs object, setting up tracking for all RAPL CPUs found.

        Raises:
            ZeusRAPLNotSupportedError: If `rapl_is_available()` returns False.
        """
        if not rapl_is_available():
            raise ZeusRAPLNotSupportedError("RAPL is not supported on this CPU.")

        # Prefer RAPL_DIR; fall back to CONTAINER_RAPL_DIR when it is missing.
        self.rapl_dir = RAPL_DIR if os.path.exists(RAPL_DIR) else CONTAINER_RAPL_DIR
        self._init_cpus()

    @property
    def cpus(self) -> Sequence[RAPLCPU]:
        """Returns a list of CPU objects being tracked."""
        return self._cpus

    def _init_cpus(self) -> None:
        """Create one RAPLCPU per `intel-rapl:<N>` entry, in ascending order of N."""
        indices = []
        for zone_path in glob(f"{self.rapl_dir}/intel-rapl:*"):
            # Parse the index from the entry name only, so a ":" elsewhere in
            # the path cannot be mistaken for the separator. Validate before
            # converting so a non-numeric entry is skipped, not fatal.
            index = os.path.basename(zone_path).partition(":")[2]
            if index.isdigit():
                indices.append(int(index))
        self._cpus = [RAPLCPU(index, self.rapl_dir) for index in sorted(indices)]

    def __del__(self) -> None:
        """Shuts down the Intel CPU monitoring."""
        pass

cpus property

cpus

Returns a list of CPU objects being tracked.

__init__

__init__()
Source code in zeus/device/cpu/rapl.py
267
268
269
270
271
272
273
def __init__(self) -> None:
    """Set up tracking for the RAPL CPUs found on this machine.

    Raises:
        ZeusRAPLNotSupportedError: If `rapl_is_available()` returns False.
    """
    if not rapl_is_available():
        raise ZeusRAPLNotSupportedError("RAPL is not supported on this CPU.")

    # Prefer RAPL_DIR; fall back to CONTAINER_RAPL_DIR when it is missing.
    if os.path.exists(RAPL_DIR):
        self.rapl_dir = RAPL_DIR
    else:
        self.rapl_dir = CONTAINER_RAPL_DIR
    self._init_cpus()

_init_cpus

_init_cpus()

Initialize all Intel CPUs.

Source code in zeus/device/cpu/rapl.py
280
281
282
283
284
285
286
287
288
289
290
def _init_cpus(self) -> None:
    """Create one RAPLCPU per `intel-rapl:<N>` entry, in ascending order of N.

    Entries whose suffix after the colon is not purely numeric are skipped.
    """
    indices = []
    for zone_path in glob(f"{self.rapl_dir}/intel-rapl:*"):
        # Parse the index from the entry name only, so a ":" elsewhere in the
        # path cannot be mistaken for the separator. Validate before
        # converting so a non-numeric entry is skipped, not fatal.
        index = os.path.basename(zone_path).partition(":")[2]
        if index.isdigit():
            indices.append(int(index))
    self._cpus = [RAPLCPU(index, self.rapl_dir) for index in sorted(indices)]

__del__

__del__()

Shuts down the Intel CPU monitoring.

Source code in zeus/device/cpu/rapl.py
292
293
294
def __del__(self) -> None:
    """Finalizer hook for the CPU manager; intentionally performs no work."""

_polling_process

_polling_process(
    rapl_file_path, max_energy_uj, wraparound_counter
)

Check for wraparounds in the specified rapl file.

Source code in zeus/device/cpu/rapl.py
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
def _polling_process(
    rapl_file_path: str, max_energy_uj: float, wraparound_counter: Synchronized[int]
) -> None:
    """Poll a RAPL counter file forever and count its wraparounds.

    A wraparound is recorded whenever a reading is lower than the previous one.
    The loop sleeps 1.0 s between polls, or 0.1 s when the reading is within
    `RAPL_COUNTER_MAX_INCREASE` of `max_energy_uj`.

    Args:
        rapl_file_path: Path of the counter file to poll.
        max_energy_uj: Maximum value of the counter.
        wraparound_counter: Shared counter incremented once per wraparound.
    """

    def read_counter() -> float:
        with open(rapl_file_path) as counter_file:
            return float(counter_file.read().strip())

    try:
        previous_uj = read_counter()
        while True:
            current_uj = read_counter()
            # Poll faster when the counter is close to its maximum.
            near_limit = max_energy_uj - current_uj < RAPL_COUNTER_MAX_INCREASE
            if current_uj < previous_uj:
                with wraparound_counter.get_lock():
                    wraparound_counter.value += 1
            previous_uj = current_uj
            time.sleep(0.1 if near_limit else 1.0)
    except KeyboardInterrupt:
        return

rapl_is_available cached

rapl_is_available()

Check if RAPL is available.

Source code in zeus/device/cpu/rapl.py
130
131
132
133
134
135
136
137
@lru_cache(maxsize=1)
def rapl_is_available() -> bool:
    """Report whether a RAPL directory exists; the result is cached.

    Returns:
        True if either `RAPL_DIR` or `CONTAINER_RAPL_DIR` exists, else False.
    """
    if os.path.exists(RAPL_DIR) or os.path.exists(CONTAINER_RAPL_DIR):
        logger.info("RAPL is available.")
        return True
    logger.info("RAPL is not supported on this CPU.")
    return False