diff --git a/cats/__init__.py b/cats/__init__.py index 3318da4..be09461 100644 --- a/cats/__init__.py +++ b/cats/__init__.py @@ -7,8 +7,7 @@ from datetime import timedelta from typing import Optional -from .carbonFootprint import Estimates, greenAlgorithmsCalculator -from .check_clean_arguments import validate_jobinfo +from .carbonFootprint import Estimates, get_footprint_reduction_estimate from .CI_api_interface import InvalidLocationError from .CI_api_query import get_CI_forecast # noqa: F401 from .configure import get_runtime_config @@ -170,7 +169,7 @@ class CATSOutput: emmissionEstimate: Optional[Estimates] = None def __str__(self) -> str: - out = f"Best job start time: {self.carbonIntensityOptimal.start}\n" + out = f"Best job start time: {self.carbonIntensityOptimal.start}" if self.emmissionEstimate: out += ( @@ -215,7 +214,8 @@ def main(arguments=None) -> Optional[int]: " specify the scheduler with the -s or --scheduler option" ) return 1 - config, CI_API_interface, location, duration = get_runtime_config(args) + + CI_API_interface, location, duration, jobinfo, PUE = get_runtime_config(args) ######################## ## Obtain CI forecast ## @@ -244,24 +244,15 @@ def main(arguments=None) -> Optional[int]: ## Calculate carbon footprint ## ################################ - if args.jobinfo: - jobinfo = validate_jobinfo( - args.jobinfo, expected_partition_names=config["partitions"].keys() + if args.footprint: + output.emmissionEstimate = get_footprint_reduction_estimate( + PUE=PUE, + jobinfo=jobinfo, + runtime=timedelta(minutes=args.duration), + average_best_ci=best_avg.value, + average_now_ci=now_avg.value, ) - if not (jobinfo and config): - logging.warning( - "Not enough information to estimate total carbon footprint, " - "both --jobinfo and config files are needed.\n" - ) - else: - output.emmissionEstimate = greenAlgorithmsCalculator( - config=config, - runtime=timedelta(minutes=args.duration), - averageBest_carbonIntensity=best_avg.value, # TODO replace with 
real carbon intensity - averageNow_carbonIntensity=now_avg.value, - **jobinfo, - ).get_footprint() if args.format == "json": if isinstance(args.dateformat, str) and "%" not in args.dateformat: dateformat = SCHEDULER_DATE_FORMAT.get(args.dateformat, "") diff --git a/cats/carbonFootprint.py b/cats/carbonFootprint.py index 2cb1ff0..968e22a 100644 --- a/cats/carbonFootprint.py +++ b/cats/carbonFootprint.py @@ -1,178 +1,24 @@ +import datetime from collections import namedtuple -import yaml - Estimates = namedtuple("Estimates", ["now", "best", "savings"]) -class greenAlgorithmsCalculator: - def __init__( - self, - config, - partition, - runtime, - memory, - cpus, - gpus, - averageBest_carbonIntensity, - averageNow_carbonIntensity, - ): - """ - - :param partition: [str] has to match one of the partitions in `config.yml` - :param runtime: [datetime.timedelta] - :param memory: [int] in GB - :param cpus: [int] - :param gpus: [int] - :param averageBest_carbonIntensity: [float] in gCO2e/kWh - :param averageNow_carbonIntensity: [float] in gCO2e/kWh - """ - # ### Load cluster specific info - # with open(config, "r") as stream: - # try: - # self.cluster_info = yaml.safe_load(stream) - # except yaml.YAMLError as exc: - # print(exc) - self.cluster_info = config - - ### Load fixed parameters - with open("fixed_parameters.yaml", "r") as stream: - try: - self.fParams = yaml.safe_load(stream) - except yaml.YAMLError as exc: - print(exc) - - self.partition = partition - self.runtime = runtime - self.memory = memory - self.cpus = cpus - self.gpus = gpus - self.averageBest_carbonIntensity = averageBest_carbonIntensity - self.averageNow_carbonIntensity = averageNow_carbonIntensity - - def formatText_footprint(self, footprint_g): - """ - Format the text to display the carbon footprint - :param footprint_g: [float] carbon footprint, in gCO2e - :return: [str] the text to display - """ - if footprint_g < 1e3: - text_footprint = f"{footprint_g:,.0f} gCO2e" - elif footprint_g < 1e6: - 
text_footprint = f"{footprint_g / 1e3:,.0f} kgCO2e" - else: - text_footprint = f"{footprint_g / 1e3:,.0f} TCO2e" - return text_footprint - - def formatText_treemonths(self, tm_float): - """ - Format the text to display the tree months - :param tm_float: [float] tree-months - :return: [str] the text to display - """ - tm = int(tm_float) - ty = int(tm / 12) - if tm < 1: - text_trees = f"{tm_float:.3f} tree-months" - elif tm == 1: - text_trees = f"{tm_float:.1f} tree-month" - elif tm < 6: - text_trees = f"{tm_float:.1f} tree-months" - elif tm <= 24: - text_trees = f"{tm} tree-months" - elif tm < 120: - text_trees = f"{ty} tree-years and {tm - ty * 12} tree-months" - else: - text_trees = f"{ty} tree-years" - return text_trees - - def formatText_driving(self, dist): - """ - Format the text to display the driving distance - :param dist: [float] driving distance, in km - :return: [str] text to display - """ - if dist < 10: - text_driving = f"driving {dist:,.2f} km" - else: - text_driving = f"driving {dist:,.0f} km" - return text_driving - - def formatText_flying(self, footprint_g, fParams): - """ - Format the text to display about flying - :param footprint_g: [float] carbon footprint, in gCO2e - :param fParams: [dict] Fixed parameters, from fixed_parameters.yaml - :return: [str] text to display - """ - if footprint_g < 0.5 * fParams["flight_NY_SF"]: - text_flying = f"{footprint_g / fParams['flight_PAR_LON']:,.2f} flights between Paris and London" - elif footprint_g < 0.5 * fParams["flight_NYC_MEL"]: - text_flying = f"{footprint_g / fParams['flight_NY_SF']:,.2f} flights between New York and San Francisco" - else: - text_flying = f"{footprint_g / fParams['flight_NYC_MEL']:,.2f} flights between New York and Melbourne" - return text_flying - - def calculate_energies(self): - ### Power draw CPU and GPU - partition_info = self.cluster_info["partitions"][self.partition] - if partition_info["type"] == "CPU": - TDP2use4CPU = partition_info["TDP"] - TDP2use4GPU = 0 - else: - 
TDP2use4CPU = partition_info["TDP_CPU"] - TDP2use4GPU = partition_info["TDP"] - - ### Energy usage - energies = { - "energy_CPUs": self.runtime.total_seconds() - / 3600 - * self.cpus - * TDP2use4CPU - / 1000, # in kWh - "energy_GPUs": self.runtime.total_seconds() - / 3600 - * self.gpus - * TDP2use4GPU - / 1000, # in kWh - "energy_memory": self.runtime.total_seconds() - / 3600 - * self.memory - * self.fParams["power_memory_perGB"] - / 1000, # in kWh - } - - energies["total_energy"] = self.cluster_info["PUE"] * ( - energies["energy_CPUs"] - + energies["energy_GPUs"] - + energies["energy_memory"] - ) - - return energies - - def calculate_CF(self, energies): - CF_best = { - "CF_CPUs": energies["energy_CPUs"] * self.averageBest_carbonIntensity, - "CF_GPUs": energies["energy_GPUs"] * self.averageBest_carbonIntensity, - "CF_memory": energies["energy_memory"] * self.averageBest_carbonIntensity, - "total_CF": energies["total_energy"] * self.averageBest_carbonIntensity, - } - - CF_now = { - "CF_CPUs": energies["energy_CPUs"] * self.averageNow_carbonIntensity, - "CF_GPUs": energies["energy_GPUs"] * self.averageNow_carbonIntensity, - "CF_memory": energies["energy_memory"] * self.averageNow_carbonIntensity, - "total_CF": energies["total_energy"] * self.averageNow_carbonIntensity, - } - - return CF_best, CF_now - - def get_footprint(self): - energies = self.calculate_energies() - CF_best, CF_now = self.calculate_CF(energies) - best = CF_best["total_CF"] - now = CF_now["total_CF"] - - return Estimates( - *[self.formatText_footprint(e) for e in [now, best, now - best]] - ) +def get_footprint_reduction_estimate( + PUE: float, + jobinfo: list[tuple[int, float]], + runtime: datetime.timedelta, + average_best_ci: float, # in gCO2/kWh + average_now_ci: float, +) -> Estimates: + # energy in kWh + energy = ( + PUE + * (runtime.total_seconds() / 3600) + * sum([(nunits * power) for nunits, power in jobinfo]) + / 1000 + ) + best = energy * average_best_ci + now = energy * average_now_ci + + 
return Estimates(now, best, now - best) diff --git a/cats/check_clean_arguments.py b/cats/check_clean_arguments.py deleted file mode 100644 index a301f06..0000000 --- a/cats/check_clean_arguments.py +++ /dev/null @@ -1,56 +0,0 @@ -import re -import sys -from typing import Iterable, Optional, TypedDict - - -class JobInfo(TypedDict): - partition: str - memory: int - cpus: int - gpus: int - - -def validate_jobinfo( - jobinfo: str, expected_partition_names: Iterable[str] -) -> Optional[JobInfo]: - """Parses a string of job info keys in the form - - partition=CPU_partition,memory=8,ncpus=8,ngpus=0 - - and checks all required info keys are present and of the right type. - - :return: A dictionary mapping info key to their specified values - """ - - expected_info_keys = ( - "partition", - "memory", - "cpus", - "gpus", - ) - info = dict([match.groups() for match in re.finditer(r"(\w+)=([\w.]+)", jobinfo)]) - - # Check if some information is missing - if missing_keys := set(expected_info_keys) - set(info.keys()): - sys.stderr.write(f"ERROR: Missing job info keys: {missing_keys}") - return None - - # Validate partition value - if info["partition"] not in expected_partition_names: - sys.stderr.write( - f"ERROR: job info key 'partition' should be one of {expected_partition_names}. 
Typo?\n" - ) - return None - - # check that `cpus`, `gpus` and `memory` are numeric and convert to int - for key in [k for k in info if k != "partition"]: - try: - info[key] = int(info[key]) - assert info[key] >= 0 - except (ValueError, AssertionError): - sys.stderr.write( - f"ERROR: job info key {key} should be a positive integer\n" - ) - return None - - return JobInfo(info) diff --git a/cats/configure.py b/cats/configure.py index f7b73f7..d1fb245 100644 --- a/cats/configure.py +++ b/cats/configure.py @@ -64,7 +64,7 @@ def get_runtime_config(args) -> tuple[dict, APIInterface, str, int]: jobinfo = None PUE = None - return configmapping, CI_API_interface, location, duration, jobinfo, PUE + return CI_API_interface, location, duration, jobinfo, PUE def config_from_file(configpath="") -> Mapping[str, Any]: diff --git a/config.yml b/config.yml deleted file mode 100644 index f236652..0000000 --- a/config.yml +++ /dev/null @@ -1,22 +0,0 @@ - -## ~~~ TO BE EDITED TO BE TAILORED TO THE CLUSTER ~~~ -## -## Settings for fictive CW23 -## -## Updated: 12/07/2023 - ---- -location: "EH8" -api: "carbonintensity.org.uk" -PUE: 1.20 # > 1 -partitions: - CPU_partition: - type: CPU # CPU or GPU - model: "Xeon Gold 6142" - TDP: 9.4 # in W, per core - GPU_partition: - type: GPU - model: "NVIDIA A100-SXM-80GB GPUs" # from https://docs.hpc.cam.ac.uk/hpc/user-guide/a100.html - TDP: 300 # from https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/PB-10577-001_v02.pdf - CPU_model: "AMD EPYC 7763" # from HPC team - TDP_CPU: 4.4 # from https://www.amd.com/fr/products/cpu/amd-epyc-7763 diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst index 28dc5e8..d23c366 100644 --- a/docs/source/quickstart.rst +++ b/docs/source/quickstart.rst @@ -12,12 +12,14 @@ You can run CATS with: :caption: *A basic command to run CATS when a job duration and postcode are provided.* - $ python -m cats -d --loc + $ cats --duration 480 --location "EH8" -The ``postcode`` is optional, and 
can be pulled from the ``config.yml`` file -or, if that is not present, inferred using the server IP address. +The ``--location`` option is optional, and can be pulled from a +configuration file (see :ref:`configuration-file`), or inferred using +the server's IP address. -The ``job_duration`` is in minutes, specified as an integer. +The ``--duration`` option indicates the expected job duration in +minutes. The scheduler then calls a function that estimates the best time to start the job given predicted carbon intensity over the next 48 hours. The @@ -27,45 +29,98 @@ should be transparent to cluster users. It will display the time to start the job on standard out and optionally some information about the carbon intensity on standard error. +.. _configuration-file: + +Using a configuration file +-------------------------- + +Information about location can be provided by a configuration file +instead of command line arguments to the ``cats`` command. + +.. code-block:: yaml + + location: "EH8" + +Use the ``--config`` option to specify a path to the configuration +file, relative to the current directory. + +In case of a missing location command line argument, ``cats`` looks +for a file named ``config.yaml`` in the current directory. + +.. code-block:: shell + + # Override location value at the command line + cats --config /path/to/config.yaml --location "OX1" + +.. code-block:: shell + + # location information is assumed to be provided in + # ./config.yaml. If not, 'cats' errors out. + cats --duration 480 + Displaying carbon footprint estimates ------------------------------------- CATS is able to provide an estimate for the carbon footprint reduction resulting from delaying your job. To enable the footprint estimation, -you must provide information about the machine in the form of a YAML -configuration file. An example is given below: +you must provide the ``--footprint`` option, the memory consumption in GB +and a hardware profile: + +.. 
code-block:: shell + + cats --duration 480 --location "EH8" --footprint --memory 8 --profile <profile_name> + +The ``--profile`` option specifies information about the power consumption and +quantity of hardware the job is using. This information is provided by +adding a section ``profiles`` to the :ref:`cats YAML configuration +file <configuration-file>`. + +You can define an arbitrary number of profiles as subsections of the +top-level ``profiles`` section: .. code-block:: yaml :caption: *An example provision of machine information by YAML file to enable estimation of the carbon footprint reduction.* - cluster_name: "CW23" - postcode: "EH8 9BT" - PUE: 1.20 # > 1 - partitions: - CPU_partition: - type: CPU # CPU or GPU - model: "Xeon Gold 6142" - TDP: 9.4 # Thermal Design Power, in W per core - GPU_partition: - type: GPU - model: "NVIDIA A100-SXM-80GB GPUs" - TDP: 300 # from https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/PB-10577-001_v02.pdf - CPU_model: "AMD EPYC 7763" - TDP_CPU: 4.4 # from https://www.amd.com/fr/products/cpu/amd-epyc-7763 - - -Use the ``--config`` option to specify a path to the configuration file, -relative to the current directory. If no path is specified, CATS looks for a -file named ``config.yml`` in the current directory. - -Additionally, to obtain carbon footprints, job-specific information -must be provided to CATS through the ``--jobinfo`` option. The example -below demonstrates running CATS with footprint estimation for a job using -8GB of memory, 2 CPU cores and no GPU: - -.. 
code-block:: console - :caption: *An example run command showing provision of job information.* - - $ cats -d 120 --config .config/config.yml --jobinfo cpus=2,gpus=0,memory=8,partition=CPU_partition + profiles: + my_cpu_only_profile: + cpu: + model: "Xeon Gold 6142" + power: 9.4 # in W, per core + nunits: 2 + my_gpu_profile: + gpu: + model: "NVIDIA A100-SXM-80GB GPUs" + power: 300 + nunits: 2 + cpu: + model: "AMD EPYC 7763" + power: 4.4 + nunits: 1 + +The name of the profile section is arbitrary, but each profile section +*must* contain one ``cpu`` section, or one ``gpu`` section, or both. +Each hardware type (``cpu`` or ``gpu``) section *must* contain the +``power`` (in Watts, for one unit) and ``nunits`` sections. The ``model`` section is optional, +meant for documentation. + +When running ``cats``, you can specify which profile to use for carbon +footprint estimation with the ``--profile`` option: + +.. code-block:: shell + + cats --duration 480 --location "EH8" --footprint --memory 6.7 --profile my_gpu_profile + +The default number of units specified for a profile can be overridden +at the command line: + +.. code-block:: shell + + cats --duration 480 --location "EH8" --footprint --memory 16 \ + --profile my_gpu_profile --gpu 4 --cpu 1 + +.. warning:: + The ``--profile`` option is optional. If not provided, ``cats`` uses the + first profile defined in the configuration file as the default + profile. 
diff --git a/tests/test_check_clean_arguments.py b/tests/test_check_clean_arguments.py deleted file mode 100644 index 4bbea58..0000000 --- a/tests/test_check_clean_arguments.py +++ /dev/null @@ -1,25 +0,0 @@ -import pytest - -from cats.check_clean_arguments import validate_jobinfo - -# This is usually read from a config.yml file under the 'partitions' key -PARTITIONS = ["CPU_partition", "GPU_partition"] - - -def test_validate_jobinfo_ok(): - assert validate_jobinfo( - "cpus=2,gpus=0,memory=8,partition=CPU_partition", PARTITIONS - ) == dict(cpus=2, gpus=0, memory=8, partition="CPU_partition") - - -@pytest.mark.parametrize( - "jobinfo", - [ - "cpus=2.5,gpus=1,memory=8,partition=CPU_partition", # floating CPUs - "cpus=2,gpus=-1,memory=8,partition=CPU_partition", # negative integer - "cpus=2", # missing keys - "cpus=2,gpus=2,memory=8,partition=one", # unknown partition - ], -) -def test_validate_jobinfo_notok(jobinfo): - assert validate_jobinfo(jobinfo, PARTITIONS) is None diff --git a/tests/test_footprint.py b/tests/test_footprint.py new file mode 100644 index 0000000..e590d7b --- /dev/null +++ b/tests/test_footprint.py @@ -0,0 +1,19 @@ +import datetime + +from numpy.testing import assert_allclose + +from cats.carbonFootprint import Estimates, get_footprint_reduction_estimate + +JOBINFO = [(1, 2.0), (2, 3.0), (8, 1.0)] + + +def test_get_footprint_reduction_estimate(): + expected = Estimates(now=3.2, best=2.4, savings=0.8) + est = get_footprint_reduction_estimate( + PUE=1.0, + jobinfo=JOBINFO, + runtime=datetime.timedelta(minutes=60), + average_best_ci=150, # gCO2/kWh + average_now_ci=200, # gCO2/kWh + ) + assert_allclose(expected, est)