import os
import yaml
import pandas as pd
import numpy as np

from backend.helpers import check_empty_results, simulate_mock_jobs
from backend.slurm_extract import WorkloadManager

# print("Working dir1: ", os.getcwd())  # DEBUGONLY

class GA_tools:

    def __init__(self, cluster_info, fParams):
        self.cluster_info = cluster_info
        self.fParams = fParams

    def calculate_energies(self, row):
        '''
        Calculate the energy usage based on the job's parameters
        :param row: [pd.Series] one row of usage statistics, corresponding to one job
        :return: [pd.Series] the same statistics with the energies added
        '''
        ### CPU and GPU
        partition_info = self.cluster_info['partitions'][row.PartitionX]
        if row.PartitionTypeX == 'CPU':
            TDP2use4CPU = partition_info['TDP']
            TDP2use4GPU = 0
        else:
            TDP2use4CPU = partition_info['TDP_CPU']
            TDP2use4GPU = partition_info['TDP']

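        # Energy in kWh: run time (hours) x TDP (W) / 1000, calculated separately for CPUs and GPUs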
        row['energy_CPUs'] = row.TotalCPUtime2useX.total_seconds() / 3600 * TDP2use4CPU / 1000  # in kWh

        row['energy_GPUs'] = row.TotalGPUtime2useX.total_seconds() / 3600 * TDP2use4GPU / 1000  # in kWh

        ### memory
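        # Two scenarios: '' uses the memory requested (ReqMemX), '_memoryNeededOnly' uses the memory
        # actually needed (NeededMemX), so the impact of memory overallocation can be quantified.
        # The total is then scaled by the data centre's PUE.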
        for suffix, memory2use in zip(['', '_memoryNeededOnly'], [row.ReqMemX, row.NeededMemX]):
            row[f'energy_memory{suffix}'] = row.WallclockTimeX.total_seconds() / 3600 * memory2use * self.fParams['power_memory_perGB'] / 1000  # in kWh
            row[f'energy{suffix}'] = (row.energy_CPUs + row.energy_GPUs + row[f'energy_memory{suffix}']) * self.cluster_info['PUE']  # in kWh

        return row

    def calculate_carbonFootprint(self, df, col_energy):
        '''
        Convert energy (kWh) into a carbon footprint using the cluster's carbon intensity (CI).
        :param df: [pd.DataFrame] usage statistics with energy columns already calculated
        :param col_energy: [str] name of the energy column to convert
        :return: [pd.Series] carbon footprint for each job
        '''
        return df[col_energy] * self.cluster_info['CI']


def extract_data(args, cluster_info):

    if args.use_mock_agg_data:  # DEBUGONLY

        if args.reportBug or args.reportBugHere:
            print("\n(!) --reportBug and --reportBugHere are ignored when mock aggregated data is used\n")

        # df2 = simulate_mock_jobs()
        # df2.to_pickle("testData/df_agg_X_mockMultiUsers_1.pkl")

        # mock_data_path = 'testData/df_agg_test_3.pkl'
        mock_data_path = 'testData/df_agg_X_1.pkl'
        print(f"Overriding df_agg with `{mock_data_path}`")
        return pd.read_pickle(mock_data_path)


    ### Pull usage statistics from the workload manager
    WM = WorkloadManager(args, cluster_info)
    WM.pull_logs()

    ### Log the output for debugging
    # TODO cleanup file/dir management here
    scripts_dir = os.path.dirname(os.path.realpath(__file__))
    if args.reportBug or args.reportBugHere:

        # log_name = str(datetime.datetime.now().timestamp()).replace(".", "_")

        if args.reportBug:
            # Create an error_logs subfolder in the output dir
            errorLogsDir = os.path.join(args.outputDir2use['path'], 'error_logs')
            os.makedirs(errorLogsDir, exist_ok=True)
            log_path = os.path.join(errorLogsDir, 'sacctOutput.csv')
        else:
            # i.e. args.reportBugHere is True
            log_path = f"{args.userCWD}/sacctOutput_{args.outputDir2use['timestamp']}.csv"

        with open(log_path, 'wb') as f:
            f.write(WM.logs_raw)
        print(f"\nSLURM statistics logged for debugging: {log_path}\n")

    ### Turn usage logs into DataFrame
    WM.convert2dataframe()

    # And clean
    WM.clean_logs_df()
    # Check if there are any jobs during the period from this directory and with these jobIDs
    check_empty_results(WM.df_agg, args)

    # Check that there is only one user's data
    if len(set(WM.df_agg_X.UserX)) > 1:
        raise ValueError(f"More than one user's logs were included: {set(WM.df_agg_X.UserX)}")

    # WM.df_agg_X.to_pickle("testData/df_agg_X_1.pkl")  # DEBUGONLY used to test different steps offline

    return WM.df_agg_X

def enrich_data(df, fParams, GA):

    ### energy
    df = df.apply(GA.calculate_energies, axis=1)

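    # Energy wasted on failed jobs (StateX is 0 for failed jobs, 1 for successful ones)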
    df['energy_failedJobs'] = np.where(df.StateX == 0, df.energy, 0)

    ### carbon footprint
    for suffix in ['', '_memoryNeededOnly', '_failedJobs']:
        df[f'carbonFootprint{suffix}'] = GA.calculate_carbonFootprint(df, f'energy{suffix}')
        # Context metrics (part 1)
        df[f'treeMonths{suffix}'] = df[f'carbonFootprint{suffix}'] / fParams['tree_month']
        df[f'cost{suffix}'] = df[f'energy{suffix}'] * fParams['electricity_cost']  # TODO use realtime electricity costs

    ### Context metrics (part 2)
    df['driving'] = df.carbonFootprint / fParams['passengerCar_EU_perkm']
    df['flying_NY_SF'] = df.carbonFootprint / fParams['flight_NY_SF']
    df['flying_PAR_LON'] = df.carbonFootprint / fParams['flight_PAR_LON']
    df['flying_NYC_MEL'] = df.carbonFootprint / fParams['flight_NYC_MEL']

    return df

def summarise_data(df, args):
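    # Named-aggregation spec: output column -> (source column, aggregation function),
    # used with DataFrame.groupby(...).agg(**...) below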
    agg_functions_from_raw = {
        'n_jobs': ('UserX', 'count'),
        'first_job_period': ('SubmitDatetimeX', 'min'),
        'last_job_period': ('SubmitDatetimeX', 'max'),
        'energy': ('energy', 'sum'),
        'energy_CPUs': ('energy_CPUs', 'sum'),
        'energy_GPUs': ('energy_GPUs', 'sum'),
        'energy_memory': ('energy_memory', 'sum'),
        'carbonFootprint': ('carbonFootprint', 'sum'),
        'carbonFootprint_memoryNeededOnly': ('carbonFootprint_memoryNeededOnly', 'sum'),
        'carbonFootprint_failedJobs': ('carbonFootprint_failedJobs', 'sum'),
        'cpuTime': ('TotalCPUtime2useX', 'sum'),
        'gpuTime': ('TotalGPUtime2useX', 'sum'),
        'wallclockTime': ('WallclockTimeX', 'sum'),
        'CPUhoursCharged': ('CPUhoursChargedX', 'sum'),
        'GPUhoursCharged': ('GPUhoursChargedX', 'sum'),
        'memoryRequested': ('ReqMemX', 'sum'),
        'memoryOverallocationFactor': ('memOverallocationFactorX', 'mean'),
        'n_success': ('StateX', 'sum'),
        'treeMonths': ('treeMonths', 'sum'),
        'treeMonths_memoryNeededOnly': ('treeMonths_memoryNeededOnly', 'sum'),
        'treeMonths_failedJobs': ('treeMonths_failedJobs', 'sum'),
        'driving': ('driving', 'sum'),
        'flying_NY_SF': ('flying_NY_SF', 'sum'),
        'flying_PAR_LON': ('flying_PAR_LON', 'sum'),
        'flying_NYC_MEL': ('flying_NYC_MEL', 'sum'),
        'cost': ('cost', 'sum'),
        'cost_failedJobs': ('cost_failedJobs', 'sum'),
        'cost_memoryNeededOnly': ('cost_memoryNeededOnly', 'sum'),
    }

    # This is to aggregate an already-aggregated dataset (so the column names differ slightly)
    agg_functions_further = agg_functions_from_raw.copy()
    agg_functions_further['n_jobs'] = ('n_jobs', 'sum')
    agg_functions_further['first_job_period'] = ('first_job_period', 'min')
    agg_functions_further['last_job_period'] = ('last_job_period', 'max')
    agg_functions_further['cpuTime'] = ('cpuTime', 'sum')
    agg_functions_further['gpuTime'] = ('gpuTime', 'sum')
    agg_functions_further['wallclockTime'] = ('wallclockTime', 'sum')
    agg_functions_further['CPUhoursCharged'] = ('CPUhoursCharged', 'sum')
    agg_functions_further['GPUhoursCharged'] = ('GPUhoursCharged', 'sum')
    agg_functions_further['memoryRequested'] = ('memoryRequested', 'sum')
    agg_functions_further['memoryOverallocationFactor'] = ('memoryOverallocationFactor', 'mean')  # NB: not strictly correct to take a mean of means, but acceptable here
    agg_functions_further['n_success'] = ('n_success', 'sum')

    def agg_jobs(data, agg_names=None):
        """
        Aggregate job-level (or already-aggregated) usage statistics.

        :param data: [pd.DataFrame] usage statistics, either raw job-level data or the output of a previous aggregation
        :param agg_names: [list] columns to group by; if None, then the whole dataset is aggregated into a single row
        :return: [pd.DataFrame] aggregated statistics
        """
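        # Grouping by a function that always returns True puts every row into a single group,
        # so the whole dataset is aggregated at once when agg_names is None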
        agg_names2 = agg_names if agg_names else lambda _: True
        if 'UserX' in data.columns:
            timeseries = data.groupby(agg_names2).agg(**agg_functions_from_raw)
        else:
            timeseries = data.groupby(agg_names2).agg(**agg_functions_further)

        timeseries.reset_index(inplace=True, drop=(agg_names is None))
        timeseries['success_rate'] = timeseries.n_success / timeseries.n_jobs
        timeseries['failure_rate'] = 1 - timeseries.success_rate
        timeseries['share_carbonFootprint'] = timeseries.carbonFootprint / timeseries.carbonFootprint.sum()

        return timeseries

    df['SubmitDate'] = df.SubmitDatetimeX.dt.date  # TODO do it with real start time rather than submit day

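    # Aggregate twice: first per submission day, then collapse those daily rows into one overall set of user statistics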
    df_userdaily = agg_jobs(df, ['SubmitDate'])
    df_overallStats = agg_jobs(df_userdaily)
    dict_overallStats = df_overallStats.iloc[0, :].to_dict()
    userID = df.UserX.iloc[0]

    output = {
        "userDaily": df_userdaily,
        "userActivity": {userID: dict_overallStats},
        "user": userID
    }

    # Some job-level statistics to plot distributions
    memoryOverallocationFactors = df.groupby('UserX')['memOverallocationFactorX'].apply(list).to_dict()
    memoryOverallocationFactors['overall'] = df.memOverallocationFactorX.to_numpy()
    output['memoryOverallocationFactors'] = memoryOverallocationFactors

    return output


def main_backend(args):
    '''
    Run the full pipeline: load configuration, pull and clean usage logs from the workload manager,
    compute energy and carbon metrics, and summarise them per user and per day.
    :param args: arguments namespace (command-line options, or a similar structure as in __main__ below)
    :return: [dict] summary statistics, as returned by summarise_data()
    '''
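    # Keys read from the two YAML files below (structure inferred from this script; values are placeholders):
    #
    # cluster_info.yaml:
    #   PUE: ...            # power usage effectiveness of the data centre
    #   CI: ...             # carbon intensity used to convert kWh into a carbon footprint
    #   partitions:
    #     <partition name>:
    #       TDP: ...        # TDP used for the partition's CPUs (CPU partitions) or GPUs (GPU partitions)
    #       TDP_CPU: ...    # TDP of the host CPUs (GPU partitions only)
    #
    # fixed_parameters.yaml:
    #   power_memory_perGB: ...
    #   tree_month: ...
    #   electricity_cost: ...
    #   passengerCar_EU_perkm: ...
    #   flight_NY_SF: ...
    #   flight_PAR_LON: ...
    #   flight_NYC_MEL: ...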
    ### Load cluster specific info
    with open(os.path.join(args.path_infrastucture_info, 'cluster_info.yaml'), "r") as stream:
        try:
            cluster_info = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            print(exc)

    ### Load fixed parameters
    with open("data/fixed_parameters.yaml", "r") as stream:
        try:
            fParams = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            print(exc)

    GA = GA_tools(cluster_info, fParams)

    df = extract_data(args, cluster_info=cluster_info)
    df2 = enrich_data(df, fParams=fParams, GA=GA)
    summary_stats = summarise_data(df2, args=args)

    return summary_stats

if __name__ == "__main__":

    #### This is used for testing only ####

    from collections import namedtuple
    argStruct = namedtuple('argStruct',
                           'startDay endDay use_mock_agg_data useCustomLogs customSuccessStates filterWD filterJobIDs filterAccount reportBug reportBugHere path_infrastucture_info')
    args = argStruct(
        startDay='2022-01-01',
        endDay='2023-06-30',
        useCustomLogs=None,
        use_mock_agg_data=True,
        customSuccessStates='',
        filterWD=None,
        filterJobIDs='all',
        filterAccount=None,
        reportBug=False,
        reportBugHere=False,
        path_infrastucture_info="clustersData/CSD3",
    )

    main_backend(args)