Metomilo/filter_excel.py at main · fisa47/Metomilo · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# -*- coding: utf-8 -*-
"""
Spyder Editor

This is a temporary script file.
"""

import pandas as pd
from shapely.wkt import loads
from shapely.geometry import Point


def convert_to_degrees_minutes(decimal_degrees):
    """
    Convert a latitude or longitude value from decimal degrees to degrees and minutes format.

    Args:
        decimal_degrees (float): The decimal degree value.

    Returns:
        str: The value in degrees and minutes format (DD.MM.MM).
    """
    degrees = int(decimal_degrees)
    minutes_float = abs(decimal_degrees - degrees) * 60
    minutes = int(minutes_float)
    return f"{degrees:02d}.{minutes_float:05.2f}"


def read_csv_to_dataframe(file_path):
    """
    Reads a .csv file into a pandas DataFrame, assuming the first row contains column names.

    Args:
        file_path (str): The path to the .csv file.

    Returns:
        pd.DataFrame: The loaded DataFrame.
    """
    try:
        # Read the CSV file into a DataFrame
        df = pd.read_csv(file_path)
        # Select only the required columns
        selected_columns = ['site_id', 'sector', 'site_x_utm33', 'site_y_utm33', 'outlet_x_utm33', 'outlet_y_utm33', 'regine', 'year', 'TOC_kg', 'lon', 'lat']
        df = df[selected_columns]
        print("CSV file successfully loaded.")
        return df
    except FileNotFoundError:
        print(f"Error: The file at '{file_path}' was not found.")
    except Exception as e:
        print(f"An error occurred: {e}")


def filter_points_within_polygon(df, polygon_wkt):
    """
    Filters DataFrame rows that fall within the given polygon WKT.

    Args:
        df (pd.DataFrame): The input DataFrame with 'site_x_utm33' and 'site_y_utm33' columns.
        polygon_wkt (str): The Well-Known Text (WKT) representation of the polygon.

    Returns:
        pd.DataFrame: Filtered DataFrame containing only points inside the polygon.
    """
    polygon = loads(polygon_wkt)
    df['geometry'] = df.apply(lambda row: Point(row['site_x_utm33'], row['site_y_utm33']), axis=1)
    df_filtered = df[df['geometry'].apply(lambda point: point.within(polygon))]
    df_filtered.drop(columns=['geometry'], inplace=True)

    return df_filtered

def filter_by_regine(df, regine_list):
    selection = dataframe[df['regine'].isin(regine_list)]
    return selection

if __name__ == "__main__":
    file_path = "filtered_Hardanger.csv"
    dataframe = read_csv_to_dataframe(file_path)
    if dataframe is not None:
        print(dataframe.head())

    # #Filter by REGINE
    # sel_regine = ['053.43', '053.42', '053.41', '053.3', '045.40', '046.11', '046.12', '046.21', '046.510', '046.52', '053.220']
    # selection = filter_by_regine(dataframe, sel_regine)

    poly = "POLYGON((-55656.384317704 6769310.0142572,-50195.373395682 6768463.3458972,-48375.036421675 6761647.6655991,-49475.705289679 6757541.3240531,-48713.703765676 6755551.6534071,-47147.36729967 6753180.9819991,-45792.697923664 6750937.3108451,-41093.688525646 6748481.9726011,-37368.347741631 6738533.619371,-42363.691065651 6734511.944661,-42871.692081653 6733072.608449,-45284.696907662 6731590.938819,-47782.368569673 6732437.607179,-51465.375935687 6745984.3009391,-51042.041755686 6748820.6399451,-52481.377967691 6749624.9748871,-54513.382031699 6758133.9919051,-55360.050391703 6761605.3321811,-55656.384317704 6769310.0142572))"
    selection = filter_points_within_polygon(dataframe, poly)

    selection = selection[selection['sector'].isin(['Aquaculture', 'Large wastewater'])]
    selection = selection.drop_duplicates(subset='site_id', keep='last').reset_index(drop=True)

    # selection['lat'] = selection['lat'].apply(convert_to_degrees_minutes)
    # selection['lon'] = selection['lon'].apply(convert_to_degrees_minutes)

    selection = selection.sort_values(by='TOC_kg')
    selection['cages'] = selection['sector'].apply(lambda x: 150 if x == 'Aquaculture' else 25) #might want to use other values

    #Rescaling Carbon
    largest_TOC = selection['TOC_kg'].iloc[-1]
    maximum_release = 100000 #might want a different value
    selection['TOC_kg'] = selection['TOC_kg'].apply(lambda x: maximum_release * (x/largest_TOC))

    cages = selection[['lat', 'lon', 'cages']]
    scaled_carbon = selection[['site_id', 'sector', 'TOC_kg', 'lat', 'lon']]
    cages.to_csv('Hjeltefjorden/cages.csv', index = False)
    scaled_carbon.to_csv('Hjeltefjorden/scaled_carbon.csv', index = False)