-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfilter_excel.py
More file actions
executable file
·115 lines (81 loc) · 4.51 KB
/
filter_excel.py
File metadata and controls
executable file
·115 lines (81 loc) · 4.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# -*- coding: utf-8 -*-
"""
Spyder Editor
This is a temporary script file.
"""
import pandas as pd
from shapely.wkt import loads
from shapely.geometry import Point
def convert_to_degrees_minutes(decimal_degrees):
"""
Convert a latitude or longitude value from decimal degrees to degrees and minutes format.
Args:
decimal_degrees (float): The decimal degree value.
Returns:
str: The value in degrees and minutes format (DD.MM.MM).
"""
degrees = int(decimal_degrees)
minutes_float = abs(decimal_degrees - degrees) * 60
minutes = int(minutes_float)
return f"{degrees:02d}.{minutes_float:05.2f}"
def read_csv_to_dataframe(file_path):
"""
Reads a .csv file into a pandas DataFrame, assuming the first row contains column names.
Args:
file_path (str): The path to the .csv file.
Returns:
pd.DataFrame: The loaded DataFrame.
"""
try:
# Read the CSV file into a DataFrame
df = pd.read_csv(file_path)
# Select only the required columns
selected_columns = ['site_id', 'sector', 'site_x_utm33', 'site_y_utm33', 'outlet_x_utm33', 'outlet_y_utm33', 'regine', 'year', 'TOC_kg', 'lon', 'lat']
df = df[selected_columns]
print("CSV file successfully loaded.")
return df
except FileNotFoundError:
print(f"Error: The file at '{file_path}' was not found.")
except Exception as e:
print(f"An error occurred: {e}")
def filter_points_within_polygon(df, polygon_wkt):
"""
Filters DataFrame rows that fall within the given polygon WKT.
Args:
df (pd.DataFrame): The input DataFrame with 'site_x_utm33' and 'site_y_utm33' columns.
polygon_wkt (str): The Well-Known Text (WKT) representation of the polygon.
Returns:
pd.DataFrame: Filtered DataFrame containing only points inside the polygon.
"""
polygon = loads(polygon_wkt)
df['geometry'] = df.apply(lambda row: Point(row['site_x_utm33'], row['site_y_utm33']), axis=1)
df_filtered = df[df['geometry'].apply(lambda point: point.within(polygon))]
df_filtered.drop(columns=['geometry'], inplace=True)
return df_filtered
def filter_by_regine(df, regine_list):
selection = dataframe[df['regine'].isin(regine_list)]
return selection
if __name__ == "__main__":
file_path = "filtered_Hardanger.csv"
dataframe = read_csv_to_dataframe(file_path)
if dataframe is not None:
print(dataframe.head())
# #Filter by REGINE
# sel_regine = ['053.43', '053.42', '053.41', '053.3', '045.40', '046.11', '046.12', '046.21', '046.510', '046.52', '053.220']
# selection = filter_by_regine(dataframe, sel_regine)
poly = "POLYGON((-55656.384317704 6769310.0142572,-50195.373395682 6768463.3458972,-48375.036421675 6761647.6655991,-49475.705289679 6757541.3240531,-48713.703765676 6755551.6534071,-47147.36729967 6753180.9819991,-45792.697923664 6750937.3108451,-41093.688525646 6748481.9726011,-37368.347741631 6738533.619371,-42363.691065651 6734511.944661,-42871.692081653 6733072.608449,-45284.696907662 6731590.938819,-47782.368569673 6732437.607179,-51465.375935687 6745984.3009391,-51042.041755686 6748820.6399451,-52481.377967691 6749624.9748871,-54513.382031699 6758133.9919051,-55360.050391703 6761605.3321811,-55656.384317704 6769310.0142572))"
selection = filter_points_within_polygon(dataframe, poly)
selection = selection[selection['sector'].isin(['Aquaculture', 'Large wastewater'])]
selection = selection.drop_duplicates(subset='site_id', keep='last').reset_index(drop=True)
# selection['lat'] = selection['lat'].apply(convert_to_degrees_minutes)
# selection['lon'] = selection['lon'].apply(convert_to_degrees_minutes)
selection = selection.sort_values(by='TOC_kg')
selection['cages'] = selection['sector'].apply(lambda x: 150 if x == 'Aquaculture' else 25) #might want to use other values
#Rescaling Carbon
largest_TOC = selection['TOC_kg'].iloc[-1]
maximum_release = 100000 #might want a different value
selection['TOC_kg'] = selection['TOC_kg'].apply(lambda x: maximum_release * (x/largest_TOC))
cages = selection[['lat', 'lon', 'cages']]
scaled_carbon = selection[['site_id', 'sector', 'TOC_kg', 'lat', 'lon']]
cages.to_csv('Hjeltefjorden/cages.csv', index = False)
scaled_carbon.to_csv('Hjeltefjorden/scaled_carbon.csv', index = False)