-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathArgo_gridding_ANHA4.py
More file actions
160 lines (142 loc) · 6.22 KB
/
Argo_gridding_ANHA4.py
File metadata and controls
160 lines (142 loc) · 6.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# Creates netcdf of Argo data from mixedlayer.ucsd.edu
# Puts mld on the ANHA4 grid
# Uses the algorithm and threshold mlds
# Rowan Brown
# July 2025
import xarray as xr
import numpy as np
from datetime import datetime as dt
from datetime import timedelta as td
def regrid_Argo(region="LabSea"):
    """Regrid the Argo MLD data onto the ANHA4 model grid.

    Reads the Argo mixed-layer dataset (from mixedlayer.ucsd.edu) and the
    ANHA4 horizontal mesh, bins the profiles into 5-day windows, assigns
    each profile to its nearest grid cell, and averages profiles that land
    in the same cell/date bin (so heavily sampled areas aren't
    over-weighted). Writes the result to 'Argo_mld_ANHA4_<region>.nc'.

    Parameters
    ----------
    region : str
        "LabSea" for the Labrador Sea subdomain, or "NorthAtlantic" for a
        wider box (roughly equator--Fram Strait, Gulf of Mexico--Baltic)
        restricted to 2008-01-01 -- 2017-12-31.

    Raises
    ------
    ValueError
        If region is neither "LabSea" nor "NorthAtlantic".
    """
    # Open netCDF files (Argo data from mixedlayer.ucsd.edu)
    ds = xr.open_dataset('Argo_mixedlayers_all_04142022.nc')
    mesh = xr.open_dataset('masks/mesh_hgr_ANHA4.nc')  # standard ANHA4 grid
    # Mask down to the requested general region
    if region == "LabSea":
        # If we don't want the full NA, we might as well save time now
        ds = ds.where(
            (ds.profilelat > 50) & (ds.profilelat < 65) &
            (ds.profilelon < -45) & (ds.profilelon > -65),
            drop=True
        )
        mesh = mesh.where(
            (mesh.x > 100) & (mesh.x < 250) &
            (mesh.y > 300) & (mesh.y < 500),
            drop=True
        )
    elif region == "NorthAtlantic":
        # If we do want the wider NA, we can only exclude certain points
        # Define NA as much larger than the LS, more like
        # equator <-> Fram Strait and Gulf of Mexico <-> Baltic
        # We also only need 01.01.2008--31.12.2017, so might as well cut that
        # Note the Argo profiledates are saved as days since Jan-1-0000
        # Also note that we'll properly mask the NA later
        ds = ds.where(
            (ds.profilelat > -1) & (ds.profilelat < 81) &
            (ds.profilelon < 15) & (ds.profilelon > -100),
            drop=True
        )
        # First legal dt is Jan 1, 0001, but profiledate counts from year 0,
        # so add 365 days to bridge the gap (existing file convention —
        # NOTE(review): assumes a 365-day year 0; verify against the source
        # data's calendar)
        start_date = (
            dt(2008, 1, 1, 0, 0, 0) - dt(1, 1, 1, 0, 0, 0) + td(days=365)
        )
        end_date = (
            dt(2018, 1, 1, 0, 0, 0) - dt(1, 1, 1, 0, 0, 0) + td(days=365)
        )
        ds = ds.where(
            (ds.profiledate > start_date.days) &
            (ds.profiledate < end_date.days),
            drop=True
        )
    else:
        # Fail loudly instead of print() + quit(): callers (and tests) can
        # catch this, and it doesn't kill the interpreter session
        raise ValueError(
            "region string is illegal; must be either "
            "'LabSea' or 'NorthAtlantic'"
        )
    print("General region masked")
    # Binning the ARGO data in time (based on matlab datevec, I suppose)
    # Jan 1, year 1 (need to subtract 365 w/r/t profiledate, which measures
    # from year 0)
    start_dt = dt(1, 1, 1, 0, 0, 0)
    profiledates = ds.profiledate.to_numpy()  # days from Jan 1, year 0

    # Bin each profile date into 5-day windows
    def rounded_profiledate(profiledate):
        return 5 * round(profiledate / 5)
    rounded_profiledates = [
        rounded_profiledate(profiledate)
        for profiledate in profiledates
    ]

    # Convert "days since year 0" into datetime objects
    def dtdate(profiledate):
        return start_dt + td(days=(profiledate - 365))
    dtdates = [
        dtdate(rounded_profiledate)
        for rounded_profiledate in rounded_profiledates
    ]

    # Initializing output dataset
    ARGO = mesh[['nav_lat', 'nav_lon']]  # Starting off using the grid coords
    ARGO.attrs['TimeStamp'] = 'August 2025'
    ARGO.attrs['file_name'] = 'ARGO_mld_ANHA4_' + region
    ARGO.attrs['description'] = 'Argo MLD data on the ANHA4 grid in ' + region
    ARGO.attrs['variables'] = 'Density algorithm and density threshold'
    # Fixed corrupted URL (was 'ucoloursd.edu'); matches the dataset origin
    ARGO.attrs['source'] = 'http://mixedlayer.ucsd.edu'
    datecoord = sorted(np.unique(dtdates))  # Creating coordinate from dates
    ARGO = ARGO.assign_coords({'date': datecoord})
    # Init the important variables.
    # These arrays are accumulated into below (running averages and profile
    # counts), so they MUST start at zero — np.empty would leave
    # uninitialized garbage in every cell that receives data.
    for var in ['da_mld', 'dt_mld', 'num_profiles']:
        ARGO[var] = (
            ['y', 'x', 'date'],
            np.zeros((mesh.sizes['y'], mesh.sizes['x'], ARGO.sizes['date']))
        )
    # Loading into memory
    grid_lats = mesh.nav_lat.to_numpy()
    grid_lons = mesh.nav_lon.to_numpy()
    # Looping through and populating ARGO output dataset, slow but works
    print("Beginning loop through Argo points")
    for i in range(ds.sizes['iNPROF']):  # Go through each Argo data point
        # Load the lat-lon coordinate for the Argo data point
        lat = ds.profilelat.isel(iNPROF=i).to_numpy()
        lon = ds.profilelon.isel(iNPROF=i).to_numpy()
        # Finding the "distance" between the Argo point and each grid cell
        # (Euclidean in degrees — not great-circle, but adequate for
        # nearest-cell matching at this resolution)
        abslat = np.abs(grid_lats - lat)
        abslon = np.abs(grid_lons - lon)
        distances = (abslat**2 + abslon**2)**0.5
        # Finding the shortest distance and the closest grid cell
        shortest_distance = np.nanmin(distances)
        try:
            [idy], [idx] = np.where(distances == shortest_distance)
        except ValueError:
            # If except it's because the point is equidistant to two cells;
            # arbitrarily take the first match
            ids = np.where(distances == shortest_distance)
            idy = ids[0][0]
            idx = ids[1][0]
        # Number of Argo profiles already saved in the same cell and period
        count = ARGO['num_profiles'].loc[
            dict(x=idx, y=idy, date=dtdates[i])
        ].to_numpy()
        # Save the MLDs
        # Either adding the mld to the appropr cell and date or taking the
        # avg with any other profiles already saved in that cell and date.
        # Do it this way to avoid over-weighting areas that are highly
        # sampled
        for mld in ['da_mld', 'dt_mld']:  # (both variables)
            old_value = ARGO[mld].loc[dict(x=idx, y=idy, date=dtdates[i])]
            ARGO[mld].loc[
                dict(x=idx, y=idy, date=dtdates[i])
            ] = (old_value*count + ds[mld][i])/(count+1)
        # Incrementing the count to keep track of any cells and dates with
        # multiple data points
        ARGO['num_profiles'].loc[dict(x=idx, y=idy, date=dtdates[i])] += 1
        print(str(dtdates[i]) + ' done')
    # Save
    ARGO.to_netcdf('Argo_mld_ANHA4_' + region + '.nc')
    print('Argo MLD data saved for the ' + region)
    # Note if the region is the whole NA, the resultant file is around 7G
if __name__ == "__main__":
    # Default run targets the Labrador Sea subdomain
    regrid_Argo("LabSea")
    # regrid_Argo("NorthAtlantic")