add scripts to intersect db + some documentation
1 parent ddfcb4c · commit 8d38d2c
Showing 7 changed files with 330 additions and 0 deletions.
@@ -0,0 +1,3 @@
__pycache__/
.ipynb_checkpoints/
data/*/
Empty file.
Empty file.
Empty file.
@@ -0,0 +1,2 @@
import os.path as osp
absPathData = osp.join(osp.dirname(__file__), '../data/')
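
Both scripts below import this two-line module as dbExplorer and build their data paths from absPathData, which resolves to the data/ directory sitting next to the module, independent of the caller's working directory. A minimal sketch (the RGI subfolder is just an example):

import os.path as osp
import dbExplorer

# e.g. <repo>/data/RGI, no matter where the script is launched from
print(osp.abspath(osp.join(dbExplorer.absPathData, 'RGI')))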
@@ -0,0 +1,196 @@
import geopandas as gpd
import rasterio
import rasterio.plot
from rasterio.windows import from_bounds
from rasterio.mask import mask
import numpy as np
from pyproj import Transformer
from tqdm import tqdm
import os.path as osp
import json
import glob
from zipfile import ZipFile

import warnings
warnings.filterwarnings('ignore', module='pyproj')

import dbExplorer


def hasValidSpeed(glacier, tiff):
    # Count the non-NaN velocity pixels in a window 5% larger than the glacier's bounding box.
    pos = np.array(glacier.geometry.get_coordinates())
    xmin = pos[:, 0].min()
    xmax = pos[:, 0].max()
    ymin = pos[:, 1].min()
    ymax = pos[:, 1].max()
    dx = xmax - xmin
    dy = ymax - ymin

    left = xmin - 0.05*dx
    bottom = ymin - 0.05*dy
    right = xmax + 0.05*dx
    top = ymax + 0.05*dy
    wd = from_bounds(left, bottom, right, top, tiff.transform)
    rst = tiff.read(1, window=wd)

    return (1 - np.isnan(rst)).sum()


def mapToEpsg(glacier, epsgNum):
    # Re-project the glacier outline and recompute its centroid and terminus in the target CRS.
    glacierMapped = glacier.to_crs("epsg:" + str(epsgNum))

    cenlon, cenlat = np.array(glacierMapped.geometry.centroid.get_coordinates())[0]
    glacierMapped.cenlon = cenlon
    glacierMapped.cenlat = cenlat

    transformer = Transformer.from_crs("epsg:4326", "epsg:" + str(epsgNum))  # epsg:4326 is WGS 84
    d = transformer.transform(glacier.termlat, glacier.termlon)
    glacierMapped.termlon = d[0][0]
    glacierMapped.termlat = d[1][0]
    return glacierMapped


def getSpeed(tiff, glacier):
    # Mask the raster with the glacier outline and return the non-NaN values inside it.
    out, _ = mask(tiff, glacier.geometry, invert=False, crop=True, filled=False)
    valInside = out[~out.mask].data
    valInside = valInside[~np.isnan(valInside)]
    return valInside


areaIdToName = {1: 'alaska', 2: 'western_canada_usa', 3: 'arctic_canada_north', 4: 'arctic_canada_south',
                5: 'greenland_periphery', 6: 'iceland', 7: 'svalbard_jan_mayen', 8: 'scandinavia',
                9: 'russian_arctic', 10: 'north_asia', 11: 'central_europe', 12: 'caucasus_middle_east',
                13: 'central_asia', 14: 'south_asia_west', 15: 'south_asia_east', 16: 'low_latitudes',
                17: 'southern_andes', 18: 'new_zealand', 19: 'subantarctic_antarctic_islands'}


def rgiShpFile(areaId):
    areaName = areaIdToName[areaId]
    return f'{dbExplorer.absPathData}/RGI/RGI2000-v7.0-G-{areaId:02d}_{areaName}/RGI2000-v7.0-G-{areaId:02d}_{areaName}.shp'


def tiffFile(areaId):
    # Locate the velocity tiff(s) for a region, extracting the zip archive if needed.
    folder = f"{dbExplorer.absPathData}/Theia_annual_speed/velocity/RGI-{areaId}"
    if not osp.isdir(folder):
        if osp.exists(folder + '.zip'):
            print("Folder doesn't exist but zip was found. Extracting it...")
            with ZipFile(folder + '.zip', 'r') as zipFile:
                zipFile.extractall(osp.join(folder, '..'))
        else:
            raise FileNotFoundError(f"File {folder}.zip wasn't found")
    f = glob.glob(f"{folder}/V_RGI-*.tif")
    assert len(f) > 0, "Tiff files don't seem to exist"
    if len(f) > 1:
        # Several tiles: return a glob pattern matching all of them.
        return '.'.join(f[0].split('.')[:-2]) + '.*_' + f[0].split('_')[-1]
    else:
        return f[0]


class Region:
    def __init__(self, areaId):
        self.areaId = areaId
        self.areaName = areaIdToName[areaId]
        self.shpFileName = rgiShpFile(self.areaId)
        self.tiffFileName = tiffFile(self.areaId)
        self.gdf = gpd.read_file(self.shpFileName)
        self.glaciersOfInterest = self.gdf.copy()
        if '*' in self.tiffFileName:
            filenames = sorted(glob.glob(self.tiffFileName))
        else:
            filenames = [self.tiffFileName]
        self.tiffs = []
        for fn in filenames:
            self.tiffs.append(rasterio.open(fn))
        self.associatedTiffNb = None

    def filterGlaciersOutsideTiff(self):
        # Drop glaciers of interest that fall outside every velocity tiff.
        associatedTiffNb = self.getAssociatedTiffDict()
        glaciersToRemove = []
        for gid in tqdm(self.glaciersOfInterest.rgi_id):
            glacierWgs84 = self.glaciersOfInterest.loc[self.glaciersOfInterest.rgi_id==gid]
            tiffNb = associatedTiffNb[glacierWgs84['rgi_id'].item()]
            if tiffNb is None:
                glaciersToRemove.append(gid)
        print(f"{len(glaciersToRemove)=}")
        for gid in glaciersToRemove:
            self.glaciersOfInterest = self.glaciersOfInterest.drop(self.glaciersOfInterest[self.glaciersOfInterest.rgi_id==gid].index)

    def filterArea(self, thresArea=1):
        # Keep only glaciers larger than thresArea (in km^2).
        self.glaciersOfInterest = self.glaciersOfInterest[self.glaciersOfInterest['area_km2']>thresArea]

    def getAssociatedTiffDict(self):
        # Map each rgi_id to the index of the tiff containing it; cached in a json file.
        if self.associatedTiffNb is not None: return self.associatedTiffNb
        if len(self.tiffs)>1:
            fileName_associatedTiffNb = f"{dbExplorer.absPathData}/Theia_annual_speed/associatedTiffNb_{self.areaName}.json"
            if osp.exists(fileName_associatedTiffNb):
                with open(fileName_associatedTiffNb, "r") as infile:
                    self.associatedTiffNb = json.load(infile)
            else:
                self.associatedTiffNb = {}
                for rgi_id in tqdm(self.gdf['rgi_id']):
                    glacier = self.gdf.loc[self.gdf.rgi_id==rgi_id]
                    self.associatedTiffNb[rgi_id] = self.findCorrespondingTiff(glacier)
                # np.int64 is not json-serializable; convert to plain int before dumping
                for k in self.associatedTiffNb:
                    if isinstance(self.associatedTiffNb[k], np.int64):
                        self.associatedTiffNb[k] = int(self.associatedTiffNb[k])
                with open(fileName_associatedTiffNb, "w") as outfile:
                    json.dump(self.associatedTiffNb, outfile)
        else:
            self.associatedTiffNb = {rgi_id: 0 for rgi_id in self.gdf['rgi_id']}
        return self.associatedTiffNb

    def findCorrespondingTiff(self, glacierWgs84):
        # Return the index of the tiff whose bounds contain the glacier centroid,
        # disambiguating by the number of valid velocity pixels when several match.
        associatedTiffNb = []
        for e, tiff in enumerate(self.tiffs):
            glacier = mapToEpsg(glacierWgs84, tiff.crs.to_string().split(':')[1])
            x, y = np.array(glacier.geometry.centroid.get_coordinates())[0]
            bndx = (tiff.bounds.left, tiff.bounds.right)
            if bndx[0]>bndx[1]:
                bndx = bndx[::-1]
            bndy = (tiff.bounds.bottom, tiff.bounds.top)
            if bndy[0]>bndy[1]:
                bndy = bndy[::-1]
            if bndx[0]<x<bndx[1] and bndy[0]<y<bndy[1]:
                associatedTiffNb.append(e)
        if len(associatedTiffNb)==0:
            warnings.warn(f"Glacier {glacier['glac_name'].to_string()} with RGI ID {glacier['rgi_id'].to_string()} was not found in tiffs")
            return None
        if len(associatedTiffNb)>1:
            nbNonNanVoxels = []
            for e in associatedTiffNb:
                # re-project onto each candidate tiff's CRS before counting valid pixels
                glacier = mapToEpsg(glacierWgs84, self.tiffs[e].crs.to_string().split(':')[1])
                nbNonNanVoxels.append( hasValidSpeed(glacier, self.tiffs[e]) )
            valids = [e for i, e in enumerate(associatedTiffNb) if nbNonNanVoxels[i]>0]
            nbValid = len(valids)
            if nbValid==0:
                warnings.warn(f"Expected to find glacier {glacier['glac_name'].item()} with RGI ID {glacier['rgi_id'].item()} with non-NaN velocities in at least one tiff file but none of the files match")
                return None
            if nbValid>1:
                warnings.warn(f"Expected to find glacier {glacier['glac_name'].item()} with RGI ID {glacier['rgi_id'].item()} with non-NaN velocities in exactly one tiff file but {nbValid} files match. The one with the most non-NaN velocities is selected")
            return associatedTiffNb[np.argmax(nbNonNanVoxels)]
        else:
            return associatedTiffNb[0]

    def getGlacier(self, gid):
        # Return the glacier outline re-projected to the CRS of its associated tiff.
        associatedTiffNb = self.getAssociatedTiffDict()
        glacierWgs84 = self.gdf.loc[self.gdf.rgi_id==gid]
        tiff = self.tiffs[associatedTiffNb[glacierWgs84['rgi_id'].item()]]
        epsgTiff = tiff.crs.to_string().split(':')[1]
        return mapToEpsg(glacierWgs84, epsgTiff)

    def getSpeed(self, gid):
        # Return the non-NaN velocity values inside the glacier outline.
        associatedTiffNb = self.getAssociatedTiffDict()
        glacierWgs84 = self.gdf.loc[self.gdf.rgi_id==gid]
        tiff = self.tiffs[associatedTiffNb[glacierWgs84['rgi_id'].item()]]
        epsgTiff = tiff.crs.to_string().split(':')[1]
        glacier = mapToEpsg(glacierWgs84, epsgTiff)
        return getSpeed(tiff, glacier)

    def getGlacierInBox(self, leftLon, rightLon, topLat, bottomLat, useFiltered=False):
        # Return every glacier with at least one outline point inside the lon/lat box.
        rgi_ids = self.glaciersOfInterest.rgi_id if useFiltered else self.gdf.rgi_id
        glaciersInBox = []
        for gid in tqdm(rgi_ids):
            glacierWgs84 = self.gdf.loc[self.gdf.rgi_id==gid]
            extremePts = np.array(glacierWgs84.geometry.get_coordinates())
            for p in extremePts:
                if bottomLat<=p[1]<=topLat and leftLon<=p[0]<=rightLon:
                    glaciersInBox.append(glacierWgs84)
                    break
        return glaciersInBox
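
A hedged usage sketch of the Region class above (illustrative, not part of the commit: the module name dbIntersection is a placeholder since the file name is not shown in this view, and the area id and threshold are example values; 11 maps to 'central_europe' in areaIdToName):

import dbIntersection  # hypothetical name for the module above

region = dbIntersection.Region(11)      # 11 -> 'central_europe'
region.filterArea(thresArea=1)          # keep glaciers larger than 1 km^2
region.filterGlaciersOutsideTiff()      # drop glaciers no velocity tiff covers
gid = region.glaciersOfInterest.rgi_id.iloc[0]
speeds = region.getSpeed(gid)           # non-NaN velocity samples inside the outline
print(gid, float(speeds.mean()))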
@@ -0,0 +1,129 @@
import numpy as np
import netCDF4
from osgeo import osr
from tqdm import tqdm
import rioxarray
import rasterio
import tempfile
import glob
import os
import os.path as osp
import subprocess
import socket
from itertools import product

import warnings
warnings.filterwarnings('ignore', module='rasterio')

import dbExplorer


validYears = {'new_zealand': [2017, 2018], 'alps': [2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022]}


def getServerFiles(area, period, user, year=None, copyLocal=False):
    # List the datacube files for an area/period/year, optionally rsyncing them locally first.
    assert area.lower() in validYears.keys()
    ret = []
    if year is None: year = validYears[area.lower()]
    elif not isinstance(year, list): year = [year]
    if not isinstance(period, list): period = [period]
    fileYears = []
    filePeriods = []
    for y, p in product(year, period):
        assert y in validYears[area.lower()], f"Year {y} is not in the list of valid years {validYears[area.lower()]}"
        path = f'surface_flow_velocity/{area.upper()}/SENTINEL-2/{y}/{p}d/MOSAIC/cubes/'
        srcpath = f"/summer/ice_speed/{path}"
        if copyLocal:
            locpath = osp.join(dbExplorer.absPathData, path)
            if not osp.isdir(locpath) or len(glob.glob(locpath+'/*'))<=1:
                print("Syncing folder")
                os.makedirs(locpath, exist_ok=True)
                if 'bigfoot' in socket.gethostname():
                    subprocess.call(["rsync", "-avxHO", "--no-perms", srcpath, locpath])
                else:
                    subprocess.call(["rsync", "-avxHO", "--no-perms", f"{user}@cargo.univ-grenoble-alpes.fr:{srcpath}", locpath])
                print("Syncing done")
            tmp = glob.glob(locpath+'/*')
        else:
            tmp = glob.glob(srcpath+'/*')
        ret += tmp
        fileYears += [y]*len(tmp)
        filePeriods += [p]*len(tmp)
    return ret, fileYears, filePeriods


class DataCube:
    def __init__(self, filePath, mapping=None):
        self.filePath = filePath
        self.fnetcdf = netCDF4.Dataset(self.filePath, 'r')  # netCDF is more efficient if we just need to retrieve the bounds
        self.mapping = mapping or self.fnetcdf['mapping'].getncattr("spatial_ref")
        self.date1 = self.fnetcdf['date1'][:].tolist()
        self.date2 = self.fnetcdf['date2'][:].tolist()

        p1 = osr.SpatialReference()
        p1.ImportFromEPSG(4326)
        p2 = osr.SpatialReference()
        p2.ImportFromWkt(self.mapping)

        # Transform from the cube's local CRS to WGS 84 to get lon/lat bounds.
        self.transformLocToWgs = osr.CoordinateTransformation(p2, p1)
        self.bottomlat, self.leftlon, _ = self.transformLocToWgs.TransformPoint(np.min(self.fnetcdf['x'][:]).item(), np.min(self.fnetcdf['y'][:]).item())
        self.toplat, self.rightlon, _ = self.transformLocToWgs.TransformPoint(np.max(self.fnetcdf['x'][:]).item(), np.max(self.fnetcdf['y'][:]).item())

    def readAsTiff(self, drop_vars=[]):
        # Convert each band/date of the cube to a temporary GeoTIFF, keyed by its julian date.
        self.rds = rioxarray.open_rasterio(self.filePath)
        if isinstance(self.rds, list):  # Datacube corresponds to raw velocities
            tmp = self.rds[0]
            self.x = tmp.x.to_numpy()
            self.y = tmp.y.to_numpy()

            ret = self.transformLocToWgs.TransformPoints(np.stack([tmp.x, tmp.y]).T)
            self.Ywgs, self.Xwgs, _ = np.array(ret).T
            tmp['x'] = self.Xwgs
            tmp['y'] = self.Ywgs

            self.tiffs = {}
            for bandId in tqdm(range(len(tmp.band))):
                tmpName = tempfile.NamedTemporaryFile().name + ".tif"
                tmp.isel(band=bandId).rio.to_raster(tmpName)
                julianTime = self.date1[bandId]
                self.tiffs[julianTime] = rasterio.open(tmpName)

        else:  # Datacube corresponds to interpolated velocities
            tmp = self.rds
            self.x = tmp.x.to_numpy()
            self.y = tmp.y.to_numpy()

            ret = self.transformLocToWgs.TransformPoints(np.stack([tmp.x, tmp.y]).T)
            self.Ywgs, self.Xwgs, _ = np.array(ret).T
            tmp['x'] = self.Xwgs
            tmp['y'] = self.Ywgs

            self.tiffs = {}
            for e in tqdm(range(len(tmp.mid_date))):
                tmpName = tempfile.NamedTemporaryFile().name + ".tif"
                tmp.isel(mid_date=e).drop_vars(drop_vars).rio.to_raster(tmpName)
                julianTime = self.date1[e]
                self.tiffs[julianTime] = rasterio.open(tmpName)

    def getSpeeds(self):
        return self.fnetcdf['vx'], self.fnetcdf['vy']

class Glacier:
    def __init__(self, glacier, files, sortValues=None):
        # glacier: a one-row GeoDataFrame with the glacier outline in WGS 84
        self.glacier = glacier
        self.files = files
        self.dcContainingGlacier = None
        self.sortedIndex = np.argsort(sortValues) if sortValues is not None else range(len(self.files))

    def getDataCubes(self, mapping=None, drop_vars=[]):
        # Find the datacubes whose bounds contain at least one outline point, then load them as tiffs.
        if self.dcContainingGlacier is not None:
            return self.dcContainingGlacier
        self.dcContainingGlacier = []
        extremePts = np.array(self.glacier.geometry.get_coordinates())
        for idx in tqdm(reversed(self.sortedIndex)):
            f = self.files[idx]
            dc = DataCube(f, mapping=mapping)
            for p in extremePts:
                if dc.bottomlat<=p[1]<=dc.toplat and dc.leftlon<=p[0]<=dc.rightlon:
                    self.dcContainingGlacier.append(dc)
                    break
        for dc in self.dcContainingGlacier:
            dc.readAsTiff(drop_vars=drop_vars)
        return self.dcContainingGlacier
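
A matching sketch for the datacube side, continuing from the Region sketch above (again illustrative: dataCubes is a placeholder module name, and the period value and user name are assumptions, not values taken from the commit):

import dataCubes  # hypothetical name for the module above

files, years, periods = dataCubes.getServerFiles('alps', period=30, user='myuser', copyLocal=True)
glacierWgs84 = region.gdf.loc[region.gdf.rgi_id == gid]  # one-row GeoDataFrame from the previous sketch
gl = dataCubes.Glacier(glacierWgs84, files, sortValues=years)
cubes = gl.getDataCubes()      # DataCube objects whose bounds contain the outline
vx, vy = cubes[0].getSpeeds()  # velocity components straight from the netCDF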