Skip to content

Commit

Permalink
add scripts to intersect db + some documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
albangossard committed Jan 13, 2025
1 parent ddfcb4c commit 8d38d2c
Show file tree
Hide file tree
Showing 7 changed files with 330 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
__pycache__/
.ipynb_checkpoints/
data/*/
Empty file added data/RGI/.gitkeep
Empty file.
Empty file.
Empty file.
2 changes: 2 additions & 0 deletions dbExplorer/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
import os.path as osp
absPathData = osp.join(osp.dirname(__file__), '../data/')
196 changes: 196 additions & 0 deletions dbExplorer/rgi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
import geopandas as gpd
import rasterio
import rasterio.plot
from rasterio.windows import from_bounds
from rasterio.mask import mask
import numpy as np
from pyproj import Transformer
from tqdm import tqdm
import os.path as osp
import json
import glob
from zipfile import ZipFile

import warnings
warnings.filterwarnings('ignore', module='pyproj')

import dbExplorer


def hasValidSpeed(glacier, tiff):
pos = np.array(glacier.geometry.get_coordinates())
xmin = pos[:,0].min()
xmax = pos[:,0].max()
ymin = pos[:,1].min()
ymax = pos[:,1].max()
dx=xmax-xmin
dy=ymax-ymin

left = xmin-0.05*dx
bottom = ymin-0.05*dy
right = xmax+0.05*dx
top = ymax+0.05*dy
wd = from_bounds(left, bottom, right, top, tiff.transform)
rst = tiff.read(1, window=wd)

return (1-np.isnan(rst)).sum()

def mapToEpsg(glacier, epsgNum):
glacierMapped = glacier.to_crs("epsg:"+str(epsgNum))

cenlon, cenlat = np.array(glacierMapped.geometry.centroid.get_coordinates())[0]
glacierMapped.cenlon = cenlon
glacierMapped.cenlat = cenlat

transformer = Transformer.from_crs("epsg:4326", "epsg:"+str(epsgNum)) # epsg:4326 is WGS 84
d = transformer.transform(glacier.termlat, glacier.termlon)
glacierMapped.termlon = d[0][0]
glacierMapped.termlat = d[1][0]
return glacierMapped

def getSpeed(tiff, glacier):
out, _ = mask(tiff, glacier.geometry, invert=False, crop=True, filled=False)
valInside = out[~out.mask].data
valInside = valInside[~np.isnan(valInside)]
return valInside


areaIdToName = {1: 'alaska', 2: 'western_canada_usa', 3: 'arctic_canada_north', 4: 'arctic_canada_south',
5: 'greenland_periphery', 6: 'iceland', 7: 'svalbard_jan_mayen', 8: 'scandinavia',
9: 'russian_arctic', 10: 'north_asia', 11: 'central_europe', 12: 'caucasus_middle_east',
13: 'central_asia', 14: 'south_asia_west', 15: 'south_asia_east', 16: 'low_latitudes',
17: 'southern_andes', 18: 'new_zealand', 19: 'subantarctic_antarctic_islands'}

def rgiShpFile(areaId):
areaName = areaIdToName[areaId]
return f'{dbExplorer.absPathData}/RGI/RGI2000-v7.0-G-{areaId:02d}_{areaName}/RGI2000-v7.0-G-{areaId:02d}_{areaName}.shp'

def tiffFile(areaId):
folder = f"{dbExplorer.absPathData}/Theia_annual_speed/velocity/RGI-{areaId}"
if not osp.isdir(folder):
if osp.exists(folder+'.zip'):
print("Folder doesn't exist but zip was found. Extracting it...")
with ZipFile(folder+'.zip', 'r') as zipFile:
zipFile.extractall(osp.join(folder, '..'))
else:
raise FileNotFoundError(f"File {folder}.zip wasn't found")
f = glob.glob(f"{folder}/V_RGI-*.tif")
assert len(f)>0, "Tiff files don't seem to exist"
if len(f)>1:
return '.'.join(f[0].split('.')[:-2])+'.*_'+f[0].split('_')[-1]
else:
return f[0]

class Region:
def __init__(self, areaId):
self.areaId = areaId
self.areaName = areaIdToName[areaId]
self.shpFileName = rgiShpFile(self.areaId)
self.tiffFileName = tiffFile(self.areaId)
self.gdf = gpd.read_file(self.shpFileName)
self.glaciersOfInterest = self.gdf.copy()
if '*' in self.tiffFileName:
filenames = sorted(glob.glob(self.tiffFileName))
else:
filenames = [self.tiffFileName]
self.tiffs = []
for fn in filenames:
self.tiffs.append(rasterio.open(fn))
self.associatedTiffNb = None

def filterGlaciersOutsideTiff(self):
associatedTiffNb = self.getAssociatedTiffDict()
glaciersToRemove = []
for gid in tqdm(self.glaciersOfInterest.rgi_id):
glacierWgs84 = self.glaciersOfInterest.loc[self.glaciersOfInterest.rgi_id==gid]
tiffNb = associatedTiffNb[glacierWgs84['rgi_id'].item()]
if tiffNb is None:
glaciersToRemove.append(gid)
continue
print(f"{len(glaciersToRemove)=}")
for gid in glaciersToRemove:
self.glaciersOfInterest = self.glaciersOfInterest.drop(self.glaciersOfInterest[self.glaciersOfInterest.rgi_id==gid].index)

def filterArea(self, thresArea=1):
self.glaciersOfInterest = self.glaciersOfInterest[self.glaciersOfInterest['area_km2']>thresArea]

def getAssociatedTiffDict(self):
if self.associatedTiffNb is not None: return self.associatedTiffNb
if len(self.tiffs)>1:
fileName_associatedTiffNb = f"{dbExplorer.absPathData}/Theia_annual_speed/associatedTiffNb_{self.areaName}.json"
if osp.exists(fileName_associatedTiffNb):
with open(fileName_associatedTiffNb, "r") as infile:
self.associatedTiffNb = json.load(infile)
else:
self.associatedTiffNb = {}
for rgi_id in tqdm(self.gdf['rgi_id']):
glacier = self.gdf.loc[self.gdf.rgi_id==rgi_id]
self.associatedTiffNb[rgi_id] = self.findCorrespondingTiff(glacier)
for k in self.associatedTiffNb:
if isinstance(self.associatedTiffNb[k], np.int64):
self.associatedTiffNb[k] = int(self.associatedTiffNb[k])
with open(fileName_associatedTiffNb, "w") as outfile:
json.dump(self.associatedTiffNb, outfile)
else:
self.associatedTiffNb = {rgi_id: 0 for rgi_id in self.gdf['rgi_id']}
return self.associatedTiffNb

def findCorrespondingTiff(self, glacierWgs84):
associatedTiffNb = []
for e,tiff in enumerate(self.tiffs):
glacier = mapToEpsg(glacierWgs84, tiff.crs.to_string().split(':')[1])
x,y=np.array(glacier.geometry.centroid.get_coordinates())[0]
bndx = (tiff.bounds.left, tiff.bounds.right)
if bndx[0]>bndx[1]:
bndx = bndx[::-1]
bndy = (tiff.bounds.bottom, tiff.bounds.top)
if bndy[0]>bndy[1]:
bndy = bndy[::-1]
if bndx[0]<x<bndx[1] and bndy[0]<y<bndy[1]:
associatedTiffNb.append(e)
if len(associatedTiffNb)==0:
warnings.warn(f"Glacier {glacier['glac_name'].to_string()} with RGI ID {glacier['rgi_id'].to_string()} was not found in tiffs")
return None
if len(associatedTiffNb)>1:
nbNonNanVoxels = []
for e in associatedTiffNb:
nbNonNanVoxels.append( hasValidSpeed(glacier, self.tiffs[e]) )
valids = [e for i,e in enumerate(associatedTiffNb) if nbNonNanVoxels[i]>0]
nbValid = len(valids)
if nbValid==0:
warnings.warn(f"Expected to find glacier {glacier['glac_name'].item()} with RGI ID {glacier['rgi_id'].item()} with non NaNs velocities in at least one tiff file but none of the files match")
return None
if nbValid>1:
warnings.warn(f"Expected to find glacier {glacier['glac_name'].item()} with RGI ID {glacier['rgi_id'].item()} with non NaNs velocities in exactly one tiff file but {nbValid} files match. The one with the most non NaNs velocities is selected")
return associatedTiffNb[np.argmax(nbNonNanVoxels)]
else:
associatedTiffNb = associatedTiffNb[0]
return associatedTiffNb

def getGlacier(self, gid):
associatedTiffNb = self.getAssociatedTiffDict()
glacierWgs84 = self.gdf.loc[self.gdf.rgi_id==gid]
tiff = self.tiffs[associatedTiffNb[glacierWgs84['rgi_id'].item()]]
epsgTiff = tiff.crs.to_string().split(':')[1]
glacier = mapToEpsg(glacierWgs84, epsgTiff)
return glacier

def getSpeed(self, gid):
associatedTiffNb = self.getAssociatedTiffDict()
glacierWgs84 = self.gdf.loc[self.gdf.rgi_id==gid]
tiff = self.tiffs[associatedTiffNb[glacierWgs84['rgi_id'].item()]]
epsgTiff = tiff.crs.to_string().split(':')[1]
glacier = mapToEpsg(glacierWgs84, epsgTiff)
return getSpeed(tiff, glacier)

def getGlacierInBox(self, leftLon, rightLon, topLat, bottomLat, useFiltered=False):
rgi_ids = self.glaciersOfInterest.rgi_id if useFiltered else self.gdf.rgi_id
glaciersInBox = []
for gid in tqdm(rgi_ids):
glacierWgs84 = self.gdf.loc[self.gdf.rgi_id==gid]
extremePts = np.array(glacierWgs84.geometry.get_coordinates())
for p in extremePts:
if bottomLat<=p[1]<=topLat and leftLon<=p[0]<=rightLon:
glaciersInBox.append(glacierWgs84)
break
return glaciersInBox
129 changes: 129 additions & 0 deletions dbExplorer/velocitiesDataCube.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
import numpy as np
import netCDF4
from osgeo import osr
from tqdm import tqdm
import rioxarray
import rasterio
import tempfile
import glob
import os
import os.path as osp
import subprocess
import socket
from itertools import product

import warnings
warnings.filterwarnings('ignore', module='rasterio')

import dbExplorer

validYears = {'new_zealand': [2017, 2018], 'alps': [2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022]}

def getServerFiles(area, period, user, year=None, copyLocal=False):
assert area.lower() in validYears.keys()
ret = []
if year is None: year = validYears[area.lower()]
elif not isinstance(year, list): year = [year]
if not isinstance(period, list): period = [period]
fileYears = []
filePeriods = []
for y, p in product(year, period):
assert y in validYears[area.lower()], f"Year {y} is not in the list of valid years {validYears[area.lower()]}"
path = f'surface_flow_velocity/{area.upper()}/SENTINEL-2/{y}/{p}d/MOSAIC/cubes/'
srcpath = f"/summer/ice_speed/{path}"
if copyLocal:
locpath = osp.join(dbExplorer.absPathData, path)
if not osp.isdir(locpath) or len(glob.glob(locpath+'/*'))<=1:
print("Syncing folder")
os.makedirs(locpath, exist_ok=True)
if 'bigfoot' in socket.gethostname():
subprocess.call(["rsync", "-avxHO", "--no-perms", srcpath, locpath])
else:
subprocess.call(["rsync", "-avxHO", "--no-perms", f"{user}@cargo.univ-grenoble-alpes.fr:{srcpath}", locpath])
print("Syncing done")
tmp = glob.glob(locpath+'/*')
else:
tmp = glob.glob(srcpath+'/*')
ret += tmp
fileYears += [y]*len(tmp)
filePeriods += [p]*len(tmp)
return ret, fileYears, filePeriods

class DataCube:
def __init__(self, filePath, mapping=None):
self.filePath = filePath
self.fnetcdf = netCDF4.Dataset(self.filePath, 'r') # netCDF is more efficient if we just need to retrieve the bounds
self.mapping = mapping or self.fnetcdf['mapping'].getncattr("spatial_ref")
self.date1 = self.fnetcdf['date1'][:].tolist()
self.date2 = self.fnetcdf['date2'][:].tolist()

p1 = osr.SpatialReference()
p1.ImportFromEPSG(4326)
p2 = osr.SpatialReference()
p2.ImportFromWkt(self.mapping)

self.transformLocToWgs = osr.CoordinateTransformation(p2, p1)
self.bottomlat, self.leftlon, _ = (self.transformLocToWgs.TransformPoint(np.min(self.fnetcdf['x'][:]).item(), np.min(self.fnetcdf['y'][:]).item()))
self.toplat, self.rightlon, _ = (self.transformLocToWgs.TransformPoint(np.max(self.fnetcdf['x'][:]).item(), np.max(self.fnetcdf['y'][:]).item()))

def readAsTiff(self, drop_vars=[]):
self.rds = rioxarray.open_rasterio(self.filePath)
if isinstance(self.rds, list): # Datacube corresponds to raw velocities
tmp = self.rds[0]
self.x = tmp.x.to_numpy()
self.y = tmp.y.to_numpy()

ret = self.transformLocToWgs.TransformPoints(np.stack([tmp.x,tmp.y]).T)
self.Ywgs, self.Xwgs, _ = np.array(ret).T
tmp['x'] = self.Xwgs
tmp['y'] = self.Ywgs

self.tiffs = {}
for bandId in tqdm(range(len(tmp.band))):
tmpName = tempfile.NamedTemporaryFile().name+".tif"
tmp.isel(band=bandId).rio.to_raster(tmpName)
julianTime = self.date1[bandId]
self.tiffs[julianTime] = rasterio.open(tmpName)

else: # Datacube corresponds to interpolated velocities
tmp = self.rds
self.x = tmp.x.to_numpy()
self.y = tmp.y.to_numpy()

ret = self.transformLocToWgs.TransformPoints(np.stack([tmp.x,tmp.y]).T)
self.Ywgs, self.Xwgs, _ = np.array(ret).T
tmp['x'] = self.Xwgs
tmp['y'] = self.Ywgs

self.tiffs = {}
for e in tqdm(range(len(tmp.mid_date))):
tmpName = tempfile.NamedTemporaryFile().name+".tif"
tmp.isel(mid_date=e).drop_vars(drop_vars).rio.to_raster(tmpName)
julianTime = self.date1[e]
self.tiffs[julianTime] = rasterio.open(tmpName)

def getSpeeds(self):
return self.fnetcdf['vx'], self.fnetcdf['vy']

class Glacier:
def __init__(self, glacier, files, sortValues=None):
# glacier: glacier instance from netCDF
self.glacier = glacier
self.files = files
self.dcContainingGlacier = None
self.sortedIndex = np.argsort(sortValues) if sortValues is not None else range(len(self.files))
def getDataCubes(self, mapping=None, drop_vars=[]):
if self.dcContainingGlacier is not None:
return self.dcContainingGlacier
self.dcContainingGlacier = []
extremePts = np.array(self.glacier.geometry.get_coordinates())
for idx in tqdm(reversed(self.sortedIndex)):
f = self.files[idx]
dc = DataCube(f, mapping=mapping)
for p in extremePts:
if dc.bottomlat<=p[1]<=dc.toplat and dc.leftlon<=p[0]<=dc.rightlon:
self.dcContainingGlacier.append(dc)
break
for dc in self.dcContainingGlacier:
dc.readAsTiff(drop_vars=drop_vars)
return self.dcContainingGlacier

0 comments on commit 8d38d2c

Please sign in to comment.