forked from simonpf/chimp_smhi
-
Notifications
You must be signed in to change notification settings - Fork 0
/
hritfiles2chimp.py
166 lines (133 loc) · 4.81 KB
/
hritfiles2chimp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
"""
hritfiles2chimp
==========
Preprocessing script to convert High Rate Seviri images to CHIMP SEVIRI
input data.
"""
from datetime import datetime, timedelta
import logging
from pathlib import Path
from typing import Dict, List
import numpy as np
from satpy import Scene
from satpy.cf.area import area2cf
import xarray as xr
from chimp.areas import NORDICS_4
from chimp.data.seviri import CHANNEL_CONFIGURATIONS
from chimp.data.utils import get_output_filename
logging.basicConfig(level="INFO", force=True)
LOGGER = logging.getLogger(__name__)
def sort_hrit_files(hrit_files: List[Path]) -> Dict[datetime, List[Path]]:
    """
    Sort HRIT files by observation time.

    The timestamp is parsed from the file name, which is expected to end
    with ``...-%Y%m%d%H%M`` followed by a three-character suffix
    (e.g. ``-C_``), i.e. characters ``[-15:-3]`` of the name.

    Args:
        hrit_files: Paths pointing to SEVIRI observation files in HRIT
            format.

    Return:
        A dictionary mapping datetime objects to the corresponding
        observation files, with files for each time step kept in sorted
        order.
    """
    files_sorted: Dict[datetime, List[Path]] = {}
    # Sort up front so each time step's file list comes out ordered.
    for path in sorted(hrit_files):
        date = datetime.strptime(path.name[-15:-3], "%Y%m%d%H%M")
        files_sorted.setdefault(date, []).append(path)
    return files_sorted
def load_and_resample_data(hrit_files: List[Path]) -> xr.Dataset:
    """
    Load, resample, and combine SEVIRI data into a single xarray.Dataset.

    Args:
        hrit_files: List of SEVIRI HRIT files for a single time step.

    Return:
        An xarray.Dataset with a single ``obs`` variable of dimensions
        ``(y, x, channels)`` containing the SEVIRI observations resampled
        to the NORDICS_4 domain.
    """
    datasets = CHANNEL_CONFIGURATIONS["all"]
    scene = Scene(hrit_files, reader="seviri_l1b_hrit")
    scene.load(datasets)
    # 4 km radius of influence matches the native SEVIRI footprint scale.
    scene_r = scene.resample(NORDICS_4, radius_of_influence=4e3)
    # Stack all channels along a trailing 'channels' axis.
    obs = np.stack(
        [scene_r[name].compute().data for name in datasets],
        axis=-1,
    )
    return xr.Dataset({"obs": (("y", "x", "channels"), obs)})
def process(model, input_datasets, input_path, output_path, device="cuda", precision="single", verbose=0):
    """
    Run CHIMP retrievals over all inputs found in ``input_path``.

    Args:
        model: Path or identifier of the CHIMP model to load.
        input_datasets: Names of the input datasets to load (e.g. ``["seviri"]``).
        input_path: Directory containing the CHIMP input files.
        output_path: Directory to which retrieval results are written.
        device: Torch device to run the retrieval on ("cuda" or "cpu").
        precision: "single" for float32; anything else selects bfloat16.
        verbose: If > 0, (re)configure logging at INFO level.
    """
    # Imported lazily so that importing this module does not require torch.
    from chimp.processing import InputLoader, load_model, torch, retrieval_step, to_datetime

    input_data = InputLoader(input_path, input_datasets)
    # Keep the loaded network under its own name instead of shadowing the
    # 'model' argument.
    net = load_model(model)

    output_path = Path(output_path)
    # exist_ok avoids a race between the exists() check and mkdir().
    output_path.mkdir(parents=True, exist_ok=True)

    if verbose > 0:
        logging.basicConfig(level="INFO", force=True)

    float_type = torch.float32 if precision == "single" else torch.bfloat16

    for time, model_input in input_data:
        LOGGER.info("Starting processing input @ %s", time)
        results = retrieval_step(
            net,
            model_input,
            tile_size=128,
            device=device,
            float_type=float_type
        )
        LOGGER.info("Finished processing input @ %s", time)
        results["time"] = time.astype("datetime64[ns]")
        date = to_datetime(time)
        date_str = date.strftime("%Y%m%d_%H_%M")
        output_file = output_path / f"chimp_{date_str}.nc"
        LOGGER.info("Writing results to %s", output_file)
        add_grid_mapping(results)
        results.to_netcdf(output_file)
def add_grid_mapping(results):
    """Attach the CF grid mapping and projection coordinates to *results*.

    The target area is stored temporarily in the dataset attributes so that
    ``area2cf`` can derive the grid-mapping variable; it is removed again
    afterwards because the area object itself is not serializable.
    """
    target_area = NORDICS_4
    results.attrs["area"] = target_area
    grid_mapping, _ = area2cf(results)
    results["grid_mapping"] = grid_mapping
    results.attrs.pop("area")
    results.coords["x"], results.coords["y"] = target_area.get_proj_vectors()
if __name__ == '__main__':
    import argparse
    import tempfile

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "input_files",
        nargs="*",
        type=Path,
        help="Directory containing the SEVIRI input observations in HRIT format."
    )
    parser.add_argument(
        "-o",
        "--output-path",
        type=Path,
        help="Directory to which to write the 'chimp' input files.",
        default="/tmp/chimp"
    )
    parser.add_argument(
        "-m",
        "--model-file",
        type=str,
        help="Model file"
    )
    args = parser.parse_args()
    input_files = args.input_files
    model_file = args.model_file

    files = sort_hrit_files(input_files)
    LOGGER.info("Found input files.")

    for time, hrit_files in files.items():
        output_filename = get_output_filename("seviri", time, timedelta(minutes=15))
        # Stage the CHIMP input file in a temporary directory that is
        # cleaned up after each time step has been processed.
        with tempfile.TemporaryDirectory() as td:
            temp_path = Path(td)
            output_path = temp_path / "seviri"
            output_path.mkdir(parents=True)
            data_file = output_path / output_filename
            # BUG FIX: previously this always re-loaded the FIRST time
            # step's files (next(iter(files.values()))) on every iteration;
            # use the current time step's files instead.
            data = load_and_resample_data(hrit_files)
            data.to_netcdf(data_file)
            process(model_file, ["seviri"], temp_path, args.output_path, device="cpu")