-
Notifications
You must be signed in to change notification settings - Fork 94
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #6400 from oliver-sanders/examples.loading-data-files
examples: using data to define your workflow
- Loading branch information
Showing
6 changed files
with
283 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
#!/bin/bash | ||
# THIS FILE IS PART OF THE CYLC WORKFLOW ENGINE. | ||
# Copyright (C) NIWA & British Crown (Met Office) & Contributors. | ||
# | ||
# This program is free software: you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License as published by | ||
# the Free Software Foundation, either version 3 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# This program is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# GNU General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU General Public License | ||
# along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
# -e: abort on the first failing command, -u: treat unset variables as errors, | ||
# -x: echo each command before it is run | ||
set -eux | ||
|
||
# build a random six-letter string to use as the workflow name for this run | ||
ID="$(< /dev/urandom tr -dc A-Za-z | head -c6)" | ||
|
||
# run the workflow | ||
cylc vip --check-circular --no-run-name --no-detach --workflow-name "$ID" | ||
|
||
# check the station:ID mapping came out as expected | ||
# (because of "set -e" above, a grep that finds no match fails the script) | ||
grep 'fetch data for heathrow, WMO ID: 03772' "${HOME}/cylc-run/${ID}/log/job/20000101T0000Z/fetch_heathrow/NN/job.out" | ||
|
||
# lint | ||
cylc lint "$ID" | ||
|
||
# clean up | ||
# (only reached when every command above succeeded) | ||
cylc clean "$ID" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
#!Jinja2 | ||
|
||
[meta] | ||
title = Weather Station Workflow | ||
description = """ | ||
This workflow demonstrates how to read in a data file for use in | ||
defining your workflow. | ||
We have a file called "stations.json" which contains a list of weather | ||
stations with some data for each. This workflow reads the | ||
"stations.json" file and creates a family for each weather station | ||
with an environment variable for each data field. | ||
You can load data in other formats too. Try changing "load_json" to | ||
"load_csv" and "stations.json" to "stations.csv" for a CSV example. | ||
""" | ||
|
||
|
||
{# Import a Python function to load our data. #} | ||
{% from "load_data" import load_json %} | ||
|
||
{# Load data from the specified file. #} | ||
{% set stations = load_json('stations.json') %} | ||
|
||
{# Extract a list of station names from the data file. #} | ||
{% set station_names = stations | map(attribute="name") | list %} | ||
|
||
|
||
{# Provide Cylc with a list of weather stations. #} | ||
[task parameters] | ||
station = {{ station_names | join(', ') }} | ||
|
||
|
||
[scheduling] | ||
initial cycle point = 2000-01-01 | ||
final cycle point = 2000-01-02 | ||
[[graph]] | ||
P1D = fetch<station> => process<station> => collate | ||
|
||
|
||
[runtime] | ||
{# Define a family for each weather station #} | ||
{% for station in stations %} | ||
[[STATION<station={{ station["name"] }}>]] | ||
[[[environment]]] | ||
{# Turn the <station> parameter into an environment variable #} | ||
{# NB: Just to show how, we could also have used `station["name"]`. #} | ||
name = %(station)s | ||
{# Turn the data for this station into environment variables. #} | ||
wmo = {{ station["wmo"] }} | ||
alt = {{ station["alt"] }} | ||
lat = {{ station["lat"] }} | ||
lon = {{ station["lon"] }} | ||
{% endfor %} | ||
|
||
# a task that gets data | ||
[[fetch<station>]] | ||
inherit = STATION<station> | ||
script = echo "fetch data for $name, WMO ID: $wmo" | ||
|
||
[[process<station>]] | ||
inherit = STATION<station> | ||
script = echo "process data for $name, location: $lat,$lon" | ||
|
||
[[collate]] | ||
script = "echo collate data for stations: {{ station_names }}" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
Using Data To Define Your Workflow | ||
================================== | ||
|
||
.. admonition:: Get a copy of this example | ||
:class: hint | ||
|
||
.. code-block:: console | ||
$ cylc get-resources examples/external-data-files | ||
We often want to read in a dataset for use in defining our workflow. | ||
|
||
The :ref:`Cylc tutorial <tutorial-cylc-consolidating-configuration>` is an | ||
example of this where we want one ``get_observations`` task for each of a list | ||
of weather stations. Each weather station has a name (e.g. "heathrow") and an | ||
ID (e.g. 3772). | ||
|
||
.. code-block:: cylc | ||
[runtime] | ||
[[get_observations_heathrow]] | ||
script = get-observations | ||
[[[environment]]] | ||
SITE_ID = 3772 | ||
[[get_observations_camborne]] | ||
script = get-observations | ||
[[[environment]]] | ||
SITE_ID = 3808 | ||
[[get_observations_shetland]] | ||
script = get-observations | ||
[[[environment]]] | ||
SITE_ID = 3005 | ||
[[get_observations_aldergrove]] | ||
script = get-observations | ||
[[[environment]]] | ||
SITE_ID = 3917 | ||
It can be inconvenient to write out the name and ID of each station in your | ||
workflow like this. However, you may already have this information in a more | ||
convenient format (i.e. a data file of some form). | ||
|
||
With Cylc, we can use :ref:`Jinja2 <Jinja>` to read in a data file and use that data to | ||
define our workflow. | ||
|
||
|
||
The Approach | ||
------------ | ||
|
||
This example has three components: | ||
|
||
1. A JSON file containing a list of weather stations along with all the data | ||
associated with them. | ||
|
||
.. literalinclude:: stations.json | ||
:language: json | ||
:caption: stations.json | ||
|
||
2. A Python function that reads the JSON file. | ||
|
||
.. code-block:: python | ||
:caption: lib/python/load_data.py | ||
import json | ||
def load_json(filename): | ||
with open(filename, 'r') as json_file: | ||
return json.load(json_file) | ||
We put this Python code in the workflow's ``lib/python`` directory which | ||
allows us to import it from within our workflow. | ||
|
||
3. A ``flow.cylc`` file that uses the Python function to load the | ||
data file. | ||
|
||
We can import Python functions with Jinja2 using the following syntax: | ||
|
||
.. code-block:: | ||
{% from "load_data" import load_json %} | ||
For more information, see :ref:`jinja2.importing_python_modules`. | ||
|
||
|
||
|
||
The Workflow | ||
------------ | ||
|
||
The three files are arranged like so: | ||
|
||
.. code-block:: none | ||
:caption: File Structure | ||
|-- flow.cylc | ||
|-- lib | ||
| `-- python | ||
| `-- load_data.py | ||
`-- stations.json | ||
The ``flow.cylc`` file: | ||
|
||
* Imports the Python function. | ||
* Uses it to load the data. | ||
* Then uses the data to define the workflow. | ||
|
||
.. literalinclude:: flow.cylc | ||
:language: ini | ||
:caption: flow.cylc | ||
|
||
|
||
Data Types | ||
---------- | ||
|
||
We can load other types of data file too. This example also includes the same | ||
data in CSV format along with a Python function to load CSV data. To try it | ||
out, open the ``flow.cylc`` file and replace ``stations.json`` with | ||
``stations.csv`` and ``load_json`` with ``load_csv``. | ||
|
||
Any Python code that you import using Jinja2 will be executed using the Python | ||
environment that Cylc is running in. So if you want to import Python code that | ||
isn't in the standard library, you may need to get your system administrator to | ||
install this dependency into the Cylc environment for you. |
12 changes: 12 additions & 0 deletions
12
cylc/flow/etc/examples/external-data-files/lib/python/load_data.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
import csv | ||
import json | ||
|
||
|
||
def load_csv(filename):
    """Load a CSV file as a list of row dictionaries.

    The first row of the file supplies the keys; each following row becomes
    one dictionary mapping column name to the cell value (as a string).

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list with one dictionary per data row, in file order.
    """
    # newline='' leaves line-ending handling to the csv module, as its
    # documentation requires. Without it, universal-newline translation
    # rewrites "\r\n" sequences embedded in quoted fields.
    with open(filename, 'r', newline='') as csv_file:
        return list(csv.DictReader(csv_file))
|
||
|
||
def load_json(filename):
    """Load a JSON file and return the decoded data.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The Python object decoded from the file (e.g. a list of dictionaries
        for a JSON array of objects).
    """
    # Read bytes rather than text so that the json module detects the
    # encoding itself (UTF-8 with or without a BOM, UTF-16 or UTF-32)
    # instead of depending on the locale's default text encoding.
    with open(filename, 'rb') as json_file:
        return json.load(json_file)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
name,wmo,alt,lat,lon | ||
camborne,03808,87,50.21841,-5.32753 | ||
heathrow,03772,25,51.47922,-0.45061 | ||
lerwick,03005,82,60.13893,-1.18491 | ||
aldergrove,03917,63,54.66365,-6.22534 | ||
exeter,03844,27,50.73717,-3.40579 | ||
middle_wallop,03749,90,51.14987,-1.56994 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
[ | ||
{ | ||
"name": "camborne", | ||
"wmo": "03808", | ||
"alt": 87, | ||
"lat": 50.21841, | ||
"lon": -5.32753 | ||
}, | ||
{ | ||
"name": "heathrow", | ||
"wmo": "03772", | ||
"alt": 25, | ||
"lat": 51.47922, | ||
"lon": -0.45061 | ||
}, | ||
{ | ||
"name": "lerwick", | ||
"wmo": "03005", | ||
"alt": 82, | ||
"lat": 60.13893, | ||
"lon": -1.18491 | ||
}, | ||
{ | ||
"name": "aldergrove", | ||
"wmo": "03917", | ||
"alt": 63, | ||
"lat": 54.66365, | ||
"lon": -6.22534 | ||
}, | ||
{ | ||
"name": "exeter", | ||
"wmo": "03844", | ||
"alt": 27, | ||
"lat": 50.73717, | ||
"lon": -3.40579 | ||
}, | ||
{ | ||
"name": "middle_wallop", | ||
"wmo": "03749", | ||
"alt": 90, | ||
"lat": 51.14987, | ||
"lon": -1.56994 | ||
} | ||
] |