@@ -26,51 +26,9 @@ def __str__(self):
2626 return f"{ self .source } :{ self .signal } "
2727
2828
def fetch_data(
    time_types: Optional[List[str]],
    geo_types: Optional[List[str]],
    signals: Optional[List[SourceSignal]],
):
    """Yield the cached covidcast metadata entries that pass the given filters.

    Reads one row from the `covidcast_meta_cache` table, decodes its
    `epidata` column with `loads`, and lazily yields every decoded entry that
    matches all of the supplied filters.

    Args:
        time_types: if truthy, an entry's "time_type" must be one of these.
        geo_types: if truthy, an entry's "geo_type" must be one of these.
        signals: if truthy, an entry must match at least one of these by
            "data_source" and by "signal" (a signal of "*" matches any).

    Yields:
        The matching entries, in the order they appear in the cache.
        Nothing is yielded when the cache row is missing, has no `epidata`,
        or decodes to something falsy.
    """
    # the cache is considered stale once it is more than 75 minutes old
    stale_after_seconds = 75 * 60

    cached = db.execute(
        text(
            "SELECT UNIX_TIMESTAMP(NOW()) - timestamp AS age, epidata FROM covidcast_meta_cache LIMIT 1"
        )
    ).fetchone()

    if not (cached and cached["epidata"]):
        get_structured_logger('server_api').warning("no data in covidcast_meta cache")
        return

    # a stale cache is only reported; its contents are still served
    cache_age = cached["age"]
    if cache_age > stale_after_seconds:
        get_structured_logger('server_api').warning("covidcast_meta cache is stale", cache_age=cache_age)

    entries = loads(cached["epidata"])
    if not entries:
        return

    def matches(entry: Dict) -> bool:
        """Tell whether a single metadata entry passes every active filter."""
        if time_types and entry.get("time_type") not in time_types:
            return False
        if geo_types and entry.get("geo_type") not in geo_types:
            return False
        if not signals:
            return True
        # match source and (signal or signal = *)
        return any(
            entry.get("data_source") == wanted.source
            and (wanted.signal == "*" or wanted.signal == entry.get("signal"))
            for wanted in signals
        )

    yield from filter(matches, entries)
29+ # empty generator that never yields
30+ def _nonerator ():
31+ return (x for x in [])
7432
7533
7634@bp .route ("/" , methods = ("GET" , "POST" ))
@@ -79,4 +37,60 @@ def handle():
7937 signals = [SourceSignal (v ) for v in (extract_strings ("signals" ) or [])]
8038 geo_types = extract_strings ("geo_types" )
8139
82- return create_printer (request .values .get ("format" ))(filter_fields (fetch_data (time_types , geo_types , signals )))
40+ printer = create_printer (request .values .get ("format" ))
41+
42+ metadata = db .execute (
43+ text (
44+ "SELECT UNIX_TIMESTAMP(NOW()) - timestamp AS age, epidata FROM covidcast_meta_cache LIMIT 1"
45+ )
46+ ).fetchone ()
47+
48+ if not metadata or "epidata" not in metadata :
49+ # the db table `covidcast_meta_cache` has no rows
50+ get_structured_logger ('server_api' ).warning ("no data in covidcast_meta cache" )
51+ return printer (_nonerator ())
52+
53+ metadata_list = loads (metadata ["epidata" ])
54+
55+ if not metadata_list :
56+ # the db table has a row, but there is no metadata about any signals in it
57+ get_structured_logger ('server_api' ).warning ("empty entry in covidcast_meta cache" )
58+ return printer (_nonerator ())
59+
60+ # the expected metadata regeneration interval in seconds, aka time between runs of
61+ # src/acquisition/covidcast/covidcast_meta_cache_updater.py (currently 2h)
62+ standard_age = 2 * 60 * 60
63+ # a short period when a client can continue to use this metadata even if it's slightly stale,
64+ # which also gives some padding if the md generation is running slow,
65+ # and which also acts as a minimum cacheable time (currently 10 mins)
66+ age_margin = 10 * 60
67+ # these should be updated if a stale cache will have undue impact on user activities, such as
68+ # if we start updating the metadata table much more frequently and having up-to-the-minute
69+ # metadata accuracy becomes important to users once more.
70+ # TODO: get the above two values ^ from config vars?
71+ age = metadata ["age" ]
72+ reported_age = max (0 , min (age , standard_age ) - age_margin )
73+
def cache_entry_gen():
    """Yield each cached metadata entry that passes the request's filters.

    Reads `metadata_list`, `time_types`, `geo_types` and `signals` from the
    enclosing scope. A filter that is empty or None is not applied.

    Every entry is yielded at most once, even when several of the requested
    signals match it (for example both "src:*" and "src:sig").

    Yields:
        Entries of `metadata_list`, in their original order.
    """
    for entry in metadata_list:
        if time_types and entry.get("time_type") not in time_types:
            continue
        if geo_types and entry.get("geo_type") not in geo_types:
            continue
        # no signal filter -> keep the entry; otherwise keep it when at least
        # one requested signal matches: same source and (same signal or "*").
        # `any` stops at the first match, so overlapping requests cannot emit
        # the same entry more than once.
        if not signals or any(
            entry.get("data_source") == signal.source
            and (signal.signal == "*" or signal.signal == entry.get("signal"))
            for signal in signals
        ):
            yield entry
88+
89+ return printer (
90+ filter_fields (cache_entry_gen ()),
91+ headers = {
92+ "Cache-Control" : f"max-age={ standard_age } , public" ,
93+ "Age" : f"{ reported_age } " ,
94+ # TODO?: "Expires": f"{}", # superseded by Cache-Control: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Expires
95+ }
96+ )
0 commit comments