Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
243 changes: 243 additions & 0 deletions sdk-parsers/RMParserFramework/parsers/noaa-observed-parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
from RMParserFramework.rmParser import RMParser # Mandatory include for parser definition
from RMUtilsFramework.rmLogging import log # Optional include for logging
from RMUtilsFramework.rmTimeUtils import * #rmNowDateTime, rmGetStartOfDay, rmCurrentDayTimestamp, rmDeltaDayFromTimestamp, rmCurrentTimestamp

import json
from datetime import datetime
from HTMLParser import HTMLParser
import re


class NoaaTableParser(HTMLParser):

#Initializing lists
tableList = []
tableRef = None
rowRef = None
colRef = None

#HTML Parser Methods
def handle_starttag(self, startTag, attrs):
if startTag == 'table':
self.tableList.append([])
self.tableRef = self.tableList[-1]
elif startTag == 'tr':
self.tableRef.append([])
self.rowRef = self.tableRef[-1]
elif startTag == 'th':
#This may span cols, so add additional columns to bridge the span
numCols = 1
for tup in attrs:
if tup[0] == 'colspan':
numCols = int(tup[1])
break
self.rowRef += ['']*numCols
self.colRef = self.rowRef
elif startTag == 'td':
#This may span cols, so add additional columns to bridge the span
numCols = 1
for tup in attrs:
if tup[0] == 'colspan':
numCols = int(tup[1])
break
self.rowRef += ['']*numCols
self.colRef = self.rowRef
else:
pass

def handle_endtag(self, endTag):
if endTag == 'table':
self.tableRef = None
elif endTag == 'tr':
self.rowRef = None
elif endTag == 'th':
self.colRef = None
elif endTag == 'td':
self.colRef = None
else:
pass

def handle_data(self, data):
if self.colRef:
self.colRef[-1]+= data.strip()

def handle_startendtag(self,startendTag, attrs):
pass

def handle_comment(self,data):
pass

def get_table(self):
return self.tableList



class NoaaObsParser(RMParser):
parserName = "NOAA Observations" # Your parser name
parserDescription = "NOAA Observations Data" # A short description of your parser
parserForecast = False # True if parser provides future forecast data
parserHistorical = True # True if parser also provides historical data (only actual observed data)
parserInterval = 6 * 3600 # Your parser running interval in seconds, data will only be mixed in hourly intervals
parserDebug = False
parserEnabled = False
params = {
"_stationURL" : "https://forecast.weather.gov/data/obhistory/KBDU.html",
"stationID" : 'KBDU',
"dailyAccum": True,
"_lastRain" : '',
"_lastRainTS": '',
"_lastTS": ''
}

def perform(self): # The function that will be executed must have this name
if self.params['stationID']:
stationID = self.params['stationID']
else:
log.error("*** No Station ID")
self.lastKnownError = "No Station ID"


#NOAA has a bunch of different feeds for the weather data, but only obhistory has rainfall
# A full listing of stations/urls can be found here: https://forecast.weather.gov/xml/current_obs/index.xml
stationURL = "https://forecast.weather.gov/data/obhistory/" + stationID + ".html"
self.params['_stationURL'] = stationURL

# downloading data from a URL convenience function since other python libraries can be used
self.getObservations(stationURL)

if self.parserDebug:
log.debug(self.result)

def __parse_time(self, date, time):
try:
log_day = int(date)
log_time = [int(x) for x in time.split(':')]
except:
return None

if self.params['dailyAccum']:
tsToday = rmCurrentDayTimestamp()
tsYesterDay = rmDeltaDayFromTimestamp(tsToday, -1)
today = rmTimestampToDate(tsToday)
yester = rmTimestampToDate(tsYesterDay)

#Only update rain for today and yesterday as a daily average
if(today.day == log_day):
log_date = today
elif(yester.day == log_day):
log_date = yester
else:
return None

else:
curTime = rmNowDateTime()

# The date format logged is very annoying... it's just the day of the month.
# Since the log is only 1 week long, we can't have a date bigger than today unless it's from the previous month.
month = curTime.month
day = curTime.day
year = curTime.year

#Don't short circuit for just today since NOAA only updates HTML every few hours and we may miss rain
# in the late evening
#if day != log_day:
# return None

if day - log_day < 0:
month -= 1
if month == 0:
month = 12
year -= 1

#Construct a new date with the month/day/time information
# Rainmachine addValue rounds to the hour, so drop the minutes and we'll accumulate outside
log_date = datetime(year, month, log_day, log_time[0])


#convert to our unix timestamp for logging
# This is the same method RM uses in rmGetStartOfDay() which matches all the timestamp styles
timestamp = int(log_date.strftime("%s"))

return timestamp

def __parse_precip(self, hr1, hr3, hr6):
try:
rain = float(hr1) * 25.4 #i n to mm
return rain
except:
return 0

def getObservations(self,stationURL):
d = self.openURL(stationURL)
parser = NoaaTableParser()
if d is None:
log.error("*** Failed to fetch URL")
self.lastKnownError = "Failed to Fetch"
return False
try:
parser.feed(d.read().decode('utf-8'))
tree = parser.get_table()
except:
log.error("*** Failed to parse response!")
self.lastKnownError = "Failed to Parse"
return False

# It's very likely that all stations generate the same table layout, but just quickly search for 'Date' to find
# the correct table
tableIdx = None
for tIdx, table in enumerate(tree):
if len(table) and len(table[0]) and 'Date' == table[0][0]:
tableIdx = tIdx

if tableIdx==None:
log.error("*** No information found in response!")
self.lastKnownError = "Retrying hourly data retrieval"
return False


# Dynamically build a list of column names for indexing
weatherObs = tree[tableIdx]
fieldIdx = {}
for cIdx, col in enumerate(weatherObs[0]):
colStr = re.split('[^a-zA-Z]',col)
fieldIdx[colStr[0]] = cIdx

# Walk our table and build a list of tuples to insert
# Skip the first 3 and last 3 rows since that's the headers
headerLen = len(weatherObs[0])
rainData = {}
for row in weatherObs[3:-3]:
if len(row) == headerLen:
time = self.__parse_time(row[fieldIdx['Date']],row[fieldIdx['Time']])

if time:
# Need to see how the precipitation is logged for 1/3/6 hr, when rows are ~28min apart
# Update: It looks like each row is just the rain (inches) for the current interval, so while we query today, just insert the rain
# at the matching timestamp and let the rainmachine parser sum it up for the entire day.
rain = self.__parse_precip(row[fieldIdx['Precipitation'] - 2], row[fieldIdx['Precipitation'] - 1],
row[fieldIdx['Precipitation']])

if time in rainData:
rainData[time] += rain
else:
rainData[time] = rain

for k, v in sorted(rainData.items()):
self.addValue(RMParser.dataType.RAIN, k, v)

if v > 0:
self.params['_lastRain'] = v
self.params['_lastRainTS'] = k
self.params['_lastTS'] = k
#log.info("times:%s (%d) Rain:%f"%(rmTimestampToDateAsString(k), k, v))

# Reset lastKnownError from a previous function call
self.lastKnownError = ""

pass



if __name__ == "__main__":
p = NoaaObsParser()
p.perform()