|
| 1 | +# Copyright (c) 2007 Michael Twomey |
| 2 | +# |
| 3 | +# Permission is hereby granted, free of charge, to any person obtaining a |
| 4 | +# copy of this software and associated documentation files (the |
| 5 | +# "Software"), to deal in the Software without restriction, including |
| 6 | +# without limitation the rights to use, copy, modify, merge, publish, |
| 7 | +# distribute, sublicense, and/or sell copies of the Software, and to |
| 8 | +# permit persons to whom the Software is furnished to do so, subject to |
| 9 | +# the following conditions: |
| 10 | +# |
| 11 | +# The above copyright notice and this permission notice shall be included |
| 12 | +# in all copies or substantial portions of the Software. |
| 13 | +# |
| 14 | +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| 15 | +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| 16 | +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
| 17 | +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
| 18 | +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
| 19 | +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
| 20 | +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| 21 | + |
| 22 | +"""ISO 8601 date time string parsing |
| 23 | +
|
| 24 | +Basic usage: |
| 25 | +>>> import iso8601 |
| 26 | +>>> iso8601.parse_date("2007-01-25T12:00:00Z") |
| 27 | +datetime.datetime(2007, 1, 25, 12, 0, tzinfo=<iso8601.iso8601.Utc ...>) |
| 28 | +>>> |
| 29 | +
|
| 30 | +""" |
| 31 | + |
| 32 | +from datetime import datetime, timedelta, tzinfo |
| 33 | +import re |
| 34 | + |
# Public names exported by a star-import of this module.
__all__ = ["parse_date", "ParseError"]

# Adapted from http://delete.me.uk/2005/03/iso8601.html
# Only the four-digit year is mandatory: month, day, time and time zone are
# nested optional groups, so any of them may come back as None from
# groupdict(). The date/time separator is "." (any single character), and the
# pattern has no end anchor, so text after the last matched component is
# ignored when used with .match().
ISO8601_REGEX = re.compile(r"(?P<year>[0-9]{4})(-(?P<month>[0-9]{1,2})(-(?P<day>[0-9]{1,2})"
    r"((?P<separator>.)(?P<hour>[0-9]{2}):(?P<minute>[0-9]{2})(:(?P<second>[0-9]{2})(\.(?P<fraction>[0-9]+))?)?"
    r"(?P<timezone>Z|(([-+])([0-9]{2}):([0-9]{2})))?)?)?)?"
)
# Splits an offset such as "+01:00" into its sign, hours and minutes.
# NOTE(review): the "." between hours and minutes matches any character, not
# just ":" -- confirm whether that leniency is intended.
TIMEZONE_REGEX = re.compile("(?P<prefix>[+-])(?P<hours>[0-9]{2}).(?P<minutes>[0-9]{2})")
| 43 | + |
class ParseError(Exception):
    """Signals that a value could not be parsed as an ISO 8601 date string."""
| 46 | + |
# Shared zero-length offset; this section follows the tzinfo example in the
# Python docs.
ZERO = timedelta(0)


class Utc(tzinfo):
    """Time zone with a zero offset from UTC and no daylight saving."""

    def dst(self, dt):
        # No daylight-saving adjustment, whatever dt is.
        return ZERO

    def tzname(self, dt):
        return "UTC"

    def utcoffset(self, dt):
        # Zero offset, whatever dt is.
        return ZERO


# Module-level instance, used as the default time zone by the parsers below.
UTC = Utc()
| 62 | + |
class FixedOffset(tzinfo):
    """Fixed offset in hours and minutes from UTC.

    offset_hours and offset_minutes are handed to timedelta to build the
    value returned by utcoffset(); for offsets west of UTC both should be
    negative. name is returned by tzname() and shown in repr().
    """

    def __init__(self, offset_hours, offset_minutes, name):
        # Keep the constructor arguments so __getinitargs__ can replay them.
        self.__init_args = (offset_hours, offset_minutes, name)
        self.__offset = timedelta(hours=offset_hours, minutes=offset_minutes)
        self.__name = name

    def __getinitargs__(self):
        # tzinfo.__reduce__ rebuilds the object by calling the class with
        # these arguments. Without this hook it calls FixedOffset() with no
        # arguments, so copy.copy(), copy.deepcopy() and unpickling all fail
        # with TypeError.
        return self.__init_args

    def utcoffset(self, dt):
        return self.__offset

    def tzname(self, dt):
        return self.__name

    def dst(self, dt):
        # A fixed offset never has a daylight-saving component.
        return ZERO

    def __repr__(self):
        return "<FixedOffset %r>" % self.__name
| 82 | + |
def parse_timezone(tzstring, default_timezone=UTC):
    """Parses ISO 8601 time zone specs into tzinfo offsets

    tzstring is "Z", an offset such as "+01:00" or "-05:30", or None when the
    date string carried no time zone at all.

    Returns UTC for "Z", default_timezone for None, and otherwise a
    FixedOffset named after tzstring.

    Raises ParseError if tzstring is a string that is not a valid offset.
    """
    if tzstring == "Z":
        # "Z" is an explicit UTC designator, so it must not be replaced by the
        # caller's fallback zone.
        return UTC
    # This isn't strictly correct, but it's common to encounter dates without
    # timezones so assume the default (which defaults to UTC).
    # Addresses issue 4.
    if tzstring is None:
        return default_timezone
    m = TIMEZONE_REGEX.match(tzstring)
    if m is None:
        # Report malformed input as a parse failure rather than letting
        # m.groups() raise AttributeError.
        raise ParseError("Unable to parse timezone %r" % (tzstring,))
    # The sign applies to both components: "-05:30" is -5h and -30min.
    sign = -1 if m.group("prefix") == "-" else 1
    hours = sign * int(m.group("hours"))
    minutes = sign * int(m.group("minutes"))
    return FixedOffset(hours, minutes, tzstring)
| 101 | + |
def parse_date(datestring, default_timezone=UTC):
    """Parses ISO 8601 dates into datetime objects

    The timezone is parsed from the date string. However it is quite common to
    have dates without a timezone (not strictly correct). In this case the
    default timezone specified in default_timezone is used. This is UTC by
    default.

    Components that the string leaves out fall back to the start of the
    period: month and day default to 1; hour, minute, second and the
    fractional second default to 0. Fractions finer than a microsecond are
    truncated.

    Raises ParseError if datestring is not a string or does not start with an
    ISO 8601 date. Out-of-range values (e.g. month 13) still surface as the
    ValueError raised by datetime.

    NOTE(review): text after the last recognised component is ignored because
    ISO8601_REGEX is not anchored at the end; confirm whether such input
    should be rejected instead.
    """
    try:
        string_types = basestring  # Python 2: covers both str and unicode
    except NameError:
        string_types = str  # Python 3 has no basestring
    if not isinstance(datestring, string_types):
        # Wrap in a tuple so a tuple argument is formatted, not unpacked.
        raise ParseError("Expecting a string %r" % (datestring,))
    m = ISO8601_REGEX.match(datestring)
    if not m:
        raise ParseError("Unable to parse date string %r" % (datestring,))
    groups = m.groupdict()
    tz = parse_timezone(groups["timezone"], default_timezone=default_timezone)
    fraction = groups["fraction"]
    if fraction is None:
        microsecond = 0
    else:
        # Work on the digits directly: keep the first six (truncating finer
        # precision) and right-pad shorter fractions with zeros. Going via
        # float can land just below the intended integer.
        microsecond = int(fraction[:6].ljust(6, "0"))
    # Every component after the year is optional in the regex and is None
    # when absent, so substitute a default before converting to int.
    return datetime(
        int(groups["year"]),
        int(groups["month"] or 1),
        int(groups["day"] or 1),
        int(groups["hour"] or 0),
        int(groups["minute"] or 0),
        int(groups["second"] or 0),
        microsecond,
        tz,
    )
0 commit comments