Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/develop' into release-v2.7.0
Browse files Browse the repository at this point in the history
chadell committed Jan 7, 2025

Unverified

This user has not yet uploaded their public signing key.
2 parents d9197d3 + 0bf7922 commit 3b4e1d6
Showing 36 changed files with 2,067 additions and 837 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -113,7 +113,7 @@ jobs:
strategy:
fail-fast: true
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
python-version: ["3.9", "3.10", "3.11", "3.12"]
pydantic: ["2.x"]
include:
- python-version: "3.11"
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -63,10 +63,12 @@ By default, there is a `GenericProvider` that supports a `SimpleProcessor` using
- EXA (formerly GTT) (\*)
- NTT
- PacketFabric
- PCCW
- Telstra (\*)

#### Supported providers based on other parsers

- Apple
- AWS
- AquaComms
- BSO
@@ -82,6 +84,7 @@ By default, there is a `GenericProvider` that supports a `SimpleProcessor` using
- Megaport
- Momentum
- Netflix (AS2906 only)
- PCCW
- Seaborn
- Sparkle
- Tata
4 changes: 4 additions & 0 deletions circuit_maintenance_parser/__init__.py
Original file line number Diff line number Diff line change
@@ -12,6 +12,7 @@
HGC,
NTT,
AquaComms,
Apple,
Arelion,
Cogent,
Colt,
@@ -26,6 +27,7 @@
Momentum,
Netflix,
PacketFabric,
PCCW,
Seaborn,
Sparkle,
Tata,
@@ -39,6 +41,7 @@

SUPPORTED_PROVIDERS = (
GenericProvider,
Apple,
AquaComms,
Arelion,
AWS,
@@ -58,6 +61,7 @@
Netflix,
NTT,
PacketFabric,
PCCW,
Seaborn,
Sparkle,
Tata,
2 changes: 1 addition & 1 deletion circuit_maintenance_parser/parser.py
Original file line number Diff line number Diff line change
@@ -43,7 +43,7 @@ class Parser(BaseModel):
def get_data_types(cls) -> List[str]:
"""Return the expected data type."""
try:
return cls._data_types.get_default()
return cls._data_types.get_default() # type: ignore[attr-defined]
except AttributeError:
# TODO: This exception handling is required for Pydantic 1.x compatibility. To be removed when the dependency is deprecated.
return cls()._data_types
88 changes: 88 additions & 0 deletions circuit_maintenance_parser/parsers/apple.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
"""Apple peering parser."""
import email
import re

from datetime import datetime, timezone
from typing import Dict, List

from circuit_maintenance_parser.output import Impact, Status
from circuit_maintenance_parser.parser import EmailSubjectParser, Text, CircuitImpact


class SubjectParserApple(EmailSubjectParser):
    """Subject parser for Apple notifications.

    The subject line is passed through unchanged as the maintenance summary.
    """

    def parse_subject(self, subject: str) -> List[Dict]:
        """Expose the email subject as the maintenance summary.

        Args:
            subject (str): Subject line of the notification email.

        Returns:
            List[Dict]: Single-element list holding the ``summary`` attribute.
        """
        attributes = {"summary": subject}
        return [attributes]


class TextParserApple(Text):
    """Parse the plaintext content of an Apple notification.

    The body is scanned with regular expressions for the peer AS number, the
    ``CHG`` ticket number and the start/end timestamps. Every other attribute
    of the resulting record is a fixed value.
    """

    def parse_text(self, text: str) -> List[Dict]:
        """Extract attributes from an Apple notification email.

        Args:
            text (str): Plaintext message body.

        Returns:
            List[Dict]: Single-element list of attributes for a Maintenance object.

        Raises:
            ValueError: If the peer AS, ticket number or a timestamp cannot be
                found or parsed in ``text``.
        """
        circuits = self._circuits(text)
        maintenance_id = self._maintenance_id(text)
        # Parsed once and reused below for both "start" and "stamp".
        start = self._start_time(text)
        data = {
            "circuits": circuits,
            "maintenance_id": maintenance_id,
            "start": start,
            # This parser extracts no separate notification timestamp, so the
            # maintenance start time doubles as the stamp.
            "stamp": start,
            "end": self._end_time(text),
            "status": Status.CONFIRMED,  # Have yet to see anything but confirmation.
            # NOTE(review): this literal looks like an address masked by e-mail
            # obfuscation in the copy under review - confirm the real organizer.
            "organizer": "[email protected]",
            "provider": "apple",
            "account": "Customer info unavailable",
        }
        return [data]

    def _circuits(self, text):
        """Return the impacted circuit, identified as ``AS<number>`` of the peer."""
        # ``\d+`` (not ``\d*``) so a missing number is reported instead of
        # silently producing the circuit id "AS".
        match = re.search(r"Peer AS: (\d+)", text)
        if match is None:
            raise ValueError("Apple notification does not contain a 'Peer AS: <number>' line.")
        return [CircuitImpact(circuit_id=f"AS{match.group(1)}", impact=Impact.OUTAGE)]

    def _maintenance_id(self, text):
        """Return the Apple ticket number, including its ``CHG`` prefix."""
        # Apple ticket numbers always start with "CHG"; at least one digit is
        # required so a bare "CHG" elsewhere in the text is not taken as the id.
        match = re.search(r"CHG(\d+)", text)
        if match is None:
            raise ValueError("Apple notification does not contain a 'CHG<number>' ticket number.")
        return match.group(0)

    def _get_time(self, pattern, text):
        """Return the epoch timestamp captured by ``pattern`` in ``text``.

        Args:
            pattern (str): Regular expression whose first group captures the
                timestamp text.
            text (str): Plaintext message body.

        Returns:
            int: Seconds since the Unix epoch.

        Raises:
            ValueError: If ``pattern`` does not match or the captured text is in
                neither of the two supported timestamp formats.
        """
        # Imported explicitly: a bare ``import email`` does not load the
        # ``email.utils`` submodule, so ``email.utils.*`` only works when some
        # other module happens to have imported it first.
        from email.utils import mktime_tz, parsedate_tz

        match = re.search(pattern, text)
        if match is None:
            raise ValueError(f"Apple notification has no timestamp matching {pattern!r}.")
        # The capture group allows spaces, so trailing blanks may be included.
        raw = match.group(1).strip()

        # Apple sends timestamps as RFC2822 for the US
        # but a custom format for EU datacenters.
        try:
            eu_time = datetime.strptime(raw, "%Y-%m-%d(%a) %H:%M %Z")
        except ValueError:
            eu_time = None
        if eu_time is not None:
            # The zone name matched by %Z is not applied; the value is always
            # interpreted as UTC.
            return int(eu_time.replace(tzinfo=timezone.utc).timestamp())

        # Fall back to RFC2822 - US timestamp.
        time_tuple = parsedate_tz(raw)
        if time_tuple is None:
            raise ValueError(f"Unrecognised Apple timestamp format: {raw!r}")
        return mktime_tz(time_tuple)

    def _start_time(self, text):
        """Return the maintenance start as an epoch timestamp."""
        pattern = "Start Time: ([a-zA-Z0-9 :()-]*)"
        return self._get_time(pattern, text)

    def _end_time(self, text):
        """Return the maintenance end as an epoch timestamp."""
        pattern = "End Time: ([a-zA-Z0-9 :()-]*)"
        return self._get_time(pattern, text)
89 changes: 89 additions & 0 deletions circuit_maintenance_parser/parsers/pccw.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
"""Circuit maintenance parser for PCCW Email notifications."""
import re
from typing import List, Dict, Tuple, Any, ClassVar
from datetime import datetime

from bs4.element import ResultSet # type: ignore
from circuit_maintenance_parser.output import Status
from circuit_maintenance_parser.parser import Html, EmailSubjectParser


class HtmlParserPCCW(Html):
    """Custom Parser for HTML portion of PCCW circuit maintenance notifications."""

    # strptime format of the timestamps in the "Date Time :" field.
    DATE_TIME_FORMAT: ClassVar[str] = "%d/%m/%Y %H:%M:%S"
    PROVIDER: ClassVar[str] = "PCCW Global"

    def parse_html(self, soup: ResultSet) -> List[Dict]:
        """Parse PCCW circuit maintenance email.

        Args:
            soup: BeautifulSoup ResultSet containing the email HTML content.

        Returns:
            List containing a dictionary with parsed maintenance data. The
            ``circuits`` entry is always an empty list here.

        Raises:
            ValueError: If the customer name or the maintenance window cannot be
                located or parsed.
        """
        data: Dict[str, Any] = {
            "circuits": [],
            "provider": self.PROVIDER,
            "account": self._extract_account(soup),
        }
        start_time, end_time = self._extract_maintenance_window(soup)
        data["start"] = self.dt2ts(start_time)
        data["end"] = self.dt2ts(end_time)

        return [data]

    def _extract_account(self, soup: ResultSet) -> str:
        """Extract customer account from soup.

        Raises:
            ValueError: If no "Customer Name :" text is present.
        """
        customer_field = soup.find(string=re.compile("Customer Name :", re.IGNORECASE))
        if customer_field is None:
            raise ValueError("PCCW notification has no 'Customer Name :' field.")
        # Split on the first colon only, so a customer name that itself
        # contains a colon is not truncated.
        _label, _separator, account = customer_field.partition(":")
        return account.strip()

    def _extract_maintenance_window(self, soup: ResultSet) -> Tuple[datetime, datetime]:
        """Extract start and end times from maintenance window.

        The first two ``dd/mm/yyyy hh:mm:ss`` timestamps following the
        "Date Time :" label are taken as start and end. The returned datetimes
        are naive; no timezone from the field is attached.

        Raises:
            ValueError: If the field is missing or holds fewer than two timestamps.
        """
        datetime_field = soup.find(string=re.compile("Date Time :", re.IGNORECASE))
        if datetime_field is None:
            raise ValueError("PCCW notification has no 'Date Time :' field.")
        # Only look at what follows the label, then pick the timestamps out
        # directly instead of relying on the exact separator words around them.
        window_text = re.split("date time :", datetime_field, maxsplit=1, flags=re.IGNORECASE)[-1]
        stamps = re.findall(r"\d{1,2}/\d{1,2}/\d{4}\s+\d{1,2}:\d{1,2}:\d{1,2}", window_text)
        if len(stamps) < 2:
            raise ValueError(f"PCCW maintenance window is not in the expected format: {window_text!r}")
        start_time = datetime.strptime(stamps[0], self.DATE_TIME_FORMAT)
        end_time = datetime.strptime(stamps[1], self.DATE_TIME_FORMAT)
        return start_time, end_time


class SubjectParserPCCW(EmailSubjectParser):
    """Custom Parser for Email subject of PCCW circuit maintenance notifications.

    Derives the maintenance ID and summary from the subject line and attaches a
    fixed status.
    """

    # Completion notifications are the only ones that arrive without an ICal
    # attachment; planned outage, urgent maintenance, amendment and cancellation
    # notifications all include one. Hence the status is fixed to COMPLETED.
    DEFAULT_STATUS: ClassVar[Status] = Status.COMPLETED

    def parse_subject(self, subject: str) -> List[Dict]:
        """Parse PCCW circuit maintenance email subject.

        Args:
            subject: Email subject string to parse.

        Returns:
            List containing a dictionary with:
            - maintenance_id: text after the last hyphen of the subject
            - status: the default COMPLETED status
            - summary: the subject, trimmed and with newlines removed
        """
        maintenance_id = self._extract_maintenance_id(subject)
        status = self.DEFAULT_STATUS
        summary = self._clean_summary(subject)
        parsed: Dict[str, Any] = {
            "maintenance_id": maintenance_id,
            "status": status,
            "summary": summary,
        }
        return [parsed]

    def _extract_maintenance_id(self, subject: str) -> str:
        """Return the trimmed text after the last hyphen (whole subject if none)."""
        _head, _hyphen, tail = subject.rpartition("-")
        return tail.strip()

    def _clean_summary(self, subject: str) -> str:
        """Return the subject without surrounding whitespace or embedded newlines."""
        trimmed = subject.strip()
        return "".join(trimmed.split("\n"))
11 changes: 3 additions & 8 deletions circuit_maintenance_parser/parsers/tata.py
Original file line number Diff line number Diff line change
@@ -35,20 +35,15 @@ def parse_html(self, soup: ResultSet) -> List[Dict]:
)
elif prev_lower in ("activity window (gmt)", "revised activity window (gmt)"):
start_end = curr.split("to")
data["start"] = self._parse_time(start_end[0])
data["end"] = self._parse_time(start_end[1])
data["start"] = self.dt2ts(datetime.strptime(start_end[0].strip(), "%Y-%m-%d %H:%M:%S %Z"))
data["end"] = self.dt2ts(datetime.strptime(start_end[1].strip(), "%Y-%m-%d %H:%M:%S %Z"))
elif "extended up to time window" in prev_lower:
if "gmt" in curr.lower():
data["end"] = self._parse_time(curr)
data["end"] = self.dt2ts(datetime.strptime(curr, "%Y-%m-%d %H:%M:%S %Z"))
prev = span.text.strip()

return [data]

@staticmethod
def _parse_time(string: str) -> int:
"""Convert YYYY-MM-DD HH:MM:SS GMT to epoch."""
return int((datetime.strptime(string.strip(), "%Y-%m-%d %H:%M:%S GMT") - datetime(1970, 1, 1)).total_seconds())


class SubjectParserTata(EmailSubjectParser):
"""Custom Parser for Email subject of Tata circuit maintenance notifications."""
35 changes: 35 additions & 0 deletions circuit_maintenance_parser/provider.py
Original file line number Diff line number Diff line change
@@ -14,6 +14,8 @@
from circuit_maintenance_parser.errors import ProcessorError, ProviderError
from circuit_maintenance_parser.output import Maintenance
from circuit_maintenance_parser.parser import EmailDateParser, ICal

from circuit_maintenance_parser.parsers.apple import SubjectParserApple, TextParserApple
from circuit_maintenance_parser.parsers.aquacomms import HtmlParserAquaComms1, SubjectParserAquaComms1
from circuit_maintenance_parser.parsers.aws import SubjectParserAWS1, TextParserAWS1
from circuit_maintenance_parser.parsers.bso import HtmlParserBSO1
@@ -30,6 +32,7 @@
from circuit_maintenance_parser.parsers.momentum import HtmlParserMomentum1, SubjectParserMomentum1
from circuit_maintenance_parser.parsers.netflix import TextParserNetflix1
from circuit_maintenance_parser.parsers.openai import OpenAIParser
from circuit_maintenance_parser.parsers.pccw import HtmlParserPCCW, SubjectParserPCCW
from circuit_maintenance_parser.parsers.seaborn import (
HtmlParserSeaborn1,
HtmlParserSeaborn2,
@@ -204,6 +207,15 @@ def get_provider_type(cls) -> str:
####################


class Apple(GenericProvider):
    """Apple provider custom class.

    Uses one combined processor that runs the Apple text parser together with
    the Apple subject parser.
    """

    # Declared through PrivateAttr, like the other providers in this module.
    _processors: List[GenericProcessor] = PrivateAttr(
        [
            CombinedProcessor(data_parsers=[TextParserApple, SubjectParserApple]),
        ]
    )
    # NOTE(review): this literal looks like an address masked by e-mail
    # obfuscation in the copy under review - confirm the real organizer.
    _default_organizer = PrivateAttr("[email protected]")


class AquaComms(GenericProvider):
"""AquaComms provider custom class."""

@@ -406,6 +418,29 @@ class PacketFabric(GenericProvider):
_default_organizer = PrivateAttr("[email protected]")


class PCCW(GenericProvider):
    """PCCW provider custom class.

    Two processors are declared: an ICal-only processor, and a combined
    processor built from the PCCW HTML parser, the PCCW subject parser and the
    email date parser.
    """

    # Presumably limits processing to data parts containing "BEGIN" (ICal) and
    # to the two completion subjects listed - verify against GenericProvider.
    _include_filter = PrivateAttr(
        {
            "Icalendar": ["BEGIN"],
            "ical": ["BEGIN"],
            EMAIL_HEADER_SUBJECT: [
                "Completion - Planned Outage Notification",
                "Completion - Urgent Maintenance Notification",
            ],
        }
    )

    _processors: List[GenericProcessor] = PrivateAttr(
        [
            SimpleProcessor(data_parsers=[ICal]),
            CombinedProcessor(data_parsers=[HtmlParserPCCW, SubjectParserPCCW, EmailDateParser]),
        ]
    )
    # Wrapped in PrivateAttr to match the other private attributes of this class.
    # NOTE(review): this literal looks like an address masked by e-mail
    # obfuscation in the copy under review - confirm the real organizer.
    _default_organizer = PrivateAttr("mailto:[email protected]")


class Seaborn(GenericProvider):
"""Seaborn provider custom class."""

Loading

0 comments on commit 3b4e1d6

Please sign in to comment.