Partial rewrite with sane typing (modern python!)
While there, we get rid of annoying: Failure: twisted.internet.defer.CancelledError Whenever we would cancel a deferred (which we use a lot in this app)
This commit is contained in:
parent
d8071eabb7
commit
c1f7b12366
3
Pipfile
3
Pipfile
|
@ -10,7 +10,6 @@ python_version = "3"
|
|||
attrs = "*"
|
||||
twisted = {extras = ["conch"],version = "*"}
|
||||
service-identity = "*"
|
||||
munch = "*"
|
||||
pyyaml = "*"
|
||||
klein = "*"
|
||||
"jinja2" = "*"
|
||||
|
@ -21,6 +20,8 @@ flake8 = "*"
|
|||
bandit = "*"
|
||||
mypy = "*"
|
||||
isort = "*"
|
||||
types-markdown = "*"
|
||||
types-pyyaml = "*"
|
||||
|
||||
[pipenv]
|
||||
allow_prereleases = true
|
||||
|
|
28
Pipfile.lock
generated
28
Pipfile.lock
generated
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "da45c8a3b7e498a55a6f1af4c21b92668cb0426304040caff83620c5b1330aa2"
|
||||
"sha256": "7a1e05f686d4492800c29d951c4ec9c8fa9e37b01bffa4acc80a28de398ea4c7"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {
|
||||
|
@ -278,15 +278,6 @@
|
|||
"markers": "python_version >= '3.7'",
|
||||
"version": "==2.1.3"
|
||||
},
|
||||
"munch": {
|
||||
"hashes": [
|
||||
"sha256:542cb151461263216a4e37c3fd9afc425feeaf38aaa3025cd2a981fadb422235",
|
||||
"sha256:71033c45db9fb677a0b7eb517a4ce70ae09258490e419b0e7f00d1e386ecb1b4"
|
||||
],
|
||||
"index": "pypi",
|
||||
"markers": "python_version >= '3.6'",
|
||||
"version": "==4.0.0"
|
||||
},
|
||||
"pyasn1": {
|
||||
"hashes": [
|
||||
"sha256:4439847c58d40b1d0a573d07e3856e95333f1976294494c325775aeca506eb58",
|
||||
|
@ -708,6 +699,23 @@
|
|||
"markers": "python_version < '3.11'",
|
||||
"version": "==2.0.1"
|
||||
},
|
||||
"types-markdown": {
|
||||
"hashes": [
|
||||
"sha256:2299b9086c695f408a3ebabf820f1fba3b239f1b3bfdbb32bf42d530b42cdd83",
|
||||
"sha256:9afd38a8f53e19d43de3f8d89742b3674b5736767806ed9356d64ccb09f76439"
|
||||
],
|
||||
"index": "pypi",
|
||||
"markers": "python_version >= '3.7'",
|
||||
"version": "==3.5.0.3"
|
||||
},
|
||||
"types-pyyaml": {
|
||||
"hashes": [
|
||||
"sha256:334373d392fde0fdf95af5c3f1661885fa10c52167b14593eb856289e1855062",
|
||||
"sha256:c05bc6c158facb0676674b7f11fe3960db4f389718e19e62bd2b84d6205cfd24"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==6.0.12.12"
|
||||
},
|
||||
"typing-extensions": {
|
||||
"hashes": [
|
||||
"sha256:56a8f7a8776ea160e59ef0af6fc3a3a03b7d42156b90e47f0241515fcec620c2",
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
import functools
|
||||
|
||||
from .conch_helpers import SSHSimpleProtocol
|
||||
|
||||
from .Config import Config
|
||||
|
||||
|
||||
|
|
|
@ -1,11 +1,15 @@
|
|||
from twisted.internet import reactor, task
|
||||
from twisted.web import resource
|
||||
|
||||
import json
|
||||
from munch import Munch
|
||||
from typing import TYPE_CHECKING, Any, Dict, List
|
||||
|
||||
from twisted.internet import reactor, task
|
||||
from twisted.web._responses import BAD_REQUEST, OK
|
||||
from twisted.web.server import Request
|
||||
|
||||
from .TokenResource import TokenResource
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .SitesManager import SiteManager, SitesManager
|
||||
|
||||
|
||||
class AdlerManagerTokenResource(TokenResource):
|
||||
"""
|
||||
|
@ -14,19 +18,17 @@ class AdlerManagerTokenResource(TokenResource):
|
|||
|
||||
HEADER = "Authorization"
|
||||
|
||||
def __init__(self, site_manager):
|
||||
def __init__(self, sites_manager: "SitesManager"):
|
||||
"""
|
||||
@param site_manager: The object managing state for all sites.
|
||||
@type site_manager: L{adlermanager.SiteManager}
|
||||
@type site_manager: L{adlermanager.SitesManager}
|
||||
"""
|
||||
TokenResource.__init__(self, tokens=site_manager.tokens)
|
||||
TokenResource.__init__(self, tokens=sites_manager.tokens)
|
||||
|
||||
self.site_manager = site_manager
|
||||
|
||||
def preprocess_header(self, header):
|
||||
def preprocess_header(self, header: str):
|
||||
return header.split(" ")[-1]
|
||||
|
||||
def processToken(self, token_data, request):
|
||||
def processToken(self, token_data: "SiteManager", request: Request) -> int:
|
||||
"""
|
||||
Pass Alerts along if Authorization Header matched.
|
||||
|
||||
|
@ -38,12 +40,12 @@ class AdlerManagerTokenResource(TokenResource):
|
|||
"""
|
||||
|
||||
try:
|
||||
request_body = request.content.read()
|
||||
alert_data = [Munch.fromDict(alert) for alert in json.loads(request_body)]
|
||||
except:
|
||||
return False
|
||||
request_body: bytes = request.content.read() # type: ignore
|
||||
alert_data: List[Dict[str, Any]] = json.loads(request_body)
|
||||
except Exception:
|
||||
return BAD_REQUEST
|
||||
|
||||
site = token_data
|
||||
|
||||
task.deferLater(reactor, 0, site.process_alerts, alert_data)
|
||||
return True
|
||||
task.deferLater(reactor, 0, site.process_alerts, alert_data) # type: ignore
|
||||
return OK
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
import attr
|
||||
import os
|
||||
from datetime import timedelta
|
||||
|
||||
import attr
|
||||
|
||||
|
||||
@attr.s
|
||||
class ConfigClass(object):
|
||||
|
@ -45,22 +46,22 @@ class ConfigClass(object):
|
|||
Default value: 60 (i.e. 1 hour).
|
||||
"""
|
||||
|
||||
data_dir = attr.ib(default=os.getenv("DATA_DIR", "../data"))
|
||||
data_dir: str = attr.ib(default=os.getenv("DATA_DIR", "../data"))
|
||||
|
||||
# Web
|
||||
web_endpoint = attr.ib(default=os.getenv("WEB_ENDPOINT", "unix:adler.socket"))
|
||||
web_static_dir = attr.ib(default=os.getenv("WEB_STATIC_DIR", "../static"))
|
||||
web_endpoint: str = attr.ib(default=os.getenv("WEB_ENDPOINT", "unix:adler.socket"))
|
||||
web_static_dir: str = attr.ib(default=os.getenv("WEB_STATIC_DIR", "../static"))
|
||||
|
||||
# SSH
|
||||
ssh_enabled = attr.ib(default=os.getenv("SSH_ENABLED", b"") != b"")
|
||||
ssh_endpoint = attr.ib(
|
||||
ssh_enabled: bool = attr.ib(default=os.getenv("SSH_ENABLED", "") != "")
|
||||
ssh_endpoint: str = attr.ib(
|
||||
default=os.getenv("SSH_ENDPOINT", r"tcp6:interface=\:\::port=2222")
|
||||
)
|
||||
ssh_key_size = attr.ib(default=int(os.getenv("SSH_KEY_SIZE", "4096")))
|
||||
ssh_keys_dir = attr.ib(default=os.getenv("SSH_KEYS_DIR", "../data/ssh"))
|
||||
ssh_key_size: int = attr.ib(default=int(os.getenv("SSH_KEY_SIZE", "4096")))
|
||||
ssh_keys_dir: str = attr.ib(default=os.getenv("SSH_KEYS_DIR", "../data/ssh"))
|
||||
|
||||
# Alerts processing
|
||||
new_incident_timeout = attr.ib(
|
||||
new_incident_timeout: timedelta = attr.ib(
|
||||
default=timedelta(minutes=int(os.getenv("NEW_INCIDENT_TIMEOUT", "60")))
|
||||
)
|
||||
|
||||
|
|
|
@ -1,91 +1,59 @@
|
|||
import attr
|
||||
import copy
|
||||
from enum import IntEnum
|
||||
from munch import Munch
|
||||
from twisted.internet import reactor, defer, task
|
||||
from typing import Dict, Iterable, List, Optional
|
||||
|
||||
from .utils import current_time, current_timestamp
|
||||
import attr
|
||||
from twisted.internet import defer, reactor, task
|
||||
from twisted.python.filepath import FilePath
|
||||
|
||||
from .model import Alert, Severity
|
||||
from .utils import current_timestamp, default_errback, noop_deferred
|
||||
|
||||
FILENAME_TIME_FORMAT = "%Y-%m-%d-%H%MZ"
|
||||
|
||||
|
||||
class Severity(IntEnum):
|
||||
OK = 0
|
||||
WARNING = 1
|
||||
ERROR = 2
|
||||
|
||||
@classmethod
|
||||
def from_string(cls, s):
|
||||
# TODO: Do something sensitive with other priorities
|
||||
# At least document them somewhere :-D
|
||||
labels = {
|
||||
"ok": cls.OK,
|
||||
"warning": cls.WARNING,
|
||||
"error": cls.ERROR,
|
||||
"critical": cls.ERROR,
|
||||
}
|
||||
return labels[s.lower()]
|
||||
|
||||
@classmethod
|
||||
def from_alert(cls, alert):
|
||||
try:
|
||||
if "endsAt" in alert and alert.endsAt <= current_time():
|
||||
return Severity.OK
|
||||
except:
|
||||
pass
|
||||
return Severity.from_string(alert.labels.get("severity", "OK"))
|
||||
|
||||
@property
|
||||
def css(self):
|
||||
classes = {self.OK: "success", self.WARNING: "warning", self.ERROR: "danger"}
|
||||
return classes[self]
|
||||
|
||||
def __str__(self):
|
||||
return self.css
|
||||
|
||||
|
||||
@attr.s
|
||||
class IncidentManager(object):
|
||||
path = attr.ib()
|
||||
path: FilePath = attr.ib()
|
||||
|
||||
last_alert = attr.ib(default="")
|
||||
active_alerts = attr.ib(factory=dict)
|
||||
expired = attr.ib(factory=defer.Deferred)
|
||||
_timeout = attr.ib(factory=defer.Deferred)
|
||||
_alert_timeouts = attr.ib(factory=dict) # alert name => timeout
|
||||
last_alert: str = attr.ib(default="")
|
||||
active_alerts: Dict[str, Alert] = attr.ib(factory=dict)
|
||||
expired: defer.Deferred[None] = attr.ib(factory=noop_deferred)
|
||||
_timeout: defer.Deferred[None] = attr.ib(factory=noop_deferred)
|
||||
"""Incident timeout"""
|
||||
_alert_timeouts: Dict[str, defer.Deferred[None]] = attr.ib(factory=dict)
|
||||
"""alert_label -> timeout"""
|
||||
|
||||
_monitoring_down = attr.ib(default=False)
|
||||
|
||||
_logs = attr.ib(factory=list)
|
||||
# _logs = attr.ib(factory=list)
|
||||
# TODO: Get IncidentClosing timeout from settings?
|
||||
# Defaulting to 30m
|
||||
_monitoring_grace_period = attr.ib(default=60 * 30)
|
||||
_monitoring_grace_period: int = attr.ib(default=60 * 30)
|
||||
# Defaulting to 5m as alertmanager
|
||||
_alert_resolve_timeout = attr.ib(default=5 * 60)
|
||||
_alert_resolve_timeout: int = attr.ib(default=5 * 60)
|
||||
|
||||
def process_heartbeats(self, heartbeats, timestamp):
|
||||
def process_heartbeats(self, heartbeats: Iterable[Alert], timestamp: str):
|
||||
if heartbeats:
|
||||
self.last_updated = timestamp
|
||||
if self._monitoring_down:
|
||||
self._monitoring_down = False
|
||||
# Monitoring is back up, re-activate timeout
|
||||
self._timeout = task.deferLater(
|
||||
reactor, self._monitoring_grace_period, self._expire
|
||||
reactor, self._monitoring_grace_period, self._expire # type: ignore
|
||||
)
|
||||
self.log_event("[Meta]MonitoringUp", timestamp)
|
||||
|
||||
def process_alerts(self, alerts, timestamp):
|
||||
def process_alerts(self, alerts: Iterable[Alert], timestamp: str):
|
||||
if alerts:
|
||||
self._timeout.cancel()
|
||||
self._timeout = task.deferLater(
|
||||
reactor, self._monitoring_grace_period, self._expire
|
||||
)
|
||||
reactor, self._monitoring_grace_period, self._expire # type: ignore
|
||||
).addErrback(default_errback)
|
||||
self.last_alert = timestamp
|
||||
|
||||
new_alerts = dict()
|
||||
new_alerts: Dict[str, Alert] = dict()
|
||||
|
||||
for alert in alerts:
|
||||
alert_label = alert.labels.component
|
||||
alert_label = alert.labels["component"]
|
||||
if alert_label in self._alert_timeouts:
|
||||
self._alert_timeouts[alert_label].cancel()
|
||||
else:
|
||||
|
@ -98,47 +66,66 @@ class IncidentManager(object):
|
|||
):
|
||||
self.active_alerts[alert_label] = alert
|
||||
self._alert_timeouts[alert_label] = task.deferLater(
|
||||
reactor, self._alert_resolve_timeout, self._expire_alert, alert_label
|
||||
)
|
||||
reactor, # type: ignore
|
||||
self._alert_resolve_timeout,
|
||||
self._expire_alert,
|
||||
alert_label,
|
||||
).addErrback(default_errback)
|
||||
|
||||
if new_alerts:
|
||||
self.log_event("New", timestamp, alerts=list(new_alerts.values()))
|
||||
|
||||
def _expire(self):
|
||||
def _expire(self) -> None:
|
||||
if not self._monitoring_down:
|
||||
self.expired.callback(self)
|
||||
self.expired.callback(self) # type: ignore # twisted bug
|
||||
|
||||
def _expire_alert(self, alertname):
|
||||
def _expire_alert(self, alert_label: str) -> None:
|
||||
self.log_event(
|
||||
"Resolved", current_timestamp(), alert=self.active_alerts[alertname]
|
||||
"Resolved", current_timestamp(), alert=self.active_alerts[alert_label]
|
||||
)
|
||||
del self.active_alerts[alertname]
|
||||
del self.active_alerts[alert_label]
|
||||
|
||||
def monitoring_down(self, timestamp):
|
||||
def monitoring_down(self, timestamp: str) -> None:
|
||||
self._monitoring_down = True
|
||||
self.log_event("[Meta]MonitoringDown", timestamp)
|
||||
|
||||
def log_event(self, message, timestamp, alerts=[], alert=None):
|
||||
if alert is not None:
|
||||
alerts.append(alert)
|
||||
obj = {"message": message, "timestamp": timestamp}
|
||||
if alerts:
|
||||
alerts = copy.deepcopy(alerts)
|
||||
for alert in alerts:
|
||||
alert.status = alert.status.css
|
||||
obj["alerts"] = alerts
|
||||
self._logs.append(obj)
|
||||
# Persist messages
|
||||
with self.path.open("w") as f:
|
||||
m = Munch.fromDict({"log": self._logs, "timestamp": timestamp})
|
||||
f.write(m.toYAML().encode("utf-8"))
|
||||
def log_event(
|
||||
self,
|
||||
message: str,
|
||||
timestamp: str,
|
||||
alerts: List[Alert] = [],
|
||||
alert: Optional[Alert] = None,
|
||||
) -> None:
|
||||
# TODO: we probably actually want something like this
|
||||
pass
|
||||
|
||||
def component_status(self, component_label):
|
||||
# def log_event(
|
||||
# self,
|
||||
# message: str,
|
||||
# timestamp: str,
|
||||
# alerts: List[Alert] = [],
|
||||
# alert: Optional[Alert] = None,
|
||||
# ):
|
||||
# if alert is not None:
|
||||
# alerts.append(alert)
|
||||
# obj = {"message": message, "timestamp": timestamp}
|
||||
# if alerts:
|
||||
# alerts = copy.deepcopy(alerts)
|
||||
# for a in alerts:
|
||||
# a.status = a.status.css
|
||||
# obj["alerts"] = alerts
|
||||
# # self._logs.append(obj)
|
||||
# # Persist messages
|
||||
# with self.path.open("w") as f:
|
||||
# m = Munch.fromDict({"log": self._logs, "timestamp": timestamp})
|
||||
# f.write(m.toYAML().encode("utf-8"))
|
||||
|
||||
def component_status(self, component_label: str) -> Severity:
|
||||
return max(
|
||||
(
|
||||
alert.status
|
||||
for alert in self.active_alerts.values()
|
||||
if alert.labels.component == component_label
|
||||
if alert.labels["component"] == component_label
|
||||
),
|
||||
default=Severity.OK,
|
||||
)
|
||||
|
|
|
@ -1,58 +1,48 @@
|
|||
from typing import Any, Dict, Generator, List, cast
|
||||
|
||||
import attr
|
||||
import itertools
|
||||
from munch import Munch
|
||||
from twisted.python.filepath import FilePath
|
||||
import yaml
|
||||
from twisted.internet import defer, reactor, task
|
||||
from twisted.logger import Logger
|
||||
from twisted.internet import reactor, defer, task
|
||||
from twisted.python.filepath import FilePath
|
||||
|
||||
from .Config import Config
|
||||
from .IncidentManager import IncidentManager, Severity
|
||||
|
||||
from .utils import ensure_dirs, TimestampFile, read_timestamp
|
||||
from .IncidentManager import IncidentManager
|
||||
from .model import Alert, Severity
|
||||
from .utils import TimestampFile, default_errback, noop_deferred
|
||||
|
||||
|
||||
class SitesManager(object):
|
||||
log = Logger()
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
self.sites_dir = FilePath(Config.data_dir).child("sites")
|
||||
self.site_managers = {
|
||||
site: SiteManager(self.sites_dir.child(site)) for site in self.load_sites()
|
||||
}
|
||||
self.tokens = {
|
||||
self.tokens: Dict[str, SiteManager] = {
|
||||
token: manager
|
||||
for manager in self.site_managers.values()
|
||||
for token in manager.tokens
|
||||
}
|
||||
|
||||
def load_sites(self):
|
||||
def load_sites(self) -> Generator[str, None, None]:
|
||||
for site_dir in self.sites_dir.children():
|
||||
if not site_dir.isdir():
|
||||
continue
|
||||
yield site_dir.basename()
|
||||
|
||||
|
||||
def import_alert(alert):
|
||||
# Convert date data types
|
||||
for att in ["startsAt", "endsAt"]:
|
||||
if att in alert:
|
||||
try:
|
||||
alert[att] = read_timestamp(alert[att])
|
||||
except:
|
||||
del alert[att]
|
||||
# Convert severity (needs date data)
|
||||
alert.status = Severity.from_alert(alert)
|
||||
return alert
|
||||
yield cast(str, site_dir.basename())
|
||||
|
||||
|
||||
@attr.s
|
||||
class SiteManager(object):
|
||||
path = attr.ib()
|
||||
tokens = attr.ib(factory=list)
|
||||
path: FilePath = attr.ib()
|
||||
tokens: List[str] = attr.ib(factory=list)
|
||||
log = Logger()
|
||||
monitoring_is_down = attr.ib(default=False)
|
||||
definition: Dict[str, Any] = attr.ib(factory=dict)
|
||||
title = attr.ib(default="")
|
||||
|
||||
_timeout = attr.ib(factory=defer.Deferred)
|
||||
_timeout: defer.Deferred[None] = attr.ib(factory=noop_deferred)
|
||||
site_name = attr.ib(default="")
|
||||
# TODO: Get monitoring timeout from config
|
||||
# Default to 2 mins
|
||||
|
@ -60,25 +50,26 @@ class SiteManager(object):
|
|||
|
||||
def __attrs_post_init__(self):
|
||||
self.load_definition()
|
||||
self.title = self.definition["title"]
|
||||
self.load_tokens()
|
||||
self.last_updated = TimestampFile(self.path.child("last_updated.txt"))
|
||||
self.service_managers = [
|
||||
ServiceManager(path=self.path.child(s.label), definition=s)
|
||||
for s in self.definition.services
|
||||
ServiceManager(path=self.path.child(s["label"]), definition=s)
|
||||
for s in cast(List[Dict[str, Any]], self.definition.get("services", dict()))
|
||||
]
|
||||
self.site_name = self.path.basename()
|
||||
self._timeout = task.deferLater(
|
||||
reactor, self._timeout_seconds, self.monitoring_down
|
||||
)
|
||||
reactor, self._timeout_seconds, self.monitoring_down # type: ignore
|
||||
).addErrback(default_errback)
|
||||
|
||||
def monitoring_down(self):
|
||||
def monitoring_down(self) -> None:
|
||||
self.monitoring_is_down = True
|
||||
for manager in self.service_managers:
|
||||
manager.monitoring_down(self.last_updated.getStr())
|
||||
|
||||
def load_definition(self):
|
||||
with self.path.child("site.yml").open("r") as f:
|
||||
self.definition = Munch.fromYAML(f.read())
|
||||
self.definition = yaml.safe_load(f)
|
||||
|
||||
def load_tokens(self):
|
||||
tokens_file = self.path.child("tokens.txt")
|
||||
|
@ -86,34 +77,39 @@ class SiteManager(object):
|
|||
with open(tokens_file.path, "r") as f:
|
||||
self.tokens = [line.strip() for line in f]
|
||||
if not self.tokens:
|
||||
log.warn(
|
||||
self.log.warn(
|
||||
"Site {}: No tokens exist, "
|
||||
"your site will never update".format(self.definition.title)
|
||||
"your site will never update".format(self.title)
|
||||
)
|
||||
|
||||
def process_alerts(self, alerts):
|
||||
def process_alerts(self, raw_alerts: List[Dict[str, Any]]):
|
||||
self.last_updated.now()
|
||||
|
||||
self.monitoring_is_down = False
|
||||
self._timeout.cancel()
|
||||
self._timeout = task.deferLater(
|
||||
reactor, self._timeout_seconds, self.monitoring_down
|
||||
)
|
||||
reactor, self._timeout_seconds, self.monitoring_down # type: ignore
|
||||
).addErrback(default_errback)
|
||||
|
||||
# Filter alerts for this site
|
||||
alerts = [
|
||||
import_alert(a)
|
||||
for a in alerts
|
||||
if a.get("labels", {}).get("adlermanager", "") == self.site_name
|
||||
]
|
||||
alerts: List[Alert] = []
|
||||
for ra in raw_alerts:
|
||||
if (
|
||||
ra.get("labels", {}).get("adlermanager", "") == self.site_name
|
||||
and ra.get("labels", {}).get("component", "")
|
||||
and ra.get("labels", {}).get("service", "")
|
||||
):
|
||||
alerts.append(Alert.import_alert(ra))
|
||||
|
||||
heartbeats, filtered_alerts = [], []
|
||||
heartbeats: List[Alert] = []
|
||||
filtered_alerts: List[Alert] = []
|
||||
for a in alerts:
|
||||
(heartbeats if a.labels.get("heartbeat") else filtered_alerts).append(a)
|
||||
|
||||
timestamp = self.last_updated.getStr()
|
||||
for manager in self.service_managers:
|
||||
manager.process_heartbeats(heartbeats, self.last_updated.getStr())
|
||||
manager.process_alerts(filtered_alerts, self.last_updated.getStr())
|
||||
manager.process_heartbeats(heartbeats, timestamp)
|
||||
manager.process_alerts(filtered_alerts, timestamp)
|
||||
|
||||
@property
|
||||
def status(self):
|
||||
|
@ -126,34 +122,39 @@ class SiteManager(object):
|
|||
|
||||
@attr.s
|
||||
class ServiceManager(object):
|
||||
path = attr.ib()
|
||||
definition = attr.ib()
|
||||
path: FilePath = attr.ib()
|
||||
definition: Dict[str, Any] = attr.ib()
|
||||
current_incident = attr.ib(default=None)
|
||||
component_labels = attr.ib(factory=list)
|
||||
component_labels: List[str] = attr.ib(factory=list)
|
||||
label = attr.ib(default="")
|
||||
|
||||
log = Logger()
|
||||
|
||||
def __attrs_post_init__(self):
|
||||
self.label = self.definition["label"]
|
||||
# TODO: Recover status after server restart
|
||||
self.component_labels = [
|
||||
component.label for component in self.definition.components
|
||||
component["label"]
|
||||
for component in cast(
|
||||
List[Dict[str, Any]], self.definition.get("components", [])
|
||||
)
|
||||
]
|
||||
|
||||
def monitoring_down(self, timestamp):
|
||||
def monitoring_down(self, timestamp: str) -> None:
|
||||
if self.current_incident:
|
||||
self.current_incident.monitoring_down(timestamp)
|
||||
|
||||
def process_heartbeats(self, heartbeats, timestamp):
|
||||
def process_heartbeats(self, heartbeats: List[Alert], timestamp: str):
|
||||
if self.current_incident:
|
||||
self.current_incident.process_heartbeats(heartbeats, timestamp)
|
||||
|
||||
def process_alerts(self, alerts, timestamp):
|
||||
def process_alerts(self, alerts: List[Alert], timestamp: str):
|
||||
# Filter by service-affecting alerts
|
||||
alerts = [
|
||||
alert
|
||||
for alert in alerts
|
||||
if alert.labels.service == self.definition.label
|
||||
and alert.labels.component in self.component_labels
|
||||
if alert.labels.get("service") == self.label
|
||||
and alert.labels.get("component") in self.component_labels
|
||||
]
|
||||
|
||||
if alerts and not self.current_incident:
|
||||
|
@ -167,7 +168,7 @@ class ServiceManager(object):
|
|||
if self.current_incident:
|
||||
self.current_incident.process_alerts(alerts, timestamp)
|
||||
|
||||
def resolve_incident(self):
|
||||
def resolve_incident(self) -> None:
|
||||
self.current_incident = None
|
||||
|
||||
@property
|
||||
|
@ -183,25 +184,23 @@ class ServiceManager(object):
|
|||
)
|
||||
return Severity.OK
|
||||
|
||||
@property
|
||||
def past_incidents(self):
|
||||
past = self.path.children().sort()
|
||||
if self.current_incident and self.current_incident.path in past:
|
||||
past.remove(self.current_incident.path)
|
||||
past.reverse() # Beautiful sleepless code poetry
|
||||
# TODO: get limit from config?
|
||||
return past[:5]
|
||||
# @property
|
||||
# def past_incidents(self):
|
||||
# past = self.path.children().sort()
|
||||
# if self.current_incident and self.current_incident.path in past:
|
||||
# past.remove(self.current_incident.path)
|
||||
# past.reverse() # Beautiful sleepless code poetry
|
||||
# # TODO: get limit from config?
|
||||
# return past[:5]
|
||||
|
||||
@property
|
||||
def components(self):
|
||||
def components(self) -> List[Dict[str, Any]]:
|
||||
return [
|
||||
Munch.fromDict(
|
||||
{
|
||||
"definition": component,
|
||||
"status": self.current_incident.component_status(component.label)
|
||||
if self.current_incident
|
||||
else Severity.OK,
|
||||
}
|
||||
)
|
||||
for component in self.definition.components
|
||||
{
|
||||
"definition": component,
|
||||
"status": self.current_incident.component_status(component["label"])
|
||||
if self.current_incident
|
||||
else Severity.OK,
|
||||
}
|
||||
for component in self.definition.get("components", [])
|
||||
]
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
from twisted.web import server, resource
|
||||
from typing import Any, Dict
|
||||
|
||||
from twisted.internet import defer
|
||||
from twisted.web._responses import *
|
||||
from twisted.web import resource, server
|
||||
from twisted.web._responses import INTERNAL_SERVER_ERROR, OK, UNAUTHORIZED
|
||||
from twisted.web.server import Request
|
||||
|
||||
|
||||
class TokenResource(resource.Resource):
|
||||
|
@ -16,14 +19,14 @@ class TokenResource(resource.Resource):
|
|||
|
||||
HEADER = "Auth-Token"
|
||||
|
||||
def __init__(self, tokens=dict()):
|
||||
def __init__(self, tokens: Dict[str, Any] = dict()):
|
||||
"""
|
||||
@param tokens: The mapping of valid tokens to target objects.
|
||||
"""
|
||||
resource.Resource.__init__(self)
|
||||
self.tokens = tokens
|
||||
|
||||
def render(self, request):
|
||||
def render(self, request: Request):
|
||||
"""
|
||||
See L{resource.Resource}.
|
||||
|
||||
|
@ -38,13 +41,13 @@ class TokenResource(resource.Resource):
|
|||
if token_data is None:
|
||||
return self._unauthorized(request)
|
||||
|
||||
self._processToken(token_data, request)
|
||||
self._processToken(token_data, request).addCallback(lambda x: request.finish())
|
||||
return server.NOT_DONE_YET
|
||||
|
||||
def preprocess_header(self, header):
|
||||
def preprocess_header(self, header: str) -> str:
|
||||
return header
|
||||
|
||||
def processToken(self, token_data, request):
|
||||
def processToken(self, token_data: Any, request: Request) -> int:
|
||||
"""
|
||||
Process the token and write to request as needed.
|
||||
|
||||
|
@ -57,10 +60,10 @@ class TokenResource(resource.Resource):
|
|||
@param token_data: The object associated with the passed token.
|
||||
@param request: The request object associated to this request.
|
||||
"""
|
||||
pass
|
||||
return OK
|
||||
|
||||
@defer.inlineCallbacks
|
||||
def _processToken(self, token_data, request):
|
||||
def _processToken(self, token_data: Any, request: Request):
|
||||
"""
|
||||
Invoke L{TokenResource.processToken} and produce a 500 if it fails.
|
||||
|
||||
|
@ -68,24 +71,25 @@ class TokenResource(resource.Resource):
|
|||
@param request: The request object associated to this request.
|
||||
"""
|
||||
try:
|
||||
success = yield defer.maybeDeferred(self.processToken, token_data, request)
|
||||
except Exception as ex:
|
||||
code: int = yield defer.maybeDeferred(
|
||||
self.processToken, token_data, request
|
||||
)
|
||||
except Exception:
|
||||
import traceback
|
||||
|
||||
traceback.print_stack()
|
||||
success = False
|
||||
request.setResponseCode(OK if success else INTERNAL_SERVER_ERROR)
|
||||
request.finish()
|
||||
defer.returnValue(success)
|
||||
code = INTERNAL_SERVER_ERROR
|
||||
request.setResponseCode(code) # type: ignore
|
||||
defer.returnValue(code)
|
||||
|
||||
def _unauthorized(self, request):
|
||||
def _unauthorized(self, request: Request):
|
||||
"""
|
||||
Send a 401 Unauthorized response.
|
||||
|
||||
@param request: The request object associated to this request.
|
||||
"""
|
||||
request.setResponseCode(UNAUTHORIZED)
|
||||
return self.unauthorizedPage().render(request)
|
||||
request.setResponseCode(UNAUTHORIZED) # type: ignore
|
||||
return self.unauthorizedPage().render(request) # type: ignore
|
||||
|
||||
def unauthorizedPage(self):
|
||||
"""
|
||||
|
@ -102,7 +106,7 @@ class TokenResource(resource.Resource):
|
|||
"""
|
||||
return "Pass a valid token in the {} header.".format(self.HEADER)
|
||||
|
||||
def getChild(self, name, request):
|
||||
def getChild(self, name: str, request: Request):
|
||||
"""
|
||||
Use this child for everything but the explicitly overriden.
|
||||
|
||||
|
|
|
@ -1,16 +1,20 @@
|
|||
from klein import Klein
|
||||
from twisted.web import resource, static
|
||||
from twisted.python.filepath import FilePath
|
||||
from twisted.logger import Logger
|
||||
# pyright: reportUnusedFunction=false
|
||||
from typing import cast
|
||||
|
||||
import jinja2
|
||||
import markdown
|
||||
from klein import Klein
|
||||
from twisted.logger import Logger
|
||||
from twisted.python.filepath import FilePath
|
||||
from twisted.web import resource, static
|
||||
from twisted.web.server import Request
|
||||
|
||||
from .Config import Config
|
||||
from .AdlerManagerTokenResource import AdlerManagerTokenResource
|
||||
from .Config import Config
|
||||
from .SitesManager import SiteManager, SitesManager
|
||||
|
||||
|
||||
def get_jinja_env(supportDir):
|
||||
def get_jinja_env(supportDir: str):
|
||||
"""
|
||||
Return a L{jinja2.Environment} with templates loaded from:
|
||||
- Package
|
||||
|
@ -36,19 +40,21 @@ def get_jinja_env(supportDir):
|
|||
),
|
||||
autoescape=True,
|
||||
)
|
||||
templates.filters["markdown"] = lambda txt: jinja2.Markup(md.convert(txt))
|
||||
templates.filters["markdown"] = lambda txt: jinja2.Markup( # type: ignore
|
||||
md.convert(txt) # type: ignore
|
||||
)
|
||||
return templates
|
||||
|
||||
|
||||
def web_root(sites_manager):
|
||||
def web_root(sites_manager: "SitesManager"):
|
||||
app = Klein()
|
||||
log = Logger()
|
||||
|
||||
@app.route("/")
|
||||
def index(request):
|
||||
@app.route("/") # type: ignore
|
||||
def index(request: Request):
|
||||
try:
|
||||
host = request.getRequestHostname().decode("utf-8")
|
||||
except:
|
||||
host = cast(str, request.getRequestHostname().decode("utf-8"))
|
||||
except Exception:
|
||||
return resource.ErrorPage(
|
||||
400, "Bad cat", '<a href="http://http.cat/400">http://http.cat/400</a>'
|
||||
)
|
||||
|
@ -56,26 +62,30 @@ def web_root(sites_manager):
|
|||
return resource.ErrorPage(
|
||||
404, "Gone cat", '<a href="http://http.cat/404">http://http.cat/404</a>'
|
||||
)
|
||||
site: SiteManager
|
||||
try:
|
||||
site = sites_manager.site_managers[host]
|
||||
except Exception as ex:
|
||||
except Exception:
|
||||
import traceback
|
||||
|
||||
traceback.print_stack()
|
||||
log.failure("sad cat")
|
||||
return resource.ErrorPage(
|
||||
500, "Sad cat", '<a href="http://http.cat/500">http://http.cat/500</a>'
|
||||
)
|
||||
|
||||
site_path = FilePath(Config.data_dir).child("sites").child(host).path
|
||||
site_path = cast(str, FilePath(Config.data_dir).child("sites").child(host).path)
|
||||
templates = get_jinja_env(site_path)
|
||||
template = templates.get_template("template.j2")
|
||||
|
||||
return template.render(site=site)
|
||||
|
||||
@app.route("/api/v1/alerts", methods=["POST"])
|
||||
def alert_handler(request):
|
||||
@app.route("/api/v1/alerts", methods=["POST"]) # type: ignore
|
||||
def alert_handler(request: Request):
|
||||
return AdlerManagerTokenResource(sites_manager)
|
||||
|
||||
@app.route("/static", branch=True)
|
||||
def static_files(request):
|
||||
@app.route("/static", branch=True) # type: ignore
|
||||
def static_files(request: Request):
|
||||
return static.File(Config.web_static_dir)
|
||||
|
||||
return app.resource()
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
from .Config import Config
|
||||
from .AdlerManagerTokenResource import AdlerManagerTokenResource
|
||||
from .Config import Config
|
||||
from .SitesManager import SitesManager
|
||||
from .WebRoot import web_root
|
||||
|
|
|
@ -1,17 +1,17 @@
|
|||
from twisted.cred import portal
|
||||
from twisted.conch.manhole_tap import chainedProtocolFactory
|
||||
from twisted.conch import interfaces as conchinterfaces
|
||||
from twisted.conch import manhole_ssh, recvline, avatar
|
||||
from twisted.conch.ssh import keys, session
|
||||
from twisted.conch.insults import insults
|
||||
from twisted.conch.checkers import IAuthorizedKeysDB, SSHPublicKeyChecker
|
||||
from twisted.conch.checkers import readAuthorizedKeyFile
|
||||
|
||||
from twisted.python import filepath
|
||||
|
||||
from zope.interface import implementer
|
||||
|
||||
from twisted.application import service, strports
|
||||
from twisted.conch import avatar, interfaces as conchinterfaces, manhole_ssh, recvline
|
||||
from twisted.conch.checkers import (
|
||||
IAuthorizedKeysDB,
|
||||
SSHPublicKeyChecker,
|
||||
readAuthorizedKeyFile,
|
||||
)
|
||||
from twisted.conch.insults import insults
|
||||
from twisted.conch.manhole_tap import chainedProtocolFactory
|
||||
from twisted.conch.ssh import keys, session
|
||||
from twisted.cred import portal
|
||||
from twisted.python import filepath
|
||||
|
||||
|
||||
class SSHSimpleProtocol(recvline.HistoricRecvLine):
|
||||
|
|
65
adlermanager/model.py
Normal file
65
adlermanager/model.py
Normal file
|
@ -0,0 +1,65 @@
|
|||
from datetime import datetime
|
||||
from enum import IntEnum
|
||||
from typing import Any, Dict, Optional, cast
|
||||
|
||||
import attr
|
||||
|
||||
from .utils import current_time, read_timestamp
|
||||
|
||||
|
||||
class Severity(IntEnum):
|
||||
OK = 0
|
||||
WARNING = 1
|
||||
ERROR = 2
|
||||
|
||||
@classmethod
|
||||
def from_string(cls, s: str) -> "Severity":
|
||||
# TODO: Do something sensitive with other priorities
|
||||
# At least document them somewhere :-D
|
||||
labels = {
|
||||
"ok": cls.OK,
|
||||
"warning": cls.WARNING,
|
||||
"error": cls.ERROR,
|
||||
"critical": cls.ERROR,
|
||||
}
|
||||
return labels[s.lower()]
|
||||
|
||||
@classmethod
|
||||
def from_alert(cls, alert: "Alert") -> "Severity":
|
||||
if alert.endsAt and alert.endsAt <= current_time():
|
||||
return Severity.OK
|
||||
return Severity.from_string(alert.labels.get("severity", "OK"))
|
||||
|
||||
@property
|
||||
def css(self):
|
||||
classes = {self.OK: "success", self.WARNING: "warning", self.ERROR: "danger"}
|
||||
return classes[self]
|
||||
|
||||
def __str__(self):
|
||||
return self.css
|
||||
|
||||
|
||||
@attr.s
|
||||
class Alert:
|
||||
labels: Dict[str, str] = attr.ib(factory=dict)
|
||||
annotations: Dict[str, str] = attr.ib(factory=dict)
|
||||
endsAt: Optional[datetime] = attr.ib(default=None)
|
||||
startsAt: Optional[datetime] = attr.ib(default=None)
|
||||
status: Severity = attr.ib(default=Severity.OK)
|
||||
|
||||
@classmethod
|
||||
def import_alert(cls, d: Dict[str, Any]) -> "Alert":
|
||||
# Convert date data types
|
||||
alert = Alert(
|
||||
labels=d.get("labels", dict()),
|
||||
annotations=d.get("annotations", dict()),
|
||||
)
|
||||
for att in ["startsAt", "endsAt"]:
|
||||
if att in d:
|
||||
try:
|
||||
setattr(alert, att, read_timestamp(cast(str, d[att])))
|
||||
except Exception:
|
||||
setattr(alert, att, None)
|
||||
# Convert severity (needs date data)
|
||||
alert.status = Severity.from_alert(alert)
|
||||
return alert
|
|
@ -1,6 +1,8 @@
|
|||
from datetime import datetime, timezone
|
||||
|
||||
import attr
|
||||
from twisted.internet import defer
|
||||
from twisted.python.failure import Failure
|
||||
from twisted.python.filepath import FilePath
|
||||
|
||||
_blessed_date_format = "%Y-%m-%dT%H:%M:%S%z"
|
||||
|
@ -39,3 +41,14 @@ def read_timestamp(s: str) -> datetime:
|
|||
|
||||
def ensure_dirs(path: FilePath) -> None:
|
||||
path.makedirs(ignoreExistingDirectory=True)
|
||||
|
||||
|
||||
def default_errback(failure: Failure) -> None:
|
||||
failure.trap(defer.CancelledError) # type: ignore
|
||||
pass
|
||||
|
||||
|
||||
def noop_deferred() -> defer.Deferred[None]:
|
||||
d: defer.Deferred[None] = defer.Deferred()
|
||||
d.addErrback(default_errback)
|
||||
return d
|
||||
|
|
8
app.py
8
app.py
|
@ -11,7 +11,8 @@ from twisted.python.filepath import FilePath
|
|||
# Add current dir to path. Ugly but it works.
|
||||
import sys
|
||||
import os
|
||||
sys.path += [ os.path.dirname(os.path.realpath(__file__)) ]
|
||||
|
||||
sys.path += [os.path.dirname(os.path.realpath(__file__))]
|
||||
|
||||
from adlermanager import Config, SitesManager
|
||||
from adlermanager import web_root
|
||||
|
@ -19,7 +20,7 @@ from adlermanager import web_root
|
|||
if not FilePath(Config.data_dir).isdir():
|
||||
FilePath(Config.data_dir).createDirectory()
|
||||
|
||||
application = service.Application('AdlerManager')
|
||||
application = service.Application("AdlerManager")
|
||||
serv_collection = service.IServiceCollection(application)
|
||||
|
||||
# TokenResource
|
||||
|
@ -43,5 +44,6 @@ if Config.ssh_enabled:
|
|||
Config.ssh_endpoint,
|
||||
proto=AdlerManagerSSHProtocol,
|
||||
keyDir=Config.ssh_keys_dir,
|
||||
keySize=Config.ssh_key_size)
|
||||
keySize=Config.ssh_key_size,
|
||||
)
|
||||
i.setServiceParent(serv_collection)
|
||||
|
|
|
@ -23,7 +23,8 @@ curl -X POST \
|
|||
"severity": "error"
|
||||
},
|
||||
"annotations": {
|
||||
"summary": "This is a mock alert. Hello world!"
|
||||
"summary": "Hello world!",
|
||||
"description": "This is a mock alert"
|
||||
}
|
||||
}
|
||||
]
|
||||
|
|
Loading…
Reference in a new issue