516 lines
16 KiB
Python
516 lines
16 KiB
Python
"""
|
|
Monitoring models for comprehensive system observability
|
|
"""
|
|
import uuid
|
|
import json
|
|
from datetime import datetime, timedelta
|
|
from typing import Dict, Any, Optional, List
|
|
from decimal import Decimal
|
|
|
|
from django.db import models
|
|
from django.contrib.auth import get_user_model
|
|
from django.core.validators import MinValueValidator, MaxValueValidator
|
|
from django.utils import timezone
|
|
from django.core.exceptions import ValidationError
|
|
|
|
# Active user model, resolved once at import time through Django's
# get_user_model(); the relation fields in this module reference it as `User`.
User = get_user_model()
|
|
|
|
|
|
class MonitoringTarget(models.Model):
    """Target systems, services, or components to monitor.

    Holds the connection details, check scheduling parameters, health-check
    settings and the most recently recorded status for one monitored target.
    Instances are ordered by ``name``, which is unique.
    """

    TARGET_TYPES = [
        ('APPLICATION', 'Application'),
        ('DATABASE', 'Database'),
        ('CACHE', 'Cache'),
        ('QUEUE', 'Message Queue'),
        ('EXTERNAL_API', 'External API'),
        ('SERVICE', 'Internal Service'),
        ('INFRASTRUCTURE', 'Infrastructure'),
        ('MODULE', 'Django Module'),
    ]

    STATUS_CHOICES = [
        ('ACTIVE', 'Active'),
        ('INACTIVE', 'Inactive'),
        ('MAINTENANCE', 'Maintenance'),
        ('ERROR', 'Error'),
    ]

    # Choices for ``last_status``; named so they can be referenced like the
    # other choice lists on this model instead of living inline in the field.
    LAST_STATUS_CHOICES = [
        ('HEALTHY', 'Healthy'),
        ('WARNING', 'Warning'),
        ('CRITICAL', 'Critical'),
        ('UNKNOWN', 'Unknown'),
    ]

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    name = models.CharField(max_length=200, unique=True)
    description = models.TextField()
    target_type = models.CharField(max_length=20, choices=TARGET_TYPES)

    # Connection details
    endpoint_url = models.URLField(blank=True, null=True)
    # blank=True: Django treats {} as an empty value, so without it
    # full_clean() and model forms reject the field's own default.
    # NOTE(review): help_text mentions credentials; nothing in this model
    # encrypts them - confirm secrets are protected elsewhere.
    connection_config = models.JSONField(
        default=dict,
        blank=True,
        help_text="Connection configuration (credentials, timeouts, etc.)"
    )

    # Monitoring configuration
    check_interval_seconds = models.PositiveIntegerField(default=60)
    timeout_seconds = models.PositiveIntegerField(default=30)
    retry_count = models.PositiveIntegerField(default=3)

    # Health check configuration
    health_check_enabled = models.BooleanField(default=True)
    health_check_endpoint = models.CharField(max_length=200, blank=True, null=True)
    # blank=True for the same reason as connection_config: the default [] is
    # an empty value and would otherwise fail validation.
    expected_status_codes = models.JSONField(
        default=list,
        blank=True,
        help_text="Expected HTTP status codes for health checks"
    )

    # Status and metadata
    status = models.CharField(max_length=20, choices=STATUS_CHOICES, default='ACTIVE')
    last_checked = models.DateTimeField(null=True, blank=True)
    last_status = models.CharField(
        max_length=20,
        choices=LAST_STATUS_CHOICES,
        default='UNKNOWN',
    )

    # Related module (if applicable)
    related_module = models.CharField(
        max_length=50,
        blank=True,
        null=True,
        help_text="Related Django module (e.g., 'security', 'incident_intelligence')"
    )

    # Metadata
    created_by = models.ForeignKey(User, on_delete=models.SET_NULL, null=True)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ['name']
        indexes = [
            models.Index(fields=['target_type', 'status']),
            models.Index(fields=['related_module']),
            models.Index(fields=['last_checked']),
        ]

    def __str__(self):
        """Return the target name followed by its type code in parentheses."""
        return f"{self.name} ({self.target_type})"
|
|
|
|
|
|
class HealthCheck(models.Model):
    """Result record of a single health check run against a monitoring target.

    Every row belongs to one ``MonitoringTarget`` (deleted together with it)
    and stores the check type, the resulting status, optional response
    details and optional resource-usage figures. Rows sort newest first by
    ``checked_at``.
    """

    # Kinds of check that can be recorded.
    CHECK_TYPES = [
        ("HTTP", "HTTP Health Check"),
        ("DATABASE", "Database Connection"),
        ("CACHE", "Cache Connection"),
        ("QUEUE", "Message Queue"),
        ("CUSTOM", "Custom Check"),
        ("PING", "Network Ping"),
        ("SSL", "SSL Certificate"),
    ]

    # Possible outcomes of a check.
    STATUS_CHOICES = [
        ("HEALTHY", "Healthy"),
        ("WARNING", "Warning"),
        ("CRITICAL", "Critical"),
        ("UNKNOWN", "Unknown"),
    ]

    id = models.UUIDField(
        primary_key=True,
        default=uuid.uuid4,
        editable=False,
    )
    target = models.ForeignKey(
        MonitoringTarget,
        on_delete=models.CASCADE,
        related_name="health_checks",
    )

    # --- What was checked and how it went ---
    check_type = models.CharField(max_length=20, choices=CHECK_TYPES)
    status = models.CharField(max_length=20, choices=STATUS_CHOICES)
    response_time_ms = models.PositiveIntegerField(null=True, blank=True)

    # --- Raw response information (all optional) ---
    status_code = models.PositiveIntegerField(null=True, blank=True)
    response_body = models.TextField(blank=True, null=True)
    error_message = models.TextField(blank=True, null=True)

    # --- Resource usage captured with the check (all optional) ---
    cpu_usage_percent = models.FloatField(null=True, blank=True)
    memory_usage_percent = models.FloatField(null=True, blank=True)
    disk_usage_percent = models.FloatField(null=True, blank=True)

    # --- Set automatically when the row is first saved ---
    checked_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        ordering = ["-checked_at"]
        indexes = [
            models.Index(fields=["target", "checked_at"]),
            models.Index(fields=["status", "checked_at"]),
            models.Index(fields=["check_type"]),
        ]

    def __str__(self):
        """Return the target name, the status and the check time as text."""
        return f"{self.target.name} - {self.status} ({self.checked_at})"
|
|
|
|
|
|
class SystemMetric(models.Model):
    """Definition of a system performance or operational metric.

    Describes what is measured (type, category, unit), how values are
    aggregated, how often they are collected and how long they are kept,
    plus optional warning and critical thresholds. Instances sort by
    ``name``.
    """

    # Broad classification of a metric.
    METRIC_TYPES = [
        ("PERFORMANCE", "Performance Metric"),
        ("BUSINESS", "Business Metric"),
        ("SECURITY", "Security Metric"),
        ("INFRASTRUCTURE", "Infrastructure Metric"),
        ("CUSTOM", "Custom Metric"),
    ]

    # Finer-grained category within a metric type.
    METRIC_CATEGORIES = [
        ("API_RESPONSE_TIME", "API Response Time"),
        ("THROUGHPUT", "Throughput"),
        ("ERROR_RATE", "Error Rate"),
        ("AVAILABILITY", "Availability"),
        ("INCIDENT_COUNT", "Incident Count"),
        ("MTTR", "Mean Time to Resolve"),
        ("MTTA", "Mean Time to Acknowledge"),
        ("SLA_COMPLIANCE", "SLA Compliance"),
        ("SECURITY_EVENTS", "Security Events"),
        ("AUTOMATION_SUCCESS", "Automation Success Rate"),
        ("AI_ACCURACY", "AI Model Accuracy"),
        ("COST_IMPACT", "Cost Impact"),
        ("USER_ACTIVITY", "User Activity"),
        ("SYSTEM_RESOURCES", "System Resources"),
    ]

    id = models.UUIDField(
        primary_key=True,
        default=uuid.uuid4,
        editable=False,
    )
    name = models.CharField(max_length=200)
    description = models.TextField()
    metric_type = models.CharField(max_length=20, choices=METRIC_TYPES)
    category = models.CharField(max_length=30, choices=METRIC_CATEGORIES)

    # --- How the metric is expressed and aggregated ---
    unit = models.CharField(
        max_length=50,
        help_text="Unit of measurement",
    )
    aggregation_method = models.CharField(
        max_length=20,
        choices=[
            ("AVERAGE", "Average"),
            ("SUM", "Sum"),
            ("COUNT", "Count"),
            ("MIN", "Minimum"),
            ("MAX", "Maximum"),
            ("PERCENTILE_95", "95th Percentile"),
            ("PERCENTILE_99", "99th Percentile"),
        ],
    )

    # --- Collection schedule and retention ---
    # Default of 300 seconds is 5 minutes.
    collection_interval_seconds = models.PositiveIntegerField(default=300)
    retention_days = models.PositiveIntegerField(default=90)

    # --- Optional alerting thresholds ---
    warning_threshold = models.FloatField(null=True, blank=True)
    critical_threshold = models.FloatField(null=True, blank=True)

    # --- Flags ---
    is_active = models.BooleanField(default=True)
    is_system_metric = models.BooleanField(default=False)

    # --- Optional link to a Django module by name ---
    related_module = models.CharField(
        max_length=50,
        blank=True,
        null=True,
        help_text="Related Django module",
    )

    # --- Audit information ---
    created_by = models.ForeignKey(User, on_delete=models.SET_NULL, null=True)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ["name"]
        indexes = [
            models.Index(fields=["metric_type", "category"]),
            models.Index(fields=["related_module"]),
            models.Index(fields=["is_active"]),
        ]

    def __str__(self):
        """Return the metric name followed by its category code."""
        return f"{self.name} ({self.category})"
|
|
|
|
|
|
class MetricMeasurement(models.Model):
    """Individual metric measurements.

    One row per recorded value of a ``SystemMetric``; rows are removed with
    their metric and are ordered newest first by ``timestamp``.
    """

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    metric = models.ForeignKey(SystemMetric, on_delete=models.CASCADE, related_name='measurements')

    # Measurement details
    value = models.DecimalField(max_digits=15, decimal_places=4)
    # auto_now_add: set on first save.
    timestamp = models.DateTimeField(auto_now_add=True)

    # Context
    # blank=True on both JSON fields: Django treats {} as an empty value, so
    # without it full_clean() and model forms reject the fields' own default.
    tags = models.JSONField(
        default=dict,
        blank=True,
        help_text="Additional tags for this measurement"
    )
    metadata = models.JSONField(
        default=dict,
        blank=True,
        help_text="Additional metadata"
    )

    class Meta:
        ordering = ['-timestamp']
        indexes = [
            models.Index(fields=['metric', 'timestamp']),
            models.Index(fields=['timestamp']),
        ]

    def __str__(self):
        """Return the metric name, the measured value and the timestamp."""
        return f"{self.metric.name}: {self.value} ({self.timestamp})"
|
|
|
|
|
|
class AlertRule(models.Model):
    """Alert rules for monitoring thresholds.

    A rule carries a JSON ``condition``, an evaluation interval, optional
    links to a ``SystemMetric`` and/or a ``MonitoringTarget``, and the
    notification settings to use. Instances are ordered by ``name``.
    """

    ALERT_TYPES = [
        ('THRESHOLD', 'Threshold Alert'),
        ('ANOMALY', 'Anomaly Alert'),
        ('PATTERN', 'Pattern Alert'),
        ('AVAILABILITY', 'Availability Alert'),
        ('PERFORMANCE', 'Performance Alert'),
    ]

    SEVERITY_CHOICES = [
        ('LOW', 'Low'),
        ('MEDIUM', 'Medium'),
        ('HIGH', 'High'),
        ('CRITICAL', 'Critical'),
    ]

    STATUS_CHOICES = [
        ('ACTIVE', 'Active'),
        ('INACTIVE', 'Inactive'),
        ('MAINTENANCE', 'Maintenance'),
    ]

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    name = models.CharField(max_length=200)
    description = models.TextField()
    alert_type = models.CharField(max_length=20, choices=ALERT_TYPES)
    severity = models.CharField(max_length=20, choices=SEVERITY_CHOICES)

    # Rule configuration
    # No default and no blank=True: a non-empty condition is required.
    condition = models.JSONField(
        help_text="Alert condition configuration"
    )
    evaluation_interval_seconds = models.PositiveIntegerField(default=60)

    # Related objects
    # Both links are optional; the model itself does not require either.
    metric = models.ForeignKey(
        SystemMetric,
        on_delete=models.CASCADE,
        null=True,
        blank=True,
        related_name='alert_rules'
    )
    target = models.ForeignKey(
        MonitoringTarget,
        on_delete=models.CASCADE,
        null=True,
        blank=True,
        related_name='alert_rules'
    )

    # Notification configuration
    # blank=True: Django treats [] as an empty value, so without it
    # full_clean() and model forms reject the field's own default.
    notification_channels = models.JSONField(
        default=list,
        blank=True,
        help_text="List of notification channels (email, slack, webhook, etc.)"
    )
    notification_template = models.TextField(
        blank=True,
        null=True,
        help_text="Custom notification template"
    )

    # Status
    status = models.CharField(max_length=20, choices=STATUS_CHOICES, default='ACTIVE')
    is_enabled = models.BooleanField(default=True)

    # Metadata
    created_by = models.ForeignKey(User, on_delete=models.SET_NULL, null=True)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ['name']
        indexes = [
            models.Index(fields=['alert_type', 'severity']),
            models.Index(fields=['status', 'is_enabled']),
        ]

    def __str__(self):
        """Return the rule name followed by its severity code."""
        return f"{self.name} ({self.severity})"
|
|
|
|
|
|
class Alert(models.Model):
    """Alert instances.

    One row per alert raised for an ``AlertRule``. Stores the alert text,
    severity and status, optional triggering and threshold values, lifecycle
    timestamps and the users who acknowledged or resolved it. Rows are
    ordered newest first by ``triggered_at``.
    """

    STATUS_CHOICES = [
        ('TRIGGERED', 'Triggered'),
        ('ACKNOWLEDGED', 'Acknowledged'),
        ('RESOLVED', 'Resolved'),
        ('SUPPRESSED', 'Suppressed'),
    ]

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    rule = models.ForeignKey(AlertRule, on_delete=models.CASCADE, related_name='alerts')

    # Alert details
    title = models.CharField(max_length=200)
    description = models.TextField()
    # Reuses the rule model's severity choices so both stay in sync.
    severity = models.CharField(max_length=20, choices=AlertRule.SEVERITY_CHOICES)
    status = models.CharField(max_length=20, choices=STATUS_CHOICES, default='TRIGGERED')

    # Context
    triggered_value = models.DecimalField(max_digits=15, decimal_places=4, null=True, blank=True)
    threshold_value = models.DecimalField(max_digits=15, decimal_places=4, null=True, blank=True)
    # blank=True: Django treats {} as an empty value, so without it
    # full_clean() and model forms reject the field's own default.
    context_data = models.JSONField(
        default=dict,
        blank=True,
        help_text="Additional context data for the alert"
    )

    # Timestamps
    triggered_at = models.DateTimeField(auto_now_add=True)
    acknowledged_at = models.DateTimeField(null=True, blank=True)
    resolved_at = models.DateTimeField(null=True, blank=True)

    # Assignment
    # Distinct related_name values are required because both fields point at
    # the same user model.
    acknowledged_by = models.ForeignKey(
        User,
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name='acknowledged_alerts'
    )
    resolved_by = models.ForeignKey(
        User,
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name='resolved_alerts'
    )

    class Meta:
        ordering = ['-triggered_at']
        indexes = [
            models.Index(fields=['rule', 'status']),
            models.Index(fields=['severity', 'status']),
            models.Index(fields=['triggered_at']),
        ]

    def __str__(self):
        """Return the alert title, severity code and status code."""
        return f"{self.title} ({self.severity}) - {self.status}"
|
|
|
|
|
|
class MonitoringDashboard(models.Model):
    """Monitoring dashboard configurations.

    Stores the layout and widget configuration of a dashboard together with
    its access-control settings (public flag, allowed users, allowed roles)
    and auto-refresh settings. Instances are ordered by ``name``.
    """

    DASHBOARD_TYPES = [
        ('SYSTEM_OVERVIEW', 'System Overview'),
        ('PERFORMANCE', 'Performance'),
        ('BUSINESS_METRICS', 'Business Metrics'),
        ('SECURITY', 'Security'),
        ('INFRASTRUCTURE', 'Infrastructure'),
        ('CUSTOM', 'Custom'),
    ]

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    name = models.CharField(max_length=200)
    description = models.TextField()
    dashboard_type = models.CharField(max_length=20, choices=DASHBOARD_TYPES)

    # Dashboard configuration
    # blank=True on the JSON fields below: Django treats {} and [] as empty
    # values, so without it full_clean() and model forms reject the fields'
    # own defaults.
    layout_config = models.JSONField(
        default=dict,
        blank=True,
        help_text="Dashboard layout configuration"
    )
    widget_configs = models.JSONField(
        default=list,
        blank=True,
        help_text="Configuration for dashboard widgets"
    )

    # Access control
    is_public = models.BooleanField(default=False)
    allowed_users = models.ManyToManyField(
        User,
        blank=True,
        related_name='accessible_monitoring_dashboards'
    )
    allowed_roles = models.JSONField(
        default=list,
        blank=True,
        help_text="List of roles that can access this dashboard"
    )

    # Refresh configuration
    auto_refresh_enabled = models.BooleanField(default=True)
    refresh_interval_seconds = models.PositiveIntegerField(default=30)

    # Status
    is_active = models.BooleanField(default=True)
    created_by = models.ForeignKey(User, on_delete=models.SET_NULL, null=True)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ['name']
        indexes = [
            models.Index(fields=['dashboard_type', 'is_active']),
            models.Index(fields=['is_public']),
        ]

    def __str__(self):
        """Return the dashboard name followed by its type code."""
        return f"{self.name} ({self.dashboard_type})"
|
|
|
|
|
|
class SystemStatus(models.Model):
    """Overall system status tracking.

    Each row records one status entry with a user-facing message, the list
    of affected services, an optional estimated resolution time and
    lifecycle timestamps. Rows are ordered newest first by ``started_at``.
    """

    STATUS_CHOICES = [
        ('OPERATIONAL', 'Operational'),
        ('DEGRADED', 'Degraded'),
        ('PARTIAL_OUTAGE', 'Partial Outage'),
        ('MAJOR_OUTAGE', 'Major Outage'),
        ('MAINTENANCE', 'Maintenance'),
    ]

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    status = models.CharField(max_length=20, choices=STATUS_CHOICES)
    message = models.TextField(help_text="Status message for users")

    # Impact details
    # blank=True: Django treats [] as an empty value, so without it
    # full_clean() and model forms reject the field's own default.
    affected_services = models.JSONField(
        default=list,
        blank=True,
        help_text="List of affected services"
    )
    estimated_resolution = models.DateTimeField(null=True, blank=True)

    # Timestamps
    started_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)
    resolved_at = models.DateTimeField(null=True, blank=True)

    # Metadata
    created_by = models.ForeignKey(User, on_delete=models.SET_NULL, null=True)

    class Meta:
        ordering = ['-started_at']
        indexes = [
            models.Index(fields=['status', 'started_at']),
            models.Index(fields=['started_at']),
        ]

    def __str__(self):
        """Return the status code and the start time as text."""
        return f"System Status: {self.status} ({self.started_at})"

    @property
    def is_resolved(self):
        """Return True when ``resolved_at`` has been set, False otherwise."""
        return self.resolved_at is not None
|