"""
Monitoring models for comprehensive system observability.

Defines the Django ORM models for monitored targets, their health check
results, metric definitions and measurements, alert rules and alert
instances, dashboard configurations, and overall system status records.
"""
import json
import uuid
from datetime import datetime, timedelta
from decimal import Decimal
from typing import Any, Dict, List, Optional

from django.contrib.auth import get_user_model
from django.core.exceptions import ValidationError
from django.core.validators import MaxValueValidator, MinValueValidator
from django.db import models
from django.utils import timezone

User = get_user_model()


class MonitoringTarget(models.Model):
    """A system, service, or component that is monitored."""

    TARGET_TYPES = [
        ('APPLICATION', 'Application'),
        ('DATABASE', 'Database'),
        ('CACHE', 'Cache'),
        ('QUEUE', 'Message Queue'),
        ('EXTERNAL_API', 'External API'),
        ('SERVICE', 'Internal Service'),
        ('INFRASTRUCTURE', 'Infrastructure'),
        ('MODULE', 'Django Module'),
    ]

    STATUS_CHOICES = [
        ('ACTIVE', 'Active'),
        ('INACTIVE', 'Inactive'),
        ('MAINTENANCE', 'Maintenance'),
        ('ERROR', 'Error'),
    ]

    # Allowed values for ``last_status``.
    LAST_STATUS_CHOICES = [
        ('HEALTHY', 'Healthy'),
        ('WARNING', 'Warning'),
        ('CRITICAL', 'Critical'),
        ('UNKNOWN', 'Unknown'),
    ]

    # --- Identity -----------------------------------------------------
    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    name = models.CharField(max_length=200, unique=True)
    description = models.TextField()
    target_type = models.CharField(max_length=20, choices=TARGET_TYPES)

    # --- Connection details -------------------------------------------
    endpoint_url = models.URLField(blank=True, null=True)
    connection_config = models.JSONField(
        default=dict,
        help_text="Connection configuration (credentials, timeouts, etc.)",
    )

    # --- Monitoring configuration -------------------------------------
    check_interval_seconds = models.PositiveIntegerField(default=60)
    timeout_seconds = models.PositiveIntegerField(default=30)
    retry_count = models.PositiveIntegerField(default=3)

    # --- Health check configuration -----------------------------------
    health_check_enabled = models.BooleanField(default=True)
    health_check_endpoint = models.CharField(
        max_length=200,
        blank=True,
        null=True,
    )
    expected_status_codes = models.JSONField(
        default=list,
        help_text="Expected HTTP status codes for health checks",
    )

    # --- Status -------------------------------------------------------
    status = models.CharField(
        max_length=20,
        choices=STATUS_CHOICES,
        default='ACTIVE',
    )
    last_checked = models.DateTimeField(null=True, blank=True)
    last_status = models.CharField(
        max_length=20,
        choices=LAST_STATUS_CHOICES,
        default='UNKNOWN',
    )

    # --- Related module (if applicable) -------------------------------
    related_module = models.CharField(
        max_length=50,
        blank=True,
        null=True,
        help_text="Related Django module (e.g., 'security', 'incident_intelligence')",
    )

    # --- Audit metadata -----------------------------------------------
    # The creator reference is nulled (not cascaded) when the user is deleted.
    created_by = models.ForeignKey(User, on_delete=models.SET_NULL, null=True)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ['name']
        indexes = [
            models.Index(fields=['target_type', 'status']),
            models.Index(fields=['related_module']),
            models.Index(fields=['last_checked']),
        ]

    def __str__(self):
        """Return the target name followed by its type code."""
        return f"{self.name} ({self.target_type})"


class HealthCheck(models.Model):
    """The recorded result of one health check against a target."""

    CHECK_TYPES = [
        ('HTTP', 'HTTP Health Check'),
        ('DATABASE', 'Database Connection'),
        ('CACHE', 'Cache Connection'),
        ('QUEUE', 'Message Queue'),
        ('CUSTOM', 'Custom Check'),
        ('PING', 'Network Ping'),
        ('SSL', 'SSL Certificate'),
    ]

    STATUS_CHOICES = [
        ('HEALTHY', 'Healthy'),
        ('WARNING', 'Warning'),
        ('CRITICAL', 'Critical'),
        ('UNKNOWN', 'Unknown'),
    ]

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    # Results are deleted together with the target they belong to.
    target = models.ForeignKey(
        MonitoringTarget,
        on_delete=models.CASCADE,
        related_name='health_checks',
    )

    # --- Check details ------------------------------------------------
    check_type = models.CharField(max_length=20, choices=CHECK_TYPES)
    status = models.CharField(max_length=20, choices=STATUS_CHOICES)
    response_time_ms = models.PositiveIntegerField(null=True, blank=True)

    # --- Response details ---------------------------------------------
    status_code = models.PositiveIntegerField(null=True, blank=True)
    response_body = models.TextField(blank=True, null=True)
    error_message = models.TextField(blank=True, null=True)

    # --- Resource metrics (all optional) ------------------------------
    cpu_usage_percent = models.FloatField(null=True, blank=True)
    memory_usage_percent = models.FloatField(null=True, blank=True)
    disk_usage_percent = models.FloatField(null=True, blank=True)

    # --- Timestamps ---------------------------------------------------
    checked_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        ordering = ['-checked_at']
        indexes = [
            models.Index(fields=['target', 'checked_at']),
            models.Index(fields=['status', 'checked_at']),
            models.Index(fields=['check_type']),
        ]

    def __str__(self):
        """Return the target name, check status, and check time."""
        # Accessing ``self.target`` may load the related row if not cached.
        return f"{self.target.name} - {self.status} ({self.checked_at})"


class SystemMetric(models.Model):
    """Definition of a system performance or operational metric."""

    METRIC_TYPES = [
        ('PERFORMANCE', 'Performance Metric'),
        ('BUSINESS', 'Business Metric'),
        ('SECURITY', 'Security Metric'),
        ('INFRASTRUCTURE', 'Infrastructure Metric'),
        ('CUSTOM', 'Custom Metric'),
    ]

    METRIC_CATEGORIES = [
        ('API_RESPONSE_TIME', 'API Response Time'),
        ('THROUGHPUT', 'Throughput'),
        ('ERROR_RATE', 'Error Rate'),
        ('AVAILABILITY', 'Availability'),
        ('INCIDENT_COUNT', 'Incident Count'),
        ('MTTR', 'Mean Time to Resolve'),
        ('MTTA', 'Mean Time to Acknowledge'),
        ('SLA_COMPLIANCE', 'SLA Compliance'),
        ('SECURITY_EVENTS', 'Security Events'),
        ('AUTOMATION_SUCCESS', 'Automation Success Rate'),
        ('AI_ACCURACY', 'AI Model Accuracy'),
        ('COST_IMPACT', 'Cost Impact'),
        ('USER_ACTIVITY', 'User Activity'),
        ('SYSTEM_RESOURCES', 'System Resources'),
    ]

    # Allowed values for ``aggregation_method``.
    AGGREGATION_METHODS = [
        ('AVERAGE', 'Average'),
        ('SUM', 'Sum'),
        ('COUNT', 'Count'),
        ('MIN', 'Minimum'),
        ('MAX', 'Maximum'),
        ('PERCENTILE_95', '95th Percentile'),
        ('PERCENTILE_99', '99th Percentile'),
    ]

    # --- Identity -----------------------------------------------------
    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    name = models.CharField(max_length=200)
    description = models.TextField()
    metric_type = models.CharField(max_length=20, choices=METRIC_TYPES)
    category = models.CharField(max_length=30, choices=METRIC_CATEGORIES)

    # --- Metric configuration -----------------------------------------
    unit = models.CharField(max_length=50, help_text="Unit of measurement")
    aggregation_method = models.CharField(
        max_length=20,
        choices=AGGREGATION_METHODS,
    )

    # --- Collection configuration -------------------------------------
    collection_interval_seconds = models.PositiveIntegerField(
        default=300,  # 5 minutes
    )
    retention_days = models.PositiveIntegerField(default=90)

    # --- Thresholds (optional) ----------------------------------------
    warning_threshold = models.FloatField(null=True, blank=True)
    critical_threshold = models.FloatField(null=True, blank=True)

    # --- Status -------------------------------------------------------
    is_active = models.BooleanField(default=True)
    is_system_metric = models.BooleanField(default=False)

    # --- Related module -----------------------------------------------
    related_module = models.CharField(
        max_length=50,
        blank=True,
        null=True,
        help_text="Related Django module",
    )

    # --- Audit metadata -----------------------------------------------
    created_by = models.ForeignKey(User, on_delete=models.SET_NULL, null=True)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ['name']
        indexes = [
            models.Index(fields=['metric_type', 'category']),
            models.Index(fields=['related_module']),
            models.Index(fields=['is_active']),
        ]

    def __str__(self):
        """Return the metric name followed by its category code."""
        return f"{self.name} ({self.category})"


class MetricMeasurement(models.Model):
    """A single recorded value for a ``SystemMetric``."""

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    # Measurements are deleted together with their metric definition.
    metric = models.ForeignKey(
        SystemMetric,
        on_delete=models.CASCADE,
        related_name='measurements',
    )

    # --- Measurement details ------------------------------------------
    value = models.DecimalField(max_digits=15, decimal_places=4)
    timestamp = models.DateTimeField(auto_now_add=True)

    # --- Context ------------------------------------------------------
    tags = models.JSONField(
        default=dict,
        help_text="Additional tags for this measurement",
    )
    metadata = models.JSONField(
        default=dict,
        help_text="Additional metadata",
    )

    class Meta:
        ordering = ['-timestamp']
        indexes = [
            models.Index(fields=['metric', 'timestamp']),
            models.Index(fields=['timestamp']),
        ]

    def __str__(self):
        """Return the metric name, measured value, and timestamp."""
        # Accessing ``self.metric`` may load the related row if not cached.
        return f"{self.metric.name}: {self.value} ({self.timestamp})"


class AlertRule(models.Model):
    """A rule describing when an alert should be raised."""

    ALERT_TYPES = [
        ('THRESHOLD', 'Threshold Alert'),
        ('ANOMALY', 'Anomaly Alert'),
        ('PATTERN', 'Pattern Alert'),
        ('AVAILABILITY', 'Availability Alert'),
        ('PERFORMANCE', 'Performance Alert'),
    ]

    SEVERITY_CHOICES = [
        ('LOW', 'Low'),
        ('MEDIUM', 'Medium'),
        ('HIGH', 'High'),
        ('CRITICAL', 'Critical'),
    ]

    STATUS_CHOICES = [
        ('ACTIVE', 'Active'),
        ('INACTIVE', 'Inactive'),
        ('MAINTENANCE', 'Maintenance'),
    ]

    # --- Identity -----------------------------------------------------
    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    name = models.CharField(max_length=200)
    description = models.TextField()
    alert_type = models.CharField(max_length=20, choices=ALERT_TYPES)
    severity = models.CharField(max_length=20, choices=SEVERITY_CHOICES)

    # --- Rule configuration -------------------------------------------
    # No default is declared, so a condition must be supplied explicitly.
    condition = models.JSONField(
        help_text="Alert condition configuration",
    )
    evaluation_interval_seconds = models.PositiveIntegerField(default=60)

    # --- Related objects (both optional) ------------------------------
    metric = models.ForeignKey(
        SystemMetric,
        on_delete=models.CASCADE,
        null=True,
        blank=True,
        related_name='alert_rules',
    )
    target = models.ForeignKey(
        MonitoringTarget,
        on_delete=models.CASCADE,
        null=True,
        blank=True,
        related_name='alert_rules',
    )

    # --- Notification configuration -----------------------------------
    notification_channels = models.JSONField(
        default=list,
        help_text="List of notification channels (email, slack, webhook, etc.)",
    )
    notification_template = models.TextField(
        blank=True,
        null=True,
        help_text="Custom notification template",
    )

    # --- Status -------------------------------------------------------
    status = models.CharField(
        max_length=20,
        choices=STATUS_CHOICES,
        default='ACTIVE',
    )
    is_enabled = models.BooleanField(default=True)

    # --- Audit metadata -----------------------------------------------
    created_by = models.ForeignKey(User, on_delete=models.SET_NULL, null=True)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ['name']
        indexes = [
            models.Index(fields=['alert_type', 'severity']),
            models.Index(fields=['status', 'is_enabled']),
        ]

    def __str__(self):
        """Return the rule name followed by its severity code."""
        return f"{self.name} ({self.severity})"


class Alert(models.Model):
    """A concrete alert instance belonging to an ``AlertRule``."""

    STATUS_CHOICES = [
        ('TRIGGERED', 'Triggered'),
        ('ACKNOWLEDGED', 'Acknowledged'),
        ('RESOLVED', 'Resolved'),
        ('SUPPRESSED', 'Suppressed'),
    ]

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    # Alerts are deleted together with the rule they belong to.
    rule = models.ForeignKey(
        AlertRule,
        on_delete=models.CASCADE,
        related_name='alerts',
    )

    # --- Alert details ------------------------------------------------
    title = models.CharField(max_length=200)
    description = models.TextField()
    # Shares the severity vocabulary declared on AlertRule.
    severity = models.CharField(
        max_length=20,
        choices=AlertRule.SEVERITY_CHOICES,
    )
    status = models.CharField(
        max_length=20,
        choices=STATUS_CHOICES,
        default='TRIGGERED',
    )

    # --- Context ------------------------------------------------------
    triggered_value = models.DecimalField(
        max_digits=15,
        decimal_places=4,
        null=True,
        blank=True,
    )
    threshold_value = models.DecimalField(
        max_digits=15,
        decimal_places=4,
        null=True,
        blank=True,
    )
    context_data = models.JSONField(
        default=dict,
        help_text="Additional context data for the alert",
    )

    # --- Timestamps ---------------------------------------------------
    triggered_at = models.DateTimeField(auto_now_add=True)
    acknowledged_at = models.DateTimeField(null=True, blank=True)
    resolved_at = models.DateTimeField(null=True, blank=True)

    # --- Assignment ---------------------------------------------------
    acknowledged_by = models.ForeignKey(
        User,
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name='acknowledged_alerts',
    )
    resolved_by = models.ForeignKey(
        User,
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name='resolved_alerts',
    )

    class Meta:
        ordering = ['-triggered_at']
        indexes = [
            models.Index(fields=['rule', 'status']),
            models.Index(fields=['severity', 'status']),
            models.Index(fields=['triggered_at']),
        ]

    def __str__(self):
        """Return the alert title, severity code, and status code."""
        return f"{self.title} ({self.severity}) - {self.status}"


class MonitoringDashboard(models.Model):
    """Stored configuration for a monitoring dashboard."""

    DASHBOARD_TYPES = [
        ('SYSTEM_OVERVIEW', 'System Overview'),
        ('PERFORMANCE', 'Performance'),
        ('BUSINESS_METRICS', 'Business Metrics'),
        ('SECURITY', 'Security'),
        ('INFRASTRUCTURE', 'Infrastructure'),
        ('CUSTOM', 'Custom'),
    ]

    # --- Identity -----------------------------------------------------
    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    name = models.CharField(max_length=200)
    description = models.TextField()
    dashboard_type = models.CharField(max_length=20, choices=DASHBOARD_TYPES)

    # --- Dashboard configuration --------------------------------------
    layout_config = models.JSONField(
        default=dict,
        help_text="Dashboard layout configuration",
    )
    widget_configs = models.JSONField(
        default=list,
        help_text="Configuration for dashboard widgets",
    )

    # --- Access control -----------------------------------------------
    is_public = models.BooleanField(default=False)
    allowed_users = models.ManyToManyField(
        User,
        blank=True,
        related_name='accessible_monitoring_dashboards',
    )
    allowed_roles = models.JSONField(
        default=list,
        help_text="List of roles that can access this dashboard",
    )

    # --- Refresh configuration ----------------------------------------
    auto_refresh_enabled = models.BooleanField(default=True)
    refresh_interval_seconds = models.PositiveIntegerField(default=30)

    # --- Status and audit metadata ------------------------------------
    is_active = models.BooleanField(default=True)
    created_by = models.ForeignKey(User, on_delete=models.SET_NULL, null=True)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ['name']
        indexes = [
            models.Index(fields=['dashboard_type', 'is_active']),
            models.Index(fields=['is_public']),
        ]

    def __str__(self):
        """Return the dashboard name followed by its type code."""
        return f"{self.name} ({self.dashboard_type})"


class SystemStatus(models.Model):
    """A record of the overall system status at a point in time."""

    STATUS_CHOICES = [
        ('OPERATIONAL', 'Operational'),
        ('DEGRADED', 'Degraded'),
        ('PARTIAL_OUTAGE', 'Partial Outage'),
        ('MAJOR_OUTAGE', 'Major Outage'),
        ('MAINTENANCE', 'Maintenance'),
    ]

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    status = models.CharField(max_length=20, choices=STATUS_CHOICES)
    message = models.TextField(help_text="Status message for users")

    # --- Impact details -----------------------------------------------
    affected_services = models.JSONField(
        default=list,
        help_text="List of affected services",
    )
    estimated_resolution = models.DateTimeField(null=True, blank=True)

    # --- Timestamps ---------------------------------------------------
    started_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)
    resolved_at = models.DateTimeField(null=True, blank=True)

    # --- Audit metadata -----------------------------------------------
    created_by = models.ForeignKey(User, on_delete=models.SET_NULL, null=True)

    class Meta:
        ordering = ['-started_at']
        indexes = [
            models.Index(fields=['status', 'started_at']),
            models.Index(fields=['started_at']),
        ]

    def __str__(self):
        """Return a label with the status code and the start time."""
        return f"System Status: {self.status} ({self.started_at})"

    @property
    def is_resolved(self):
        """Whether a resolution time has been recorded for this status."""
        resolution_time = self.resolved_at
        return resolution_time is not None