Updates
This commit is contained in:
515
ETB-API/monitoring/models.py
Normal file
515
ETB-API/monitoring/models.py
Normal file
@@ -0,0 +1,515 @@
|
||||
"""
|
||||
Monitoring models for comprehensive system observability
|
||||
"""
|
||||
import uuid
|
||||
import json
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, Any, Optional, List
|
||||
from decimal import Decimal
|
||||
|
||||
from django.db import models
|
||||
from django.contrib.auth import get_user_model
|
||||
from django.core.validators import MinValueValidator, MaxValueValidator
|
||||
from django.utils import timezone
|
||||
from django.core.exceptions import ValidationError
|
||||
|
||||
# Resolve the project's active user model once; the models in this module
# use it as the target of their user foreign keys and many-to-many relation.
User = get_user_model()
|
||||
|
||||
|
||||
class MonitoringTarget(models.Model):
    """A system, service, or component registered for monitoring.

    Holds the connection details, the check interval / timeout / retry
    settings, the health-check configuration, and the most recently
    recorded status of the target.
    """

    # Allowed values for ``target_type``.
    TARGET_TYPES = [
        ("APPLICATION", "Application"),
        ("DATABASE", "Database"),
        ("CACHE", "Cache"),
        ("QUEUE", "Message Queue"),
        ("EXTERNAL_API", "External API"),
        ("SERVICE", "Internal Service"),
        ("INFRASTRUCTURE", "Infrastructure"),
        ("MODULE", "Django Module"),
    ]

    # Allowed values for ``status``.
    STATUS_CHOICES = [
        ("ACTIVE", "Active"),
        ("INACTIVE", "Inactive"),
        ("MAINTENANCE", "Maintenance"),
        ("ERROR", "Error"),
    ]

    # --- Identity -----------------------------------------------------
    id = models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True)
    name = models.CharField(unique=True, max_length=200)
    description = models.TextField()
    target_type = models.CharField(choices=TARGET_TYPES, max_length=20)

    # --- Connection details -------------------------------------------
    endpoint_url = models.URLField(null=True, blank=True)
    connection_config = models.JSONField(
        help_text="Connection configuration (credentials, timeouts, etc.)",
        default=dict,
    )

    # --- Monitoring configuration -------------------------------------
    check_interval_seconds = models.PositiveIntegerField(default=60)
    timeout_seconds = models.PositiveIntegerField(default=30)
    retry_count = models.PositiveIntegerField(default=3)

    # --- Health check configuration -----------------------------------
    health_check_enabled = models.BooleanField(default=True)
    health_check_endpoint = models.CharField(
        max_length=200,
        null=True,
        blank=True,
    )
    expected_status_codes = models.JSONField(
        help_text="Expected HTTP status codes for health checks",
        default=list,
    )

    # --- Status and metadata ------------------------------------------
    status = models.CharField(
        max_length=20,
        choices=STATUS_CHOICES,
        default="ACTIVE",
    )
    last_checked = models.DateTimeField(blank=True, null=True)
    # Starts as UNKNOWN until a value is written.
    last_status = models.CharField(
        max_length=20,
        choices=[
            ("HEALTHY", "Healthy"),
            ("WARNING", "Warning"),
            ("CRITICAL", "Critical"),
            ("UNKNOWN", "Unknown"),
        ],
        default="UNKNOWN",
    )

    # --- Related module (if applicable) -------------------------------
    related_module = models.CharField(
        help_text="Related Django module (e.g., 'security', 'incident_intelligence')",
        max_length=50,
        null=True,
        blank=True,
    )

    # --- Audit metadata -----------------------------------------------
    # SET_NULL keeps the target row when the creating user is deleted.
    created_by = models.ForeignKey(User, null=True, on_delete=models.SET_NULL)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ["name"]
        indexes = [
            models.Index(fields=["target_type", "status"]),
            models.Index(fields=["related_module"]),
            models.Index(fields=["last_checked"]),
        ]

    def __str__(self):
        """Return the target name followed by its type in parentheses."""
        return f"{self.name} ({self.target_type})"
|
||||
|
||||
|
||||
class HealthCheck(models.Model):
    """One recorded health-check result for a ``MonitoringTarget``.

    Each row captures the check type, the resulting status, optional
    response details, optional resource-usage figures, and the time the
    row was created.
    """

    # Allowed values for ``check_type``.
    CHECK_TYPES = [
        ("HTTP", "HTTP Health Check"),
        ("DATABASE", "Database Connection"),
        ("CACHE", "Cache Connection"),
        ("QUEUE", "Message Queue"),
        ("CUSTOM", "Custom Check"),
        ("PING", "Network Ping"),
        ("SSL", "SSL Certificate"),
    ]

    # Allowed values for ``status``.
    STATUS_CHOICES = [
        ("HEALTHY", "Healthy"),
        ("WARNING", "Warning"),
        ("CRITICAL", "Critical"),
        ("UNKNOWN", "Unknown"),
    ]

    id = models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True)
    # CASCADE: results are removed together with their target.
    target = models.ForeignKey(
        MonitoringTarget,
        related_name="health_checks",
        on_delete=models.CASCADE,
    )

    # --- Check details ------------------------------------------------
    check_type = models.CharField(choices=CHECK_TYPES, max_length=20)
    status = models.CharField(choices=STATUS_CHOICES, max_length=20)
    response_time_ms = models.PositiveIntegerField(blank=True, null=True)

    # --- Response details ---------------------------------------------
    status_code = models.PositiveIntegerField(blank=True, null=True)
    response_body = models.TextField(null=True, blank=True)
    error_message = models.TextField(null=True, blank=True)

    # --- Metrics ------------------------------------------------------
    cpu_usage_percent = models.FloatField(blank=True, null=True)
    memory_usage_percent = models.FloatField(blank=True, null=True)
    disk_usage_percent = models.FloatField(blank=True, null=True)

    # --- Timestamps ---------------------------------------------------
    # Set automatically when the row is first saved.
    checked_at = models.DateTimeField(auto_now_add=True)

    class Meta:
        ordering = ["-checked_at"]
        indexes = [
            models.Index(fields=["target", "checked_at"]),
            models.Index(fields=["status", "checked_at"]),
            models.Index(fields=["check_type"]),
        ]

    def __str__(self):
        """Return the target name, the status, and the check time."""
        return f"{self.target.name} - {self.status} ({self.checked_at})"
|
||||
|
||||
|
||||
class SystemMetric(models.Model):
    """Definition of a metric: what it is, how it is aggregated and kept.

    Individual readings reference this model through
    ``MetricMeasurement.metric``.
    """

    # Allowed values for ``metric_type``.
    METRIC_TYPES = [
        ("PERFORMANCE", "Performance Metric"),
        ("BUSINESS", "Business Metric"),
        ("SECURITY", "Security Metric"),
        ("INFRASTRUCTURE", "Infrastructure Metric"),
        ("CUSTOM", "Custom Metric"),
    ]

    # Allowed values for ``category``.
    METRIC_CATEGORIES = [
        ("API_RESPONSE_TIME", "API Response Time"),
        ("THROUGHPUT", "Throughput"),
        ("ERROR_RATE", "Error Rate"),
        ("AVAILABILITY", "Availability"),
        ("INCIDENT_COUNT", "Incident Count"),
        ("MTTR", "Mean Time to Resolve"),
        ("MTTA", "Mean Time to Acknowledge"),
        ("SLA_COMPLIANCE", "SLA Compliance"),
        ("SECURITY_EVENTS", "Security Events"),
        ("AUTOMATION_SUCCESS", "Automation Success Rate"),
        ("AI_ACCURACY", "AI Model Accuracy"),
        ("COST_IMPACT", "Cost Impact"),
        ("USER_ACTIVITY", "User Activity"),
        ("SYSTEM_RESOURCES", "System Resources"),
    ]

    # --- Identity -----------------------------------------------------
    id = models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True)
    name = models.CharField(max_length=200)
    description = models.TextField()
    metric_type = models.CharField(choices=METRIC_TYPES, max_length=20)
    category = models.CharField(choices=METRIC_CATEGORIES, max_length=30)

    # --- Metric configuration -----------------------------------------
    unit = models.CharField(help_text="Unit of measurement", max_length=50)
    aggregation_method = models.CharField(
        max_length=20,
        choices=[
            ("AVERAGE", "Average"),
            ("SUM", "Sum"),
            ("COUNT", "Count"),
            ("MIN", "Minimum"),
            ("MAX", "Maximum"),
            ("PERCENTILE_95", "95th Percentile"),
            ("PERCENTILE_99", "99th Percentile"),
        ],
    )

    # --- Collection configuration -------------------------------------
    # Default of 300 seconds is five minutes.
    collection_interval_seconds = models.PositiveIntegerField(default=300)
    retention_days = models.PositiveIntegerField(default=90)

    # --- Thresholds (both optional) -----------------------------------
    warning_threshold = models.FloatField(blank=True, null=True)
    critical_threshold = models.FloatField(blank=True, null=True)

    # --- Status -------------------------------------------------------
    is_active = models.BooleanField(default=True)
    is_system_metric = models.BooleanField(default=False)

    # --- Related module -----------------------------------------------
    related_module = models.CharField(
        help_text="Related Django module",
        max_length=50,
        null=True,
        blank=True,
    )

    # --- Audit metadata -----------------------------------------------
    # SET_NULL keeps the metric when the creating user is deleted.
    created_by = models.ForeignKey(User, null=True, on_delete=models.SET_NULL)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ["name"]
        indexes = [
            models.Index(fields=["metric_type", "category"]),
            models.Index(fields=["related_module"]),
            models.Index(fields=["is_active"]),
        ]

    def __str__(self):
        """Return the metric name followed by its category in parentheses."""
        return f"{self.name} ({self.category})"
|
||||
|
||||
|
||||
class MetricMeasurement(models.Model):
    """A single recorded value of a ``SystemMetric``."""

    id = models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True)
    # CASCADE: measurements are removed together with their metric.
    metric = models.ForeignKey(
        SystemMetric,
        related_name="measurements",
        on_delete=models.CASCADE,
    )

    # --- Measurement details ------------------------------------------
    value = models.DecimalField(decimal_places=4, max_digits=15)
    # Set automatically when the row is first saved.
    timestamp = models.DateTimeField(auto_now_add=True)

    # --- Context ------------------------------------------------------
    tags = models.JSONField(
        help_text="Additional tags for this measurement",
        default=dict,
    )
    metadata = models.JSONField(
        help_text="Additional metadata",
        default=dict,
    )

    class Meta:
        ordering = ["-timestamp"]
        indexes = [
            models.Index(fields=["metric", "timestamp"]),
            models.Index(fields=["timestamp"]),
        ]

    def __str__(self):
        """Return the metric name, the value, and the timestamp."""
        return f"{self.metric.name}: {self.value} ({self.timestamp})"
|
||||
|
||||
|
||||
class AlertRule(models.Model):
    """Configuration of an alert: its condition, scope, and notifications.

    A rule may optionally be tied to a ``SystemMetric`` and/or a
    ``MonitoringTarget``; both links are nullable.
    """

    # Allowed values for ``alert_type``.
    ALERT_TYPES = [
        ("THRESHOLD", "Threshold Alert"),
        ("ANOMALY", "Anomaly Alert"),
        ("PATTERN", "Pattern Alert"),
        ("AVAILABILITY", "Availability Alert"),
        ("PERFORMANCE", "Performance Alert"),
    ]

    # Allowed values for ``severity`` (also reused by ``Alert.severity``).
    SEVERITY_CHOICES = [
        ("LOW", "Low"),
        ("MEDIUM", "Medium"),
        ("HIGH", "High"),
        ("CRITICAL", "Critical"),
    ]

    # Allowed values for ``status``.
    STATUS_CHOICES = [
        ("ACTIVE", "Active"),
        ("INACTIVE", "Inactive"),
        ("MAINTENANCE", "Maintenance"),
    ]

    # --- Identity -----------------------------------------------------
    id = models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True)
    name = models.CharField(max_length=200)
    description = models.TextField()
    alert_type = models.CharField(choices=ALERT_TYPES, max_length=20)
    severity = models.CharField(choices=SEVERITY_CHOICES, max_length=20)

    # --- Rule configuration -------------------------------------------
    # No default is declared, so a value must be supplied.
    condition = models.JSONField(help_text="Alert condition configuration")
    evaluation_interval_seconds = models.PositiveIntegerField(default=60)

    # --- Related objects ----------------------------------------------
    # CASCADE on both links: the rule is removed with its metric or target.
    metric = models.ForeignKey(
        SystemMetric,
        related_name="alert_rules",
        on_delete=models.CASCADE,
        blank=True,
        null=True,
    )
    target = models.ForeignKey(
        MonitoringTarget,
        related_name="alert_rules",
        on_delete=models.CASCADE,
        blank=True,
        null=True,
    )

    # --- Notification configuration -----------------------------------
    notification_channels = models.JSONField(
        help_text="List of notification channels (email, slack, webhook, etc.)",
        default=list,
    )
    notification_template = models.TextField(
        help_text="Custom notification template",
        null=True,
        blank=True,
    )

    # --- Status -------------------------------------------------------
    status = models.CharField(
        max_length=20,
        choices=STATUS_CHOICES,
        default="ACTIVE",
    )
    is_enabled = models.BooleanField(default=True)

    # --- Audit metadata -----------------------------------------------
    # SET_NULL keeps the rule when the creating user is deleted.
    created_by = models.ForeignKey(User, null=True, on_delete=models.SET_NULL)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ["name"]
        indexes = [
            models.Index(fields=["alert_type", "severity"]),
            models.Index(fields=["status", "is_enabled"]),
        ]

    def __str__(self):
        """Return the rule name followed by its severity in parentheses."""
        return f"{self.name} ({self.severity})"
|
||||
|
||||
|
||||
class Alert(models.Model):
    """A concrete alert instance raised for an ``AlertRule``.

    Records the alert text, severity and status, the optional triggering
    and threshold values, and who acknowledged or resolved it and when.
    """

    # Allowed values for ``status``.
    STATUS_CHOICES = [
        ("TRIGGERED", "Triggered"),
        ("ACKNOWLEDGED", "Acknowledged"),
        ("RESOLVED", "Resolved"),
        ("SUPPRESSED", "Suppressed"),
    ]

    id = models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True)
    # CASCADE: alerts are removed together with their rule.
    rule = models.ForeignKey(
        AlertRule,
        related_name="alerts",
        on_delete=models.CASCADE,
    )

    # --- Alert details ------------------------------------------------
    title = models.CharField(max_length=200)
    description = models.TextField()
    # Shares the severity vocabulary defined on AlertRule.
    severity = models.CharField(
        max_length=20,
        choices=AlertRule.SEVERITY_CHOICES,
    )
    status = models.CharField(
        max_length=20,
        choices=STATUS_CHOICES,
        default="TRIGGERED",
    )

    # --- Context ------------------------------------------------------
    triggered_value = models.DecimalField(
        max_digits=15,
        decimal_places=4,
        blank=True,
        null=True,
    )
    threshold_value = models.DecimalField(
        max_digits=15,
        decimal_places=4,
        blank=True,
        null=True,
    )
    context_data = models.JSONField(
        help_text="Additional context data for the alert",
        default=dict,
    )

    # --- Timestamps ---------------------------------------------------
    # triggered_at is set automatically when the row is first saved.
    triggered_at = models.DateTimeField(auto_now_add=True)
    acknowledged_at = models.DateTimeField(blank=True, null=True)
    resolved_at = models.DateTimeField(blank=True, null=True)

    # --- Assignment ---------------------------------------------------
    # SET_NULL keeps the alert when the referenced user is deleted.
    acknowledged_by = models.ForeignKey(
        User,
        related_name="acknowledged_alerts",
        on_delete=models.SET_NULL,
        blank=True,
        null=True,
    )
    resolved_by = models.ForeignKey(
        User,
        related_name="resolved_alerts",
        on_delete=models.SET_NULL,
        blank=True,
        null=True,
    )

    class Meta:
        ordering = ["-triggered_at"]
        indexes = [
            models.Index(fields=["rule", "status"]),
            models.Index(fields=["severity", "status"]),
            models.Index(fields=["triggered_at"]),
        ]

    def __str__(self):
        """Return the title, the severity in parentheses, and the status."""
        return f"{self.title} ({self.severity}) - {self.status}"
|
||||
|
||||
|
||||
class MonitoringDashboard(models.Model):
    """Stored configuration of a monitoring dashboard.

    Covers layout and widget settings, access-control fields, and
    auto-refresh settings.
    """

    # Allowed values for ``dashboard_type``.
    DASHBOARD_TYPES = [
        ("SYSTEM_OVERVIEW", "System Overview"),
        ("PERFORMANCE", "Performance"),
        ("BUSINESS_METRICS", "Business Metrics"),
        ("SECURITY", "Security"),
        ("INFRASTRUCTURE", "Infrastructure"),
        ("CUSTOM", "Custom"),
    ]

    # --- Identity -----------------------------------------------------
    id = models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True)
    name = models.CharField(max_length=200)
    description = models.TextField()
    dashboard_type = models.CharField(choices=DASHBOARD_TYPES, max_length=20)

    # --- Dashboard configuration --------------------------------------
    layout_config = models.JSONField(
        help_text="Dashboard layout configuration",
        default=dict,
    )
    widget_configs = models.JSONField(
        help_text="Configuration for dashboard widgets",
        default=list,
    )

    # --- Access control -----------------------------------------------
    is_public = models.BooleanField(default=False)
    allowed_users = models.ManyToManyField(
        User,
        related_name="accessible_monitoring_dashboards",
        blank=True,
    )
    allowed_roles = models.JSONField(
        help_text="List of roles that can access this dashboard",
        default=list,
    )

    # --- Refresh configuration ----------------------------------------
    auto_refresh_enabled = models.BooleanField(default=True)
    refresh_interval_seconds = models.PositiveIntegerField(default=30)

    # --- Status and audit metadata ------------------------------------
    is_active = models.BooleanField(default=True)
    # SET_NULL keeps the dashboard when the creating user is deleted.
    created_by = models.ForeignKey(User, null=True, on_delete=models.SET_NULL)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ["name"]
        indexes = [
            models.Index(fields=["dashboard_type", "is_active"]),
            models.Index(fields=["is_public"]),
        ]

    def __str__(self):
        """Return the dashboard name followed by its type in parentheses."""
        return f"{self.name} ({self.dashboard_type})"
|
||||
|
||||
|
||||
class SystemStatus(models.Model):
    """A recorded overall system status entry.

    Each row carries a status value, a user-facing message, the affected
    services, and start / update / resolution timestamps.
    """

    # Allowed values for ``status``.
    STATUS_CHOICES = [
        ("OPERATIONAL", "Operational"),
        ("DEGRADED", "Degraded"),
        ("PARTIAL_OUTAGE", "Partial Outage"),
        ("MAJOR_OUTAGE", "Major Outage"),
        ("MAINTENANCE", "Maintenance"),
    ]

    id = models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True)
    status = models.CharField(choices=STATUS_CHOICES, max_length=20)
    message = models.TextField(help_text="Status message for users")

    # --- Impact details -----------------------------------------------
    affected_services = models.JSONField(
        help_text="List of affected services",
        default=list,
    )
    estimated_resolution = models.DateTimeField(blank=True, null=True)

    # --- Timestamps ---------------------------------------------------
    # started_at is set on first save; updated_at on every save.
    started_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)
    resolved_at = models.DateTimeField(blank=True, null=True)

    # --- Audit metadata -----------------------------------------------
    # SET_NULL keeps the entry when the creating user is deleted.
    created_by = models.ForeignKey(User, null=True, on_delete=models.SET_NULL)

    class Meta:
        ordering = ["-started_at"]
        indexes = [
            models.Index(fields=["status", "started_at"]),
            models.Index(fields=["started_at"]),
        ]

    def __str__(self):
        """Return the status value and the start time."""
        return f"System Status: {self.status} ({self.started_at})"

    @property
    def is_resolved(self):
        """Return True when ``resolved_at`` has been set, otherwise False."""
        still_open = self.resolved_at is None
        return not still_open
|
||||
Reference in New Issue
Block a user