Files
ETB/ETB-API/monitoring/views.py
Iliyan Angelov 6b247e5b9f Updates
2025-09-19 11:58:53 +03:00

481 lines
18 KiB
Python

"""
Views for monitoring system
"""
import logging
from rest_framework import viewsets, status, permissions
from rest_framework.decorators import action
from rest_framework.response import Response
from rest_framework.views import APIView
from django_filters.rest_framework import DjangoFilterBackend
from rest_framework.filters import SearchFilter, OrderingFilter
from django.utils import timezone
from datetime import timedelta
from monitoring.models import (
MonitoringTarget, HealthCheck, SystemMetric, MetricMeasurement,
AlertRule, Alert, MonitoringDashboard, SystemStatus
)
from monitoring.serializers import (
MonitoringTargetSerializer, HealthCheckSerializer, SystemMetricSerializer,
MetricMeasurementSerializer, AlertRuleSerializer, AlertSerializer,
MonitoringDashboardSerializer, SystemStatusSerializer,
HealthCheckSummarySerializer, MetricTrendSerializer, AlertSummarySerializer,
SystemOverviewSerializer
)
from monitoring.services.health_checks import HealthCheckService
from monitoring.services.metrics_collector import MetricsCollector, MetricsAggregator
from monitoring.services.alerting import AlertingService
from monitoring.tasks import (
execute_health_checks, collect_metrics, evaluate_alerts,
generate_system_status_report
)
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
class MonitoringTargetViewSet(viewsets.ModelViewSet):
    """Model viewset for ``MonitoringTarget`` with extra per-target actions.

    In addition to the routes inherited from ``ModelViewSet``, three POST
    detail actions are defined: ``test_connection``, ``enable_monitoring``
    and ``disable_monitoring``.
    """

    permission_classes = [permissions.IsAuthenticated]
    serializer_class = MonitoringTargetSerializer
    queryset = MonitoringTarget.objects.all()

    # Filtering / searching / ordering configuration.
    filter_backends = [DjangoFilterBackend, SearchFilter, OrderingFilter]
    filterset_fields = [
        'target_type',
        'status',
        'last_status',
        'related_module',
    ]
    search_fields = ['name', 'description']
    ordering_fields = ['name', 'created_at', 'last_checked']
    ordering = ['name']

    def perform_create(self, serializer):
        """Save the new target with ``created_by`` set to the requesting user."""
        creator = self.request.user
        serializer.save(created_by=creator)

    def _switch_status(self, new_status, verb):
        """Set the looked-up target's ``status`` and build the success response.

        ``verb`` is the word interpolated into the response message
        (``'enabled'`` or ``'disabled'``).
        """
        target = self.get_object()
        target.status = new_status
        target.save()
        payload = {
            'status': 'success',
            'message': f'Monitoring {verb} for {target.name}',
        }
        return Response(payload)

    @action(detail=True, methods=['post'])
    def test_connection(self, request, pk=None):
        """Run an ``'HTTP'`` health check against the target and report the result.

        Returns the check result on success; any exception raised by the
        health-check service is reported as an HTTP 500 with its message.
        """
        target = self.get_object()
        try:
            outcome = HealthCheckService().execute_health_check(target, 'HTTP')
        except Exception as exc:
            failure = {'status': 'error', 'error': str(exc)}
            return Response(
                failure,
                status=status.HTTP_500_INTERNAL_SERVER_ERROR,
            )
        return Response({'status': 'success', 'result': outcome})

    @action(detail=True, methods=['post'])
    def enable_monitoring(self, request, pk=None):
        """Mark the target's status as ``'ACTIVE'``."""
        return self._switch_status('ACTIVE', 'enabled')

    @action(detail=True, methods=['post'])
    def disable_monitoring(self, request, pk=None):
        """Mark the target's status as ``'INACTIVE'``."""
        return self._switch_status('INACTIVE', 'disabled')
class HealthCheckViewSet(viewsets.ReadOnlyModelViewSet):
    """Read-only viewset for ``HealthCheck`` records.

    Adds two list-level actions: ``summary`` (GET) and ``run_all_checks``
    (POST).
    """

    permission_classes = [permissions.IsAuthenticated]
    serializer_class = HealthCheckSerializer
    queryset = HealthCheck.objects.all()

    filter_backends = [DjangoFilterBackend, SearchFilter, OrderingFilter]
    filterset_fields = ['target', 'check_type', 'status']
    ordering_fields = ['checked_at', 'response_time_ms']
    ordering = ['-checked_at']

    @action(detail=False, methods=['get'])
    def summary(self, request):
        """Return the system health summary produced by ``HealthCheckService``.

        Any exception while building or serializing the summary is reported
        as an HTTP 500 carrying the exception message.
        """
        try:
            raw_summary = HealthCheckService().get_system_health_summary()
            payload = HealthCheckSummarySerializer(raw_summary).data
        except Exception as exc:
            return Response(
                {'error': str(exc)},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR,
            )
        return Response(payload)

    @action(detail=False, methods=['post'])
    def run_all_checks(self, request):
        """Queue the ``execute_health_checks`` task and return its task id."""
        try:
            # Execute health checks asynchronously
            queued = execute_health_checks.delay()
            payload = {
                'status': 'success',
                'message': 'Health checks started',
                'task_id': queued.id,
            }
        except Exception as exc:
            return Response(
                {'error': str(exc)},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR,
            )
        return Response(payload)
class SystemMetricViewSet(viewsets.ModelViewSet):
    """Model viewset for ``SystemMetric`` with measurement and trend actions."""

    queryset = SystemMetric.objects.all()
    serializer_class = SystemMetricSerializer
    permission_classes = [permissions.IsAuthenticated]
    filter_backends = [DjangoFilterBackend, SearchFilter, OrderingFilter]
    filterset_fields = ['metric_type', 'category', 'is_active', 'related_module']
    search_fields = ['name', 'description']
    ordering_fields = ['name', 'created_at']
    ordering = ['name']

    @staticmethod
    def _int_query_param(request, name, default):
        """Read query parameter ``name`` as a non-negative integer.

        Args:
            request: The current DRF request.
            name: Query-string key to read.
            default: Value used when the key is absent.

        Returns:
            The parsed integer (``>= 0``).

        Raises:
            rest_framework.exceptions.ValidationError: If the value is not an
                integer or is negative. DRF turns this into an HTTP 400
                response instead of the unhandled 500 a bare ``int()`` caused.
        """
        # Function-scope import: the module's import block does not provide it.
        from rest_framework.exceptions import ValidationError

        raw = request.query_params.get(name, default)
        try:
            value = int(raw)
        except (TypeError, ValueError):
            raise ValidationError({name: 'A valid integer is required.'}) from None
        if value < 0:
            raise ValidationError({name: 'Must be greater than or equal to 0.'})
        return value

    def perform_create(self, serializer):
        """Set the creator when creating a metric."""
        serializer.save(created_by=self.request.user)

    @action(detail=True, methods=['get'])
    def measurements(self, request, pk=None):
        """Return recent measurements for one metric, newest first.

        Query parameters:
            hours: Look-back window in hours (default 24).
            limit: Maximum number of rows returned (default 100).

        Malformed, negative or overflowing values yield HTTP 400.
        """
        from rest_framework.exceptions import ValidationError

        metric = self.get_object()
        hours = self._int_query_param(request, 'hours', 24)
        limit = self._int_query_param(request, 'limit', 100)

        try:
            since = timezone.now() - timedelta(hours=hours)
        except OverflowError:
            # Window larger than timedelta/datetime can represent.
            raise ValidationError({'hours': 'Value is too large.'}) from None

        # ``limit`` is guaranteed non-negative here; queryset slicing rejects
        # negative indexes.
        measurements = MetricMeasurement.objects.filter(
            metric=metric,
            timestamp__gte=since,
        ).order_by('-timestamp')[:limit]
        serializer = MetricMeasurementSerializer(measurements, many=True)
        return Response(serializer.data)

    @action(detail=True, methods=['get'])
    def trends(self, request, pk=None):
        """Return trend data for one metric over the last ``days`` days.

        Query parameters:
            days: Number of days passed to ``MetricsAggregator`` (default 7).
                Malformed or negative values yield HTTP 400.

        Exceptions raised while aggregating or serializing are reported as
        HTTP 500 with the exception message.
        """
        metric = self.get_object()
        days = self._int_query_param(request, 'days', 7)
        try:
            aggregator = MetricsAggregator()
            trends = aggregator.get_metric_trends(metric, days)
            serializer = MetricTrendSerializer(trends)
            return Response(serializer.data)
        except Exception as e:
            return Response({
                'error': str(e)
            }, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
class MetricMeasurementViewSet(viewsets.ReadOnlyModelViewSet):
    """Read-only viewset for ``MetricMeasurement`` records.

    Results can be filtered by ``metric`` and ordered by ``timestamp`` or
    ``value``; the default order is newest ``timestamp`` first.
    """

    permission_classes = [permissions.IsAuthenticated]
    serializer_class = MetricMeasurementSerializer
    queryset = MetricMeasurement.objects.all()

    filter_backends = [
        DjangoFilterBackend,
        OrderingFilter,
    ]
    filterset_fields = ['metric']
    ordering_fields = [
        'timestamp',
        'value',
    ]
    ordering = ['-timestamp']
class AlertRuleViewSet(viewsets.ModelViewSet):
    """Model viewset for ``AlertRule`` with test / enable / disable actions."""

    permission_classes = [permissions.IsAuthenticated]
    serializer_class = AlertRuleSerializer
    queryset = AlertRule.objects.all()

    filter_backends = [DjangoFilterBackend, SearchFilter, OrderingFilter]
    filterset_fields = ['alert_type', 'severity', 'status', 'is_enabled']
    search_fields = ['name', 'description']
    ordering_fields = ['name', 'created_at']
    ordering = ['name']

    def perform_create(self, serializer):
        """Save the new rule with ``created_by`` set to the requesting user."""
        creator = self.request.user
        serializer.save(created_by=creator)

    def _set_enabled(self, enabled):
        """Store ``enabled`` on the looked-up rule and build the response."""
        rule = self.get_object()
        rule.is_enabled = enabled
        rule.save()
        state = 'enabled' if enabled else 'disabled'
        return Response({
            'status': 'success',
            'message': f'Alert rule {rule.name} {state}',
        })

    @action(detail=True, methods=['post'])
    def test_rule(self, request, pk=None):
        """Report a test run for the rule.

        NOTE(review): no rule evaluation happens here yet; the service is only
        instantiated and a success message is returned.
        """
        rule = self.get_object()
        try:
            AlertingService()
            # This would test the rule without creating an alert
            payload = {
                'status': 'success',
                'message': f'Alert rule {rule.name} test completed',
            }
        except Exception as exc:
            return Response(
                {'error': str(exc)},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR,
            )
        return Response(payload)

    @action(detail=True, methods=['post'])
    def enable_rule(self, request, pk=None):
        """Set ``is_enabled`` to ``True`` on the rule."""
        return self._set_enabled(True)

    @action(detail=True, methods=['post'])
    def disable_rule(self, request, pk=None):
        """Set ``is_enabled`` to ``False`` on the rule."""
        return self._set_enabled(False)
class AlertViewSet(viewsets.ModelViewSet):
    """Model viewset for ``Alert`` with acknowledge / resolve / summary actions."""

    queryset = Alert.objects.all()
    serializer_class = AlertSerializer
    permission_classes = [permissions.IsAuthenticated]
    filter_backends = [DjangoFilterBackend, SearchFilter, OrderingFilter]
    filterset_fields = ['rule', 'severity', 'status']
    search_fields = ['title', 'description']
    ordering_fields = ['triggered_at', 'severity']
    ordering = ['-triggered_at']

    @action(detail=True, methods=['post'])
    def acknowledge(self, request, pk=None):
        """Acknowledge the alert on behalf of the requesting user.

        Delegates to ``AlertingService.acknowledge_alert`` and returns its
        result. Failures are logged and reported as HTTP 500.
        """
        alert = self.get_object()
        try:
            alerting_service = AlertingService()
            result = alerting_service.acknowledge_alert(str(alert.id), request.user)
            return Response(result)
        except Exception as e:
            logger.exception("Failed to acknowledge alert %s", alert.id)
            return Response({
                'error': str(e)
            }, status=status.HTTP_500_INTERNAL_SERVER_ERROR)

    @action(detail=True, methods=['post'])
    def resolve(self, request, pk=None):
        """Resolve the alert on behalf of the requesting user.

        Delegates to ``AlertingService.resolve_alert`` and returns its result.
        Failures are logged and reported as HTTP 500.
        """
        alert = self.get_object()
        try:
            alerting_service = AlertingService()
            result = alerting_service.resolve_alert(str(alert.id), request.user)
            return Response(result)
        except Exception as e:
            logger.exception("Failed to resolve alert %s", alert.id)
            return Response({
                'error': str(e)
            }, status=status.HTTP_500_INTERNAL_SERVER_ERROR)

    @action(detail=False, methods=['get'])
    def summary(self, request):
        """Return alert counts by severity and status.

        ``total_alerts`` counts every alert. The four severity counters only
        include alerts whose status is ``'TRIGGERED'``. All counters come from
        a single aggregate query so they describe one consistent snapshot.
        Failures are logged and reported as HTTP 500.
        """
        # Function-scope import: the module's import block does not provide them.
        from django.db.models import Count, Q

        try:
            triggered = Q(status='TRIGGERED')
            summary = Alert.objects.aggregate(
                total_alerts=Count('pk'),
                critical_alerts=Count('pk', filter=triggered & Q(severity='CRITICAL')),
                high_alerts=Count('pk', filter=triggered & Q(severity='HIGH')),
                medium_alerts=Count('pk', filter=triggered & Q(severity='MEDIUM')),
                low_alerts=Count('pk', filter=triggered & Q(severity='LOW')),
                acknowledged_alerts=Count('pk', filter=Q(status='ACKNOWLEDGED')),
                resolved_alerts=Count('pk', filter=Q(status='RESOLVED')),
            )
            serializer = AlertSummarySerializer(summary)
            return Response(serializer.data)
        except Exception as e:
            logger.exception("Failed to build alert summary")
            return Response({
                'error': str(e)
            }, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
class MonitoringDashboardViewSet(viewsets.ModelViewSet):
    """Model viewset for ``MonitoringDashboard`` with per-user visibility."""

    queryset = MonitoringDashboard.objects.all()
    serializer_class = MonitoringDashboardSerializer
    permission_classes = [permissions.IsAuthenticated]
    filter_backends = [DjangoFilterBackend, SearchFilter, OrderingFilter]
    filterset_fields = ['dashboard_type', 'is_active', 'is_public']
    search_fields = ['name', 'description']
    ordering_fields = ['name', 'created_at']
    ordering = ['name']

    def perform_create(self, serializer):
        """Set the creator when creating a dashboard."""
        serializer.save(created_by=self.request.user)

    def get_queryset(self):
        """Return the dashboards the requesting user may see.

        Staff users get the full queryset. Everyone else only gets dashboards
        that are public or that list them in ``allowed_users``.
        """
        # The module never imports ``django.db.models``, so the previous
        # ``models.Q`` reference raised NameError for every non-staff request.
        from django.db.models import Q

        queryset = super().get_queryset()
        user = self.request.user
        if user.is_staff:
            return queryset

        # ``distinct()`` removes duplicate rows produced by the join on
        # ``allowed_users``.
        return queryset.filter(
            Q(is_public=True) | Q(allowed_users=user)
        ).distinct()
class SystemStatusViewSet(viewsets.ReadOnlyModelViewSet):
    """Read-only viewset for ``SystemStatus`` records, newest first."""

    # ``ordering`` below is only consulted by DRF's OrderingFilter, which this
    # view does not declare, so the order is applied on the queryset itself to
    # make it independent of the project's default filter backends.
    queryset = SystemStatus.objects.all().order_by('-started_at')
    serializer_class = SystemStatusSerializer
    permission_classes = [permissions.IsAuthenticated]
    ordering = ['-started_at']
class SystemOverviewView(APIView):
    """Aggregate system status, health, alerts and host resources in one GET."""

    permission_classes = [permissions.IsAuthenticated]

    @staticmethod
    def _current_status(user):
        """Return the latest unresolved ``SystemStatus``, creating one if absent.

        NOTE(review): this writes to the database from a GET request whenever
        no unresolved status exists; concurrent requests could each create a
        row. Left unchanged because the serializer receives the saved instance.
        """
        current_status = SystemStatus.objects.filter(
            resolved_at__isnull=True
        ).order_by('-started_at').first()
        if not current_status:
            # Create default operational status
            current_status = SystemStatus.objects.create(
                status='OPERATIONAL',
                message='All systems operational',
                created_by=user,
            )
        return current_status

    @staticmethod
    def _alert_summary():
        """Build the alert counters for the overview.

        Severity counters are derived from ``AlertingService.get_active_alerts``
        (used here as a sized collection of mappings with a ``'severity'``
        key). Acknowledged and resolved counters are read from the database
        instead of being reported as a constant 0.
        """
        from collections import Counter

        active_alerts = AlertingService().get_active_alerts()
        # Single pass over the alerts; missing severities read as 0.
        by_severity = Counter(alert['severity'] for alert in active_alerts)
        return {
            'total_alerts': len(active_alerts),
            'critical_alerts': by_severity['CRITICAL'],
            'high_alerts': by_severity['HIGH'],
            'medium_alerts': by_severity['MEDIUM'],
            'low_alerts': by_severity['LOW'],
            'acknowledged_alerts': Alert.objects.filter(status='ACKNOWLEDGED').count(),
            'resolved_alerts': Alert.objects.filter(status='RESOLVED').count(),
        }

    @staticmethod
    def _system_resources():
        """Sample CPU, memory and root-disk usage percentages via ``psutil``."""
        import psutil

        return {
            # NOTE(review): interval=1 blocks this request for one second.
            'cpu_percent': psutil.cpu_percent(interval=1),
            'memory_percent': psutil.virtual_memory().percent,
            'disk_percent': psutil.disk_usage('/').percent,
        }

    def get(self, request):
        """Return the serialized system overview.

        Any failure while collecting or serializing the data is logged with
        its traceback and reported as HTTP 500 with the exception message.
        """
        try:
            overview = {
                'system_status': self._current_status(request.user),
                'health_summary': HealthCheckService().get_system_health_summary(),
                'alert_summary': self._alert_summary(),
                # Get recent incidents (mock data for now)
                'recent_incidents': [],
                # Get top metrics (mock data for now)
                'top_metrics': [],
                'system_resources': self._system_resources(),
            }
            serializer = SystemOverviewSerializer(overview)
            return Response(serializer.data)
        except Exception as e:
            logger.exception("Failed to get system overview: %s", e)
            return Response({
                'error': str(e)
            }, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
class MonitoringTasksView(APIView):
    """Endpoint that queues one of the monitoring background tasks."""

    permission_classes = [permissions.IsAuthenticated]

    def post(self, request):
        """Queue the task named by ``task_type`` in the request body.

        Recognised values are ``health_checks``, ``metrics_collection``,
        ``alert_evaluation`` and ``system_status_report``. Anything else
        yields HTTP 400; an exception while queueing yields HTTP 500.
        """
        task_type = request.data.get('task_type')
        try:
            dispatch = {
                'health_checks': execute_health_checks,
                'metrics_collection': collect_metrics,
                'alert_evaluation': evaluate_alerts,
                'system_status_report': generate_system_status_report,
            }
            # Only strings can name a task; the guard also keeps unhashable
            # payloads (lists, dicts) on the "invalid" path.
            chosen = dispatch.get(task_type) if isinstance(task_type, str) else None
            if chosen is None:
                return Response(
                    {'error': 'Invalid task type'},
                    status=status.HTTP_400_BAD_REQUEST,
                )
            queued = chosen.delay()
            return Response({
                'status': 'success',
                'message': f'{task_type} task started',
                'task_id': queued.id,
            })
        except Exception as exc:
            return Response(
                {'error': str(exc)},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR,
            )