"""
|
|
Signals for analytics_predictive_insights app
|
|
Handles automatic KPI calculations and analytics updates
|
|
"""
|
|
from django.db.models.signals import post_save, post_delete
|
|
from django.dispatch import receiver
|
|
from django.utils import timezone
|
|
from django.db import models
|
|
from datetime import timedelta
|
|
import logging
|
|
|
|
from incident_intelligence.models import Incident
|
|
from .models import KPIMetric, KPIMeasurement, CostImpactAnalysis
|
|
from .ml.anomaly_detection import AnomalyDetectionService
|
|
from .ml.predictive_models import PredictiveModelService
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
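
# Note: these receivers only fire once this module has been imported at app
# startup -- typically from the app's AppConfig.ready() hook. A minimal sketch,
# assuming the config lives in this app's apps.py (the class name here is
# illustrative, not necessarily the project's actual one):
#
#     from django.apps import AppConfig
#
#     class AnalyticsPredictiveInsightsConfig(AppConfig):
#         name = 'analytics_predictive_insights'
#
#         def ready(self):
#             from . import signals  # noqa: F401  # register signal handlers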


@receiver(post_save, sender=Incident)
def update_kpi_measurements_on_incident_change(sender, instance, created, **kwargs):
    """Update KPI measurements when incidents are created or updated."""
    try:
        # Only process newly created incidents or incidents that are now resolved
        if not created and not instance.is_resolved:
            return

        # Get active KPI metrics that apply to this incident: a metric matches if
        # it targets the incident's category, severity, or priority, or leaves the
        # corresponding filter unset (meaning it applies to all incidents).
        applicable_metrics = KPIMetric.objects.filter(
            is_active=True
        ).filter(
            models.Q(incident_categories__contains=[instance.category]) |
            models.Q(incident_severities__contains=[instance.severity]) |
            models.Q(incident_priorities__contains=[instance.priority]) |
            models.Q(incident_categories__isnull=True) |
            models.Q(incident_severities__isnull=True) |
            models.Q(incident_priorities__isnull=True)
        )

        for metric in applicable_metrics:
            # Calculate and store the KPI measurement for this metric
            _calculate_kpi_measurement(metric, instance)

    except Exception as e:
        logger.error(f"Error updating KPI measurements for incident {instance.id}: {str(e)}")


@receiver(post_save, sender=Incident)
def trigger_anomaly_detection_on_incident(sender, instance, created, **kwargs):
    """Trigger anomaly detection when new incidents are created."""
    try:
        if created:
            # Run anomaly detection across the active detection models
            anomaly_service = AnomalyDetectionService()
            anomaly_service.run_anomaly_detection()

    except Exception as e:
        logger.error(f"Error running anomaly detection for incident {instance.id}: {str(e)}")


@receiver(post_save, sender=CostImpactAnalysis)
def update_cost_analytics_on_cost_change(sender, instance, created, **kwargs):
    """Update cost analytics when a cost impact analysis is created or updated."""
    try:
        # Recalculate cost-related KPIs for the incident behind this analysis
        cost_metrics = KPIMetric.objects.filter(
            is_active=True,
            metric_type='COST_IMPACT'
        )

        for metric in cost_metrics:
            _calculate_kpi_measurement(metric, instance.incident)

    except Exception as e:
        logger.error(f"Error updating cost analytics for cost analysis {instance.id}: {str(e)}")


def _calculate_kpi_measurement(metric, incident):
    """Calculate a KPI measurement for a specific metric and incident."""
    try:
        # Determine the time window for the calculation
        end_time = timezone.now()
        start_time = end_time - timedelta(hours=metric.time_window_hours)

        # Get incidents created within the time window
        incidents = Incident.objects.filter(
            created_at__gte=start_time,
            created_at__lte=end_time
        )

        # Apply the metric's category/severity/priority filters
        if metric.incident_categories:
            incidents = incidents.filter(category__in=metric.incident_categories)
        if metric.incident_severities:
            incidents = incidents.filter(severity__in=metric.incident_severities)
        if metric.incident_priorities:
            incidents = incidents.filter(priority__in=metric.incident_priorities)

        # Calculate the metric value based on its type
        if metric.metric_type == 'MTTA':
            # Mean Time to Acknowledge
            value = 0
            unit = 'minutes'
            acknowledged_incidents = incidents.filter(
                status__in=['IN_PROGRESS', 'RESOLVED', 'CLOSED']
            ).exclude(assigned_to__isnull=True)

            if acknowledged_incidents.exists():
                # Average the time from creation to acknowledgment
                total_time = timedelta()
                count = 0

                for inc in acknowledged_incidents:
                    # Simplified: uses the last update time as a proxy for the
                    # acknowledgment time; in practice, track acknowledgment explicitly.
                    if inc.updated_at and inc.created_at:
                        total_time += inc.updated_at - inc.created_at
                        count += 1

                if count > 0:
                    avg_time = total_time / count
                    value = avg_time.total_seconds() / 60  # Convert to minutes

        elif metric.metric_type == 'MTTR':
            # Mean Time to Resolve
            value = 0
            unit = 'hours'
            resolved_incidents = incidents.filter(
                status__in=['RESOLVED', 'CLOSED'],
                resolved_at__isnull=False
            )

            if resolved_incidents.exists():
                total_time = timedelta()
                count = 0

                for inc in resolved_incidents:
                    if inc.resolved_at and inc.created_at:
                        total_time += inc.resolved_at - inc.created_at
                        count += 1

                if count > 0:
                    avg_time = total_time / count
                    value = avg_time.total_seconds() / 3600  # Convert to hours

        elif metric.metric_type == 'INCIDENT_COUNT':
            # Incident count in the window
            value = incidents.count()
            unit = 'count'

        elif metric.metric_type == 'RESOLUTION_RATE':
            # Percentage of incidents in the window that were resolved
            unit = 'percentage'
            total_incidents = incidents.count()
            resolved_count = incidents.filter(
                status__in=['RESOLVED', 'CLOSED']
            ).count()
            value = (resolved_count / total_incidents) * 100 if total_incidents > 0 else 0

        else:
            # Default calculation: fall back to a simple incident count
            value = incidents.count()
            unit = 'count'

        # Create or update the KPI measurement for this window
        measurement, created = KPIMeasurement.objects.get_or_create(
            metric=metric,
            measurement_period_start=start_time,
            measurement_period_end=end_time,
            defaults={
                'value': value,
                'unit': unit,
                'incident_count': incidents.count(),
                'sample_size': incidents.count()
            }
        )

        if not created:
            measurement.value = value
            measurement.unit = unit
            measurement.incident_count = incidents.count()
            measurement.sample_size = incidents.count()
            measurement.save()

        logger.info(f"Updated KPI measurement for {metric.name}: {value} {unit}")

    except Exception as e:
        logger.error(f"Error calculating KPI measurement for {metric.name}: {str(e)}")


# Signals supporting scheduled model-training tasks
@receiver(post_save, sender=PredictiveModel)
def schedule_model_training(sender, instance, created, **kwargs):
    """Schedule model training when a new predictive model is created."""
    try:
        if created and instance.status == 'TRAINING':
            # In a real implementation, this would enqueue a background task;
            # for now, just log the event.
            logger.info(f"Scheduled training for model {instance.name}")

    except Exception as e:
        logger.error(f"Error scheduling model training for {instance.name}: {str(e)}")
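
# A minimal sketch of how the training dispatch above could be made asynchronous,
# assuming a Celery task is available (the task name and module here are
# hypothetical; adapt to the project's actual task layout). It would replace the
# logging call inside the `if created and instance.status == 'TRAINING'` branch:
#
#     from .tasks import train_predictive_model
#     train_predictive_model.delay(instance.id)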


@receiver(post_save, sender=PredictiveModel)
def trigger_model_retraining(sender, instance, created, **kwargs):
    """Trigger model retraining when performance drops below the configured threshold."""
    try:
        # Skip models already marked RETRAINING so the save() below does not
        # re-trigger this handler recursively.
        if not created and instance.auto_retrain_enabled and instance.status != 'RETRAINING':
            # Check whether model performance has dropped below the threshold
            if (instance.accuracy_score and
                    instance.accuracy_score < instance.performance_threshold):

                # Mark the model for retraining
                instance.status = 'RETRAINING'
                instance.save()

                logger.info(f"Triggered retraining for model {instance.name} due to low performance")

    except Exception as e:
        logger.error(f"Error triggering model retraining for {instance.name}: {str(e)}")