"""
SLA Notifications Service for Chat Integration
Handles SLA threshold notifications and escalation alerts in chat rooms
"""
import logging
from typing import Dict, Any, Optional, List

from django.db.models import Q
from django.utils import timezone

from incident_intelligence.models import Incident
from sla_oncall.models import SLAInstance, EscalationInstance, EscalationPolicy

from ..models import WarRoom, WarRoomMessage


logger = logging.getLogger(__name__)

# Timestamp layout used in every chat message rendered by this service.
_TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'


class SLANotificationService:
    """Service for handling SLA-related notifications in chat rooms.

    All public methods are static. The ``send_*`` methods post a sender-less
    ``WarRoomMessage`` into the war room attached to the relevant incident and
    report success through their boolean return value; failures are logged
    (with traceback) instead of being raised to the caller.
    """

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _whole_minutes(delta) -> int:
        """Return *delta* truncated to whole minutes, or 0 when it is missing.

        ``delta`` is expected to expose ``total_seconds()`` (e.g. a
        ``timedelta``). A falsy value (``None`` or a zero-length duration)
        yields 0 so a missing duration never prevents an alert from being sent.
        """
        return int(delta.total_seconds() / 60) if delta else 0

    @staticmethod
    def _war_room_for(incident):
        """Return the first war room linked to *incident*, or None."""
        return WarRoom.objects.filter(incident=incident).first()

    @staticmethod
    def _post_system_message(war_room, content, message_type, sender_name, external_data) -> None:
        """Create a message with no sender user in *war_room*."""
        WarRoomMessage.objects.create(
            war_room=war_room,
            content=content,
            message_type=message_type,
            sender=None,
            sender_name=sender_name,
            external_data=external_data,
        )

    @staticmethod
    def _warn_if_threshold_reached(sla) -> bool:
        """Send the warning for one active SLA if it is due and not yet sent.

        Returns True only when a new warning message was created.
        """
        definition = sla.sla_definition
        if not definition.escalation_enabled:
            return False

        threshold_percent = definition.escalation_threshold_percent
        window_seconds = (sla.target_time - sla.started_at).total_seconds()
        if window_seconds <= 0:
            # A zero-length window would divide by zero and a negative one
            # yields a meaningless negative percentage; skip instead of
            # failing the whole sweep.
            logger.warning(
                "Skipping SLA %s: target time is not after start time", sla.id
            )
            return False

        elapsed_seconds = (timezone.now() - sla.started_at).total_seconds()
        elapsed_percent = (elapsed_seconds / window_seconds) * 100
        if elapsed_percent < threshold_percent:
            return False

        # A warning is sent at most once per SLA instance: look for an alert
        # already tagged with this SLA's id in the incident's war room.
        already_warned = WarRoomMessage.objects.filter(
            war_room__incident=sla.incident,
            message_type='ALERT',
            external_data__notification_type='sla_warning',
            external_data__sla_instance_id=str(sla.id),
        ).exists()
        if already_warned:
            return False

        return bool(
            SLANotificationService.send_sla_warning_notification(sla, threshold_percent)
        )

    # ------------------------------------------------------------------
    # Notifications
    # ------------------------------------------------------------------

    @staticmethod
    def send_sla_warning_notification(sla_instance: SLAInstance, threshold_percent: float = 80.0):
        """Send SLA warning notification to incident chat room.

        Args:
            sla_instance: SLA instance the warning is about.
            threshold_percent: Percentage shown in the message and stored in
                the message's ``external_data``.

        Returns:
            True if the message was created; False if the incident has no war
            room or an error occurred (the error is logged).
        """
        try:
            war_room = SLANotificationService._war_room_for(sla_instance.incident)
            if not war_room:
                return False

            minutes_left = SLANotificationService._whole_minutes(sla_instance.time_remaining)
            definition = sla_instance.sla_definition

            content = (
                "🚨 **SLA Warning** 🚨\n\n"
                f"**SLA:** {definition.name}\n"
                f"**Type:** {definition.get_sla_type_display()}\n"
                f"**Time Remaining:** {minutes_left} minutes\n"
                f"**Threshold:** {threshold_percent}% reached\n\n"
                "Please take immediate action to meet the SLA target."
            )

            SLANotificationService._post_system_message(
                war_room,
                content,
                'ALERT',
                'SLA Monitor',
                {
                    'sla_instance_id': str(sla_instance.id),
                    'notification_type': 'sla_warning',
                    'threshold_percent': threshold_percent,
                },
            )
            return True

        except Exception:
            logger.exception("Error sending SLA warning notification")
            return False

    @staticmethod
    def send_sla_breach_notification(sla_instance: SLAInstance):
        """Send SLA breach notification to incident chat room.

        Returns:
            True if the message was created; False if the incident has no war
            room or an error occurred (the error is logged).
        """
        try:
            war_room = SLANotificationService._war_room_for(sla_instance.incident)
            if not war_room:
                return False

            breach_minutes = SLANotificationService._whole_minutes(sla_instance.breach_time)
            definition = sla_instance.sla_definition

            content = (
                "🚨 **SLA BREACHED** 🚨\n\n"
                f"**SLA:** {definition.name}\n"
                f"**Type:** {definition.get_sla_type_display()}\n"
                f"**Breach Time:** {breach_minutes} minutes ago\n"
                f"**Target Time:** {sla_instance.target_time.strftime(_TIMESTAMP_FORMAT)}\n\n"
                "**IMMEDIATE ACTION REQUIRED**\n"
                "Escalation procedures have been triggered."
            )

            SLANotificationService._post_system_message(
                war_room,
                content,
                'ALERT',
                'SLA Monitor',
                {
                    'sla_instance_id': str(sla_instance.id),
                    'notification_type': 'sla_breach',
                    'breach_minutes': breach_minutes,
                },
            )
            return True

        except Exception:
            logger.exception("Error sending SLA breach notification")
            return False

    @staticmethod
    def send_escalation_notification(escalation_instance: EscalationInstance):
        """Send escalation notification to incident chat room.

        The message lists one bullet per entry of
        ``escalation_instance.actions_taken``; a missing/empty value produces
        an empty list rather than suppressing the alert.

        Returns:
            True if the message was created; False if the incident has no war
            room or an error occurred (the error is logged).
        """
        try:
            war_room = SLANotificationService._war_room_for(escalation_instance.incident)
            if not war_room:
                return False

            policy = escalation_instance.escalation_policy
            header = (
                "📢 **ESCALATION TRIGGERED** 📢\n\n"
                f"**Policy:** {policy.name}\n"
                f"**Level:** {escalation_instance.escalation_level}\n"
                f"**Trigger:** {policy.get_trigger_condition_display()}\n"
                f"**Time:** {escalation_instance.triggered_at.strftime(_TIMESTAMP_FORMAT)}\n\n"
                "**Actions Taken:**\n"
            )
            actions = escalation_instance.actions_taken or []
            content = header + "".join(f"• {action}\n" for action in actions)

            SLANotificationService._post_system_message(
                war_room,
                content,
                'ALERT',
                'Escalation System',
                {
                    'escalation_instance_id': str(escalation_instance.id),
                    'notification_type': 'escalation',
                    'escalation_level': escalation_instance.escalation_level,
                },
            )
            return True

        except Exception:
            logger.exception("Error sending escalation notification")
            return False

    @staticmethod
    def send_oncall_handoff_notification(incident: Incident, old_oncall_user, new_oncall_user):
        """Send on-call handoff notification to incident chat room.

        Args:
            incident: Incident whose war room receives the message.
            old_oncall_user: Previous on-call user; a falsy value is rendered
                as ``'System'`` and stored as ``None``.
            new_oncall_user: User taking over; its ``username`` and ``id`` are
                read unconditionally.

        Returns:
            True if the message was created; False if the incident has no war
            room or an error occurred (the error is logged).
        """
        try:
            war_room = SLANotificationService._war_room_for(incident)
            if not war_room:
                return False

            previous_name = old_oncall_user.username if old_oncall_user else 'System'
            previous_id = str(old_oncall_user.id) if old_oncall_user else None

            content = (
                "🔄 **ON-CALL HANDOFF** 🔄\n\n"
                f"**From:** {previous_name}\n"
                f"**To:** {new_oncall_user.username}\n"
                f"**Time:** {timezone.now().strftime(_TIMESTAMP_FORMAT)}\n\n"
                "Please review the incident status and continue response activities."
            )

            SLANotificationService._post_system_message(
                war_room,
                content,
                'UPDATE',
                'On-Call System',
                {
                    'notification_type': 'oncall_handoff',
                    'old_oncall_user_id': previous_id,
                    'new_oncall_user_id': str(new_oncall_user.id),
                },
            )
            return True

        except Exception:
            logger.exception("Error sending on-call handoff notification")
            return False

    @staticmethod
    def send_sla_met_notification(sla_instance: SLAInstance):
        """Send SLA met notification to incident chat room.

        A missing ``response_time`` is reported as 0 minutes.

        Returns:
            True if the message was created; False if the incident has no war
            room or an error occurred (the error is logged).
        """
        try:
            war_room = SLANotificationService._war_room_for(sla_instance.incident)
            if not war_room:
                return False

            response_minutes = SLANotificationService._whole_minutes(sla_instance.response_time)
            definition = sla_instance.sla_definition

            content = (
                "✅ **SLA MET** ✅\n\n"
                f"**SLA:** {definition.name}\n"
                f"**Type:** {definition.get_sla_type_display()}\n"
                f"**Response Time:** {response_minutes} minutes\n"
                f"**Target Time:** {sla_instance.target_time.strftime(_TIMESTAMP_FORMAT)}\n\n"
                "Great job meeting the SLA target!"
            )

            SLANotificationService._post_system_message(
                war_room,
                content,
                'UPDATE',
                'SLA Monitor',
                {
                    'sla_instance_id': str(sla_instance.id),
                    'notification_type': 'sla_met',
                    'response_minutes': response_minutes,
                },
            )
            return True

        except Exception:
            logger.exception("Error sending SLA met notification")
            return False

    # ------------------------------------------------------------------
    # Queries and periodic checks
    # ------------------------------------------------------------------

    @staticmethod
    def get_sla_status_for_incident(incident: Incident) -> Dict[str, Any]:
        """Get SLA status summary for an incident.

        Returns:
            A dict with the keys ``total_slas``, ``active_slas``, ``met_slas``,
            ``breached_slas`` and ``sla_details`` (one dict per SLA instance,
            durations expressed in whole minutes). On failure the error is
            logged and ``{'error': <message>}`` is returned instead.
        """
        try:
            # Evaluate the queryset once so the counters and the per-SLA
            # details describe the same snapshot (and cost a single query).
            instances = list(SLAInstance.objects.filter(incident=incident))
            statuses = [sla.status for sla in instances]
            to_minutes = SLANotificationService._whole_minutes

            details = []
            for sla in instances:
                definition = sla.sla_definition
                details.append({
                    'id': str(sla.id),
                    'name': definition.name,
                    'type': definition.get_sla_type_display(),
                    'status': sla.status,
                    'target_time': sla.target_time.isoformat(),
                    # Remaining time is only meaningful while the SLA is running.
                    'time_remaining': to_minutes(sla.time_remaining) if sla.status == 'ACTIVE' else 0,
                    'breach_time': to_minutes(sla.breach_time) if sla.is_breached else 0,
                })

            return {
                'total_slas': len(instances),
                'active_slas': statuses.count('ACTIVE'),
                'met_slas': statuses.count('MET'),
                'breached_slas': statuses.count('BREACHED'),
                'sla_details': details,
            }

        except Exception as e:
            logger.exception("Error getting SLA status")
            return {'error': str(e)}

    @staticmethod
    def check_and_send_threshold_notifications():
        """Check all active SLAs and send threshold notifications.

        For every SLA with status ``'ACTIVE'`` whose definition has escalation
        enabled, a warning is sent once the elapsed share of the
        ``started_at`` -> ``target_time`` window reaches the definition's
        ``escalation_threshold_percent`` and no warning for that SLA exists yet.

        Each SLA is processed independently: an error on one SLA is logged and
        the sweep continues with the next.

        Returns:
            The number of warning notifications actually sent during this call.
        """
        notifications_sent = 0
        try:
            for sla in SLAInstance.objects.filter(status='ACTIVE'):
                try:
                    if SLANotificationService._warn_if_threshold_reached(sla):
                        notifications_sent += 1
                except Exception:
                    # One malformed SLA must not stop warnings for the rest.
                    logger.exception("Error checking SLA threshold for SLA %s", sla.id)
        except Exception:
            # Failure while fetching/iterating the queryset itself.
            logger.exception("Error checking SLA thresholds")

        return notifications_sent