Files
ETB/ETB-API/sla_oncall/models.py
Iliyan Angelov 6b247e5b9f Updates
2025-09-19 11:58:53 +03:00

653 lines
22 KiB
Python

"""
SLA & On-Call Enhancement models for Enterprise Incident Management API
Implements dynamic SLAs, escalation policies, on-call rotations, and business hours management
"""
import uuid
import json
from datetime import datetime, timedelta, time
from typing import Dict, Any, Optional, List
from zoneinfo import ZoneInfo
from django.db import models
from django.contrib.auth import get_user_model
from django.core.validators import MinValueValidator, MaxValueValidator
from django.utils import timezone
from django.core.exceptions import ValidationError
# User model class returned by Django's get_user_model(); the models below
# use it as the target of their ForeignKey fields.
User = get_user_model()
class BusinessHours(models.Model):
    """Business hours configuration for different teams and services.

    Stores a regular weekday window, a regular weekend window, optional
    per-date overrides and a holiday list. All of them are evaluated in the
    IANA timezone named by the ``timezone`` field.
    """

    WEEKDAYS = [
        ('MONDAY', 'Monday'),
        ('TUESDAY', 'Tuesday'),
        ('WEDNESDAY', 'Wednesday'),
        ('THURSDAY', 'Thursday'),
        ('FRIDAY', 'Friday'),
        ('SATURDAY', 'Saturday'),
        ('SUNDAY', 'Sunday'),
    ]

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    name = models.CharField(max_length=200, unique=True)
    description = models.TextField()
    timezone = models.CharField(max_length=50, default='UTC')

    # Business hours configuration
    weekday_start = models.TimeField(default=time(9, 0))  # 9:00 AM
    weekday_end = models.TimeField(default=time(17, 0))  # 5:00 PM
    weekend_start = models.TimeField(default=time(10, 0))  # 10:00 AM
    weekend_end = models.TimeField(default=time(16, 0))  # 4:00 PM

    # Specific day overrides: maps 'YYYY-MM-DD' to a dict that may carry
    # 'is_holiday', 'start_time' and 'end_time' (see is_business_hours).
    day_overrides = models.JSONField(
        default=dict,
        help_text="Override hours for specific dates (YYYY-MM-DD format)"
    )

    # Holiday configuration
    holiday_calendar = models.JSONField(
        default=list,
        help_text="List of holidays (YYYY-MM-DD format) when business hours don't apply"
    )

    # Status
    is_active = models.BooleanField(default=True)
    is_default = models.BooleanField(default=False, help_text="Default business hours for the system")

    # Metadata
    created_by = models.ForeignKey(User, on_delete=models.SET_NULL, null=True)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ['name']
        indexes = [
            models.Index(fields=['is_active', 'is_default']),
        ]

    def __str__(self):
        return f"{self.name} ({self.timezone})"

    def _regular_hours(self, on_date):
        """Return the configured ``(start, end)`` window for ``on_date``.

        Saturday and Sunday use the weekend window; every other day uses the
        weekday window.
        """
        if on_date.weekday() >= 5:  # Saturday = 5, Sunday = 6
            return self.weekend_start, self.weekend_end
        return self.weekday_start, self.weekday_end

    def is_business_hours(self, check_time: Optional[datetime] = None) -> bool:
        """Check if given time (or now) is within business hours.

        Args:
            check_time: Moment to test. Defaults to ``timezone.now()``. A naive
                datetime is made aware with ``timezone.make_aware`` before it
                is converted to this calendar's timezone.

        Returns:
            True when the local time falls inside the applicable window
            (both bounds inclusive); False on holidays and outside the window.

        Raises:
            zoneinfo.ZoneInfoNotFoundError: if ``self.timezone`` is not a
                known IANA zone name.
            ValueError: if an override carries a ``start_time``/``end_time``
                that ``time.fromisoformat`` cannot parse.
        """
        if check_time is None:
            check_time = timezone.now()

        # Convert to business hours timezone
        if check_time.tzinfo is None:
            check_time = timezone.make_aware(check_time)
        local_time = check_time.astimezone(ZoneInfo(self.timezone))
        current_date = local_time.date()
        current_time = local_time.time()
        date_key = current_date.strftime('%Y-%m-%d')

        # Holidays never count as business hours.
        if date_key in self.holiday_calendar:
            return False

        # Start from the regular window for this day of the week ...
        start_time, end_time = self._regular_hours(current_date)

        # ... then let a per-date override replace whichever bounds it
        # specifies. A bound the override omits keeps the configured regular
        # value instead of a hard-coded 09:00 / 17:00.
        if date_key in self.day_overrides:
            override = self.day_overrides[date_key]
            if override.get('is_holiday', False):
                return False
            if 'start_time' in override:
                start_time = time.fromisoformat(override['start_time'])
            if 'end_time' in override:
                end_time = time.fromisoformat(override['end_time'])

        return start_time <= current_time <= end_time
class SLADefinition(models.Model):
    """Dynamic SLA definitions based on incident type, severity, and business context.

    The three ``incident_*`` JSON lists are targeting filters: an empty list
    places no restriction on that attribute (see ``applies_to_incident``).
    """

    SLA_TYPES = [
        ('RESPONSE_TIME', 'Response Time'),
        ('RESOLUTION_TIME', 'Resolution Time'),
        ('ACKNOWLEDGMENT_TIME', 'Acknowledgment Time'),
        ('FIRST_RESPONSE', 'First Response Time'),
    ]
    SEVERITY_CHOICES = [
        ('ALL', 'All Severities'),
        ('LOW', 'Low'),
        ('MEDIUM', 'Medium'),
        ('HIGH', 'High'),
        ('CRITICAL', 'Critical'),
        ('EMERGENCY', 'Emergency'),
    ]
    PRIORITY_CHOICES = [
        ('ALL', 'All Priorities'),
        ('P1', 'P1 - Critical'),
        ('P2', 'P2 - High'),
        ('P3', 'P3 - Medium'),
        ('P4', 'P4 - Low'),
    ]
    # Wildcard value declared in SEVERITY_CHOICES / PRIORITY_CHOICES.
    _WILDCARD = 'ALL'

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    name = models.CharField(max_length=200)
    description = models.TextField()
    sla_type = models.CharField(max_length=20, choices=SLA_TYPES)

    # Targeting criteria
    incident_categories = models.JSONField(
        default=list,
        help_text="List of incident categories this SLA applies to"
    )
    incident_severities = models.JSONField(
        default=list,
        help_text="List of incident severities this SLA applies to"
    )
    incident_priorities = models.JSONField(
        default=list,
        help_text="List of incident priorities this SLA applies to"
    )

    # SLA targets
    target_duration_minutes = models.PositiveIntegerField(
        help_text="SLA target in minutes"
    )
    business_hours_only = models.BooleanField(
        default=False,
        help_text="Whether SLA only applies during business hours"
    )
    business_hours = models.ForeignKey(
        BusinessHours,
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        help_text="Business hours configuration for this SLA"
    )

    # Escalation configuration
    escalation_enabled = models.BooleanField(default=True)
    escalation_threshold_percent = models.FloatField(
        default=80.0,
        validators=[MinValueValidator(0.0), MaxValueValidator(100.0)],
        help_text="Escalate when X% of SLA time has passed"
    )

    # Status and metadata
    is_active = models.BooleanField(default=True)
    is_default = models.BooleanField(default=False)
    created_by = models.ForeignKey(User, on_delete=models.SET_NULL, null=True)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ['name']
        # NOTE(review): the last two indexes are plain indexes on JSONField
        # columns - confirm the target database supports and benefits from them.
        indexes = [
            models.Index(fields=['sla_type', 'is_active']),
            models.Index(fields=['incident_severities']),
            models.Index(fields=['incident_categories']),
        ]

    def __str__(self):
        return f"{self.name} ({self.sla_type})"

    def applies_to_incident(self, incident) -> bool:
        """Check if this SLA applies to the given incident.

        Args:
            incident: Object exposing ``category``, ``severity`` and
                ``priority``. Each attribute is only read when the matching
                filter list is non-empty.

        Returns:
            False when the SLA is inactive or any non-empty filter rejects the
            incident; True otherwise. A severity or priority filter that
            contains ``'ALL'`` accepts every value.
        """
        if not self.is_active:
            return False

        # Check categories (no wildcard is declared for categories)
        categories = self.incident_categories
        if categories and incident.category not in categories:
            return False

        # Check severities; 'ALL' ("All Severities") matches any severity
        severities = self.incident_severities
        if (severities and self._WILDCARD not in severities
                and incident.severity not in severities):
            return False

        # Check priorities; 'ALL' ("All Priorities") matches any priority
        priorities = self.incident_priorities
        if (priorities and self._WILDCARD not in priorities
                and incident.priority not in priorities):
            return False

        return True
class EscalationPolicy(models.Model):
    """Escalation policy that can be attached to incidents and SLAs.

    ``incident_severities`` and ``incident_categories`` act as filters; an
    empty list means the policy is not restricted on that attribute.
    """

    ESCALATION_TYPES = [
        ('TIME_BASED', 'Time-based Escalation'),
        ('SEVERITY_BASED', 'Severity-based Escalation'),
        ('RESOURCE_BASED', 'Resource-based Escalation'),
        ('CUSTOM', 'Custom Escalation'),
    ]
    TRIGGER_CONDITIONS = [
        ('SLA_BREACH', 'SLA Breach'),
        ('SLA_THRESHOLD', 'SLA Threshold Reached'),
        ('NO_RESPONSE', 'No Response'),
        ('NO_ACKNOWLEDGMENT', 'No Acknowledgment'),
        ('CUSTOM', 'Custom Condition'),
    ]

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    name = models.CharField(max_length=200, unique=True)
    description = models.TextField()
    escalation_type = models.CharField(max_length=20, choices=ESCALATION_TYPES)
    trigger_condition = models.CharField(max_length=20, choices=TRIGGER_CONDITIONS)

    # --- Which incidents the policy targets ---
    incident_severities = models.JSONField(
        default=list,
        help_text="List of incident severities this policy applies to",
    )
    incident_categories = models.JSONField(
        default=list,
        help_text="List of incident categories this policy applies to",
    )

    # --- How and when escalation proceeds ---
    trigger_delay_minutes = models.PositiveIntegerField(
        default=0,
        help_text="Delay before escalation triggers (in minutes)",
    )
    escalation_steps = models.JSONField(
        default=list,
        help_text="List of escalation steps with timing and actions",
    )

    # --- Who gets notified and with what content ---
    notification_channels = models.JSONField(
        default=list,
        help_text="Channels to notify during escalation (email, sms, slack, etc.)",
    )
    notification_templates = models.JSONField(
        default=dict,
        help_text="Templates for different notification channels",
    )

    # --- Bookkeeping ---
    is_active = models.BooleanField(default=True)
    is_default = models.BooleanField(default=False)
    created_by = models.ForeignKey(User, on_delete=models.SET_NULL, null=True)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ['name']
        indexes = [
            models.Index(fields=['escalation_type', 'is_active']),
            models.Index(fields=['trigger_condition']),
        ]

    def __str__(self):
        return f"{self.name} ({self.escalation_type})"

    def applies_to_incident(self, incident) -> bool:
        """Tell whether this policy should be used for ``incident``.

        An inactive policy never applies. Otherwise the incident must pass the
        severity filter and then the category filter; an empty filter list
        accepts everything.
        """
        if not self.is_active:
            return False

        # Severity is evaluated first; category is only consulted afterwards.
        severity_ok = (
            not self.incident_severities
            or incident.severity in self.incident_severities
        )
        if not severity_ok:
            return False

        category_ok = (
            not self.incident_categories
            or incident.category in self.incident_categories
        )
        return category_ok
class OnCallRotation(models.Model):
    """A named on-call rotation for a team.

    Individual shifts are reached through the ``assignments`` reverse
    relation used by ``get_current_oncall``.
    """

    ROTATION_TYPES = [
        ('WEEKLY', 'Weekly Rotation'),
        ('DAILY', 'Daily Rotation'),
        ('MONTHLY', 'Monthly Rotation'),
        ('CUSTOM', 'Custom Schedule'),
    ]
    STATUS_CHOICES = [
        ('ACTIVE', 'Active'),
        ('PAUSED', 'Paused'),
        ('INACTIVE', 'Inactive'),
    ]

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    name = models.CharField(max_length=200, unique=True)
    description = models.TextField()
    rotation_type = models.CharField(max_length=20, choices=ROTATION_TYPES)
    status = models.CharField(max_length=20, choices=STATUS_CHOICES, default='ACTIVE')

    # --- Owning team ---
    team_name = models.CharField(max_length=100)
    team_description = models.TextField(blank=True, null=True)

    # --- Schedule ---
    schedule_config = models.JSONField(
        default=dict,
        help_text="Configuration for the rotation schedule",
    )
    timezone = models.CharField(max_length=50, default='UTC')

    # --- External paging/scheduling system ---
    external_system = models.CharField(
        max_length=50,
        choices=[
            ('PAGERDUTY', 'PagerDuty'),
            ('OPSGENIE', 'OpsGenie'),
            ('INTERNAL', 'Internal System'),
            ('CUSTOM', 'Custom Integration'),
        ],
        default='INTERNAL',
    )
    external_system_id = models.CharField(
        max_length=255,
        blank=True,
        null=True,
        help_text="ID in external system (PagerDuty schedule ID, etc.)",
    )
    integration_config = models.JSONField(
        default=dict,
        help_text="Configuration for external system integration",
    )

    # --- Bookkeeping ---
    created_by = models.ForeignKey(User, on_delete=models.SET_NULL, null=True)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ['name']
        indexes = [
            models.Index(fields=['status', 'rotation_type']),
            models.Index(fields=['external_system']),
        ]

    def __str__(self):
        return f"{self.name} ({self.rotation_type})"

    def get_current_oncall(self, check_time: datetime = None):
        """Return the assignment covering ``check_time`` (default: now).

        Only assignments with status ``'ACTIVE'`` whose start/end interval
        contains the moment (bounds inclusive) are considered. The first
        match is returned, or ``None`` when nothing matches.
        """
        moment = timezone.now() if check_time is None else check_time
        covering = self.assignments.filter(
            start_time__lte=moment,
            end_time__gte=moment,
            status='ACTIVE',
        )
        return covering.first()
class OnCallAssignment(models.Model):
    """Individual on-call assignments within rotations.

    Each row places one user on call for a rotation between ``start_time``
    and ``end_time``.
    """

    STATUS_CHOICES = [
        ('SCHEDULED', 'Scheduled'),
        ('ACTIVE', 'Active'),
        ('COMPLETED', 'Completed'),
        ('CANCELLED', 'Cancelled'),
    ]

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    rotation = models.ForeignKey(
        OnCallRotation,
        on_delete=models.CASCADE,
        related_name='assignments'
    )
    user = models.ForeignKey(User, on_delete=models.CASCADE)

    # Schedule
    start_time = models.DateTimeField()
    end_time = models.DateTimeField()

    # Status
    status = models.CharField(max_length=20, choices=STATUS_CHOICES, default='SCHEDULED')

    # Handoff information
    handoff_notes = models.TextField(blank=True, null=True)
    handed_off_from = models.ForeignKey(
        User,
        on_delete=models.SET_NULL,
        null=True,
        blank=True,
        related_name='handed_off_assignments'
    )
    handoff_time = models.DateTimeField(null=True, blank=True)

    # Performance tracking
    incidents_handled = models.PositiveIntegerField(default=0)
    response_time_avg = models.DurationField(null=True, blank=True)

    # Metadata
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ['start_time']
        indexes = [
            models.Index(fields=['rotation', 'start_time']),
            models.Index(fields=['user', 'start_time']),
            models.Index(fields=['status', 'start_time']),
        ]

    def __str__(self):
        # The user model comes from get_user_model() and may be a custom one,
        # so go through get_username() rather than a `username` attribute.
        return f"{self.user.get_username()} on-call {self.start_time} - {self.end_time}"

    def clean(self):
        """Reject assignments whose end is not after their start.

        Such a row could never be matched by ``is_current`` in any useful
        way, so it is reported during model validation instead.

        Raises:
            ValidationError: keyed on ``end_time`` when both bounds are set
                and ``end_time <= start_time``.
        """
        super().clean()
        # Skip the comparison while either bound is still unset; the fields'
        # own required-ness checks report that case.
        if self.start_time is None or self.end_time is None:
            return
        if self.end_time <= self.start_time:
            raise ValidationError({'end_time': 'End time must be after start time.'})

    def is_current(self, check_time: Optional[datetime] = None) -> bool:
        """Check if this assignment is currently active.

        Args:
            check_time: Moment to test; defaults to ``timezone.now()``.

        Returns:
            True when status is ``'ACTIVE'`` and ``check_time`` lies within
            ``[start_time, end_time]`` (both bounds inclusive).
        """
        if check_time is None:
            check_time = timezone.now()
        return (self.status == 'ACTIVE' and
                self.start_time <= check_time <= self.end_time)
class SLAInstance(models.Model):
    """Instance of SLA tracking for a specific incident.

    Links one ``SLADefinition`` to one incident and records the deadline
    (``target_time``) together with the outcome and escalation state.
    """

    STATUS_CHOICES = [
        ('ACTIVE', 'Active'),
        ('MET', 'SLA Met'),
        ('BREACHED', 'SLA Breached'),
        ('CANCELLED', 'Cancelled'),
    ]

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    sla_definition = models.ForeignKey(SLADefinition, on_delete=models.CASCADE)
    incident = models.ForeignKey(
        'incident_intelligence.Incident',
        on_delete=models.CASCADE,
        related_name='sla_instances'
    )

    # SLA tracking
    status = models.CharField(max_length=20, choices=STATUS_CHOICES, default='ACTIVE')
    target_time = models.DateTimeField(help_text="When the SLA should be met")

    # Timing
    started_at = models.DateTimeField(auto_now_add=True)
    met_at = models.DateTimeField(null=True, blank=True)
    breached_at = models.DateTimeField(null=True, blank=True)

    # Escalation tracking
    escalation_policy = models.ForeignKey(
        EscalationPolicy,
        on_delete=models.SET_NULL,
        null=True,
        blank=True
    )
    escalation_triggered = models.BooleanField(default=False)
    escalation_triggered_at = models.DateTimeField(null=True, blank=True)
    escalation_level = models.PositiveIntegerField(default=0)

    # Performance metrics
    response_time = models.DurationField(null=True, blank=True)
    resolution_time = models.DurationField(null=True, blank=True)

    # Metadata
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['incident', 'status']),
            models.Index(fields=['sla_definition', 'status']),
            models.Index(fields=['target_time', 'status']),
        ]

    def __str__(self):
        return f"SLA {self.sla_definition.name} for {self.incident.title}"

    @property
    def is_breached(self) -> bool:
        """Check if SLA is breached.

        True when the status is ``'BREACHED'``, or when the status is still
        ``'ACTIVE'`` and the current time is past ``target_time``. Any other
        status yields False.
        """
        if self.status == 'BREACHED':
            return True
        if self.status == 'ACTIVE':
            return timezone.now() > self.target_time
        return False

    @property
    def time_remaining(self) -> timedelta:
        """Get time remaining until SLA breach.

        Returns a zero duration when the instance is not ``'ACTIVE'`` or the
        deadline has already passed; never negative.
        """
        if self.status != 'ACTIVE':
            return timedelta(0)
        return max(self.target_time - timezone.now(), timedelta(0))

    @property
    def breach_time(self) -> timedelta:
        """Get time since SLA breach.

        Returns a zero duration when the SLA is not breached. The result is
        clamped at zero so that an instance whose status is ``'BREACHED'``
        while ``target_time`` still lies ahead does not report a negative
        duration.
        """
        if not self.is_breached:
            return timedelta(0)
        return max(timezone.now() - self.target_time, timedelta(0))
class EscalationInstance(models.Model):
    """One escalation run of an ``EscalationPolicy`` against an incident.

    May optionally be tied to the ``SLAInstance`` it relates to.
    """

    STATUS_CHOICES = [
        ('PENDING', 'Pending'),
        ('TRIGGERED', 'Triggered'),
        ('ACKNOWLEDGED', 'Acknowledged'),
        ('RESOLVED', 'Resolved'),
        ('CANCELLED', 'Cancelled'),
    ]

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    escalation_policy = models.ForeignKey(EscalationPolicy, on_delete=models.CASCADE)
    incident = models.ForeignKey(
        'incident_intelligence.Incident',
        on_delete=models.CASCADE,
        related_name='escalation_instances',
    )
    sla_instance = models.ForeignKey(
        SLAInstance,
        on_delete=models.CASCADE,
        null=True,
        blank=True,
        related_name='escalation_instances',
    )

    # --- Progress through the escalation ---
    status = models.CharField(max_length=20, choices=STATUS_CHOICES, default='PENDING')
    escalation_level = models.PositiveIntegerField(default=1)
    current_step = models.PositiveIntegerField(default=0)

    # --- Lifecycle timestamps (all optional) ---
    triggered_at = models.DateTimeField(null=True, blank=True)
    acknowledged_at = models.DateTimeField(null=True, blank=True)
    resolved_at = models.DateTimeField(null=True, blank=True)

    # --- Audit trail of what was done ---
    notifications_sent = models.JSONField(
        default=list,
        help_text="List of notifications sent during escalation",
    )
    actions_taken = models.JSONField(
        default=list,
        help_text="List of actions taken during escalation",
    )

    # --- Bookkeeping ---
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ['-created_at']
        indexes = [
            models.Index(fields=['incident', 'status']),
            models.Index(fields=['escalation_policy', 'status']),
            models.Index(fields=['triggered_at']),
        ]

    def __str__(self):
        # Read the policy name first, then the incident title.
        policy_name = self.escalation_policy.name
        incident_title = self.incident.title
        return f"Escalation {policy_name} for {incident_title}"
class NotificationTemplate(models.Model):
    """Reusable message template for escalation and on-call notifications.

    A template is identified by the combination of its type, its channel and
    its name (enforced through ``unique_together``).
    """

    TEMPLATE_TYPES = [
        ('ESCALATION', 'Escalation Notification'),
        ('ONCALL_HANDOFF', 'On-Call Handoff'),
        ('SLA_BREACH', 'SLA Breach Alert'),
        ('SLA_WARNING', 'SLA Warning'),
        ('CUSTOM', 'Custom Notification'),
    ]
    CHANNEL_TYPES = [
        ('EMAIL', 'Email'),
        ('SMS', 'SMS'),
        ('SLACK', 'Slack'),
        ('TEAMS', 'Microsoft Teams'),
        ('WEBHOOK', 'Webhook'),
        ('CUSTOM', 'Custom Channel'),
    ]

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    name = models.CharField(max_length=200)
    template_type = models.CharField(max_length=20, choices=TEMPLATE_TYPES)
    channel_type = models.CharField(max_length=20, choices=CHANNEL_TYPES)

    # --- Message content ---
    subject_template = models.CharField(
        max_length=500,
        help_text="Subject template with variables",
    )
    body_template = models.TextField(help_text="Body template with variables")
    variables = models.JSONField(
        default=list,
        help_text="Available variables for this template",
    )

    # --- Bookkeeping ---
    is_active = models.BooleanField(default=True)
    is_default = models.BooleanField(default=False)
    created_by = models.ForeignKey(User, on_delete=models.SET_NULL, null=True)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        ordering = ['template_type', 'channel_type', 'name']
        unique_together = ['template_type', 'channel_type', 'name']
        indexes = [
            models.Index(fields=['template_type', 'channel_type']),
            models.Index(fields=['is_active']),
        ]

    def __str__(self):
        kind = self.template_type
        channel = self.channel_type
        return f"{self.name} ({kind} - {channel})"