ETB/ETB-API/knowledge_learning/management/commands/update_learning_patterns.py
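
"""Management command that mines recent incidents and postmortems to create or
refresh LearningPattern records for resolution, prevention, and escalation.

Typical invocations (a sketch; assumes the project's standard manage.py
entry point, and the flags defined in add_arguments below):

    python manage.py update_learning_patterns
    python manage.py update_learning_patterns --days 60 --min-frequency 5
    python manage.py update_learning_patterns --dry-run
"""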

from collections import Counter
from datetime import timedelta

from django.core.management.base import BaseCommand
from django.utils import timezone

from knowledge_learning.models import LearningPattern, Postmortem
from incident_intelligence.models import Incident


class Command(BaseCommand):
    help = 'Update learning patterns based on incident data and recommendations'

    def add_arguments(self, parser):
        parser.add_argument(
            '--days',
            type=int,
            default=30,
            help='Number of days back to analyze for pattern learning (default: 30)'
        )
        parser.add_argument(
            '--min-frequency',
            type=int,
            default=3,
            help='Minimum frequency required to create a pattern (default: 3)'
        )
        parser.add_argument(
            '--dry-run',
            action='store_true',
            help='Show what patterns would be created/updated without actually doing it'
        )

    def handle(self, *args, **options):
        days = options['days']
        min_frequency = options['min_frequency']
        dry_run = options['dry_run']

        # Calculate the date range to analyze
        end_date = timezone.now()
        start_date = end_date - timedelta(days=days)

        self.stdout.write(
            self.style.SUCCESS(f'Analyzing incidents from {start_date.date()} to {end_date.date()}')
        )
        if dry_run:
            self.stdout.write(self.style.WARNING('DRY RUN - No patterns will be created/updated'))

        # Analyze resolution patterns
        self.stdout.write('\nAnalyzing resolution patterns...')
        resolution_patterns = self.analyze_resolution_patterns(start_date, end_date, min_frequency, dry_run)

        # Analyze prevention patterns
        self.stdout.write('\nAnalyzing prevention patterns...')
        prevention_patterns = self.analyze_prevention_patterns(start_date, end_date, min_frequency, dry_run)

        # Analyze escalation patterns
        self.stdout.write('\nAnalyzing escalation patterns...')
        escalation_patterns = self.analyze_escalation_patterns(start_date, end_date, min_frequency, dry_run)

        # Summary
        total_patterns = resolution_patterns + prevention_patterns + escalation_patterns
        self.stdout.write('\n' + '=' * 50)
        self.stdout.write(
            self.style.SUCCESS('Learning pattern analysis completed:')
        )
        self.stdout.write(f'  Resolution patterns: {resolution_patterns}')
        self.stdout.write(f'  Prevention patterns: {prevention_patterns}')
        self.stdout.write(f'  Escalation patterns: {escalation_patterns}')
        self.stdout.write(f'  Total patterns: {total_patterns}')
        self.stdout.write('=' * 50)
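
    # Shared scoring convention for the three analyzers below: confidence_score
    # grows linearly with the number of supporting samples and saturates at 1.0
    # once ten or more samples back the pattern, i.e. min(n / 10.0, 1.0).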
    def analyze_resolution_patterns(self, start_date, end_date, min_frequency, dry_run):
        """Analyze patterns in incident resolution."""
        # Get resolved incidents in the time period
        resolved_incidents = Incident.objects.filter(
            status__in=['RESOLVED', 'CLOSED'],
            resolved_at__gte=start_date,
            resolved_at__lte=end_date
        )

        # Group incidents by category and collect resolution statistics
        category_patterns = {}
        for incident in resolved_incidents:
            category = incident.category or 'Unknown'
            if category not in category_patterns:
                category_patterns[category] = {
                    'incidents': [],
                    'resolution_times': [],
                    'assigned_users': [],
                    'severities': []
                }
            pattern = category_patterns[category]
            pattern['incidents'].append(incident)
            if incident.resolved_at:  # always true given the filter above; kept as a guard
                resolution_time = incident.resolved_at - incident.created_at
                pattern['resolution_times'].append(resolution_time)
            if incident.assigned_to:
                pattern['assigned_users'].append(incident.assigned_to)
            pattern['severities'].append(incident.severity)
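
        # At this point category_patterns maps each category to its evidence,
        # e.g. (hypothetical category name and values):
        #   {'Database': {'incidents': [<Incident>, ...],
        #                 'resolution_times': [timedelta(hours=2), ...],
        #                 'assigned_users': [<User>, ...],
        #                 'severities': ['HIGH', ...]}}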
        patterns_created = 0
        for category, data in category_patterns.items():
            if len(data['incidents']) >= min_frequency:
                # Calculate pattern statistics
                avg_resolution_time = sum(data['resolution_times'], timedelta()) / len(data['resolution_times'])
                user_counts = Counter(data['assigned_users'])
                common_users = [user for user, count in user_counts.items() if count >= min_frequency]

                # Create or update the pattern (skipped entirely on a dry run)
                pattern_name = f"Resolution Pattern: {category}"
                if not dry_run:
                    pattern, created = LearningPattern.objects.get_or_create(
                        name=pattern_name,
                        pattern_type='RESOLUTION',
                        defaults={
                            'description': f"Resolution pattern for {category} incidents based on {len(data['incidents'])} resolved incidents",
                            'frequency': len(data['incidents']),
                            'success_rate': 1.0,  # all resolved incidents count as successes
                            'confidence_score': min(len(data['incidents']) / 10.0, 1.0),
                            'triggers': [category],
                            'actions': [
                                f"Average resolution time: {avg_resolution_time}",
                                f"Common assignees: {', '.join(u.username for u in common_users[:3])}" if common_users else "Various assignees"
                            ],
                            'outcomes': ["Incident resolved successfully"]
                        }
                    )
                    if not created:
                        # Refresh the counters on an existing pattern
                        pattern.frequency = len(data['incidents'])
                        pattern.confidence_score = min(len(data['incidents']) / 10.0, 1.0)
                        pattern.save()

                    # Link the supporting incidents
                    pattern.source_incidents.set(data['incidents'])

                # Report and count the pattern even on a dry run
                self.stdout.write(f'{pattern_name}: {len(data["incidents"])} incidents, avg resolution: {avg_resolution_time}')
                patterns_created += 1

        return patterns_created
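
    # A sketch of inspecting the resolution patterns afterwards from a Django
    # shell (field names as used in the method above):
    #   LearningPattern.objects.filter(pattern_type='RESOLUTION') \
    #       .values('name', 'frequency', 'confidence_score')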

    def analyze_prevention_patterns(self, start_date, end_date, min_frequency, dry_run):
        """Analyze patterns for incident prevention."""
        # Get postmortems with action items
        postmortems = Postmortem.objects.filter(
            status='PUBLISHED',
            created_at__gte=start_date,
            created_at__lte=end_date,
            action_items__isnull=False
        ).exclude(action_items=[])
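
        # Assumed JSON shape for action_items (inferred from the access pattern
        # below): a list of dicts such as
        #   [{"title": "...", "description": "...", "category": "Prevention"}]
        # Items whose category is anything other than 'Prevention' are skipped.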

        # Group prevention action items by incident category
        prevention_actions = {}
        for postmortem in postmortems:
            category = postmortem.incident.category or 'Unknown'
            if category not in prevention_actions:
                prevention_actions[category] = []
            for action_item in postmortem.action_items:
                if isinstance(action_item, dict) and action_item.get('category') == 'Prevention':
                    prevention_actions[category].append({
                        'action': action_item.get('title', ''),
                        'description': action_item.get('description', ''),
                        'postmortem': postmortem
                    })

        patterns_created = 0
        for category, actions in prevention_actions.items():
            if len(actions) >= min_frequency:
                # Find prevention actions that recur often enough to matter
                action_counts = Counter(action['action'] for action in actions)
                common_actions = [action for action, count in action_counts.items() if count >= min_frequency]

                if common_actions:
                    pattern_name = f"Prevention Pattern: {category}"
                    if not dry_run:
                        pattern, created = LearningPattern.objects.get_or_create(
                            name=pattern_name,
                            pattern_type='PREVENTION',
                            defaults={
                                'description': f"Prevention pattern for {category} incidents based on {len(actions)} prevention actions",
                                'frequency': len(actions),
                                'success_rate': 0.8,  # assumed success rate for prevention
                                'confidence_score': min(len(actions) / 10.0, 1.0),
                                'triggers': [category],
                                'actions': common_actions,
                                'outcomes': ["Reduced incident frequency", "Improved system reliability"]
                            }
                        )
                        if not created:
                            pattern.frequency = len(actions)
                            pattern.confidence_score = min(len(actions) / 10.0, 1.0)
                            pattern.save()

                        # Link the supporting postmortems
                        source_postmortems = [action['postmortem'] for action in actions]
                        pattern.source_postmortems.set(source_postmortems)

                    # Report and count the pattern even on a dry run
                    self.stdout.write(f'{pattern_name}: {len(actions)} prevention actions, {len(common_actions)} common actions')
                    patterns_created += 1

        return patterns_created

    def analyze_escalation_patterns(self, start_date, end_date, min_frequency, dry_run):
        """Analyze patterns in incident escalation."""
        # Get incidents that were escalated (changed severity or priority).
        # This is a simplified analysis - in production, you'd track escalation events.
        escalated_incidents = Incident.objects.filter(
            created_at__gte=start_date,
            created_at__lte=end_date,
            severity__in=['HIGH', 'CRITICAL', 'EMERGENCY']
        )

        # Group by category and analyze escalation patterns
        escalation_patterns = {}
        for incident in escalated_incidents:
            category = incident.category or 'Unknown'
            if category not in escalation_patterns:
                escalation_patterns[category] = {
                    'incidents': [],
                    'severities': [],
                    'escalation_times': []
                }
            pattern = escalation_patterns[category]
            pattern['incidents'].append(incident)
            pattern['severities'].append(incident.severity)
            # Estimate escalation time (time from creation to first update)
            if incident.updated_at > incident.created_at:
                escalation_time = incident.updated_at - incident.created_at
                pattern['escalation_times'].append(escalation_time)

        patterns_created = 0
        for category, data in escalation_patterns.items():
            if len(data['incidents']) >= min_frequency:
                # Calculate escalation statistics
                avg_escalation_time = (sum(data['escalation_times'], timedelta()) /
                                       len(data['escalation_times'])) if data['escalation_times'] else timedelta()
                severity_distribution = dict(Counter(data['severities']))

                pattern_name = f"Escalation Pattern: {category}"
                if not dry_run:
                    pattern, created = LearningPattern.objects.get_or_create(
                        name=pattern_name,
                        pattern_type='ESCALATION',
                        defaults={
                            'description': f"Escalation pattern for {category} incidents based on {len(data['incidents'])} escalated incidents",
                            'frequency': len(data['incidents']),
                            'success_rate': 0.7,  # assumed success rate for escalation
                            'confidence_score': min(len(data['incidents']) / 10.0, 1.0),
                            'triggers': [category],
                            'actions': [
                                f"Average escalation time: {avg_escalation_time}",
                                f"Severity distribution: {severity_distribution}"
                            ],
                            'outcomes': ["Appropriate incident handling", "Faster resolution"]
                        }
                    )
                    if not created:
                        pattern.frequency = len(data['incidents'])
                        pattern.confidence_score = min(len(data['incidents']) / 10.0, 1.0)
                        pattern.save()

                    # Link the supporting incidents
                    pattern.source_incidents.set(data['incidents'])

                # Report and count the pattern even on a dry run
                self.stdout.write(f'{pattern_name}: {len(data["incidents"])} incidents, avg escalation: {avg_escalation_time}')
                patterns_created += 1

        return patterns_created
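

# Scheduling sketch (an assumption, not part of this project's deploy config):
# with cron available and the Django project root at a hypothetical /srv/etb-api,
# patterns could be refreshed nightly over the default 30-day window like so:
#
#   0 2 * * * cd /srv/etb-api && python manage.py update_learning_patterns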