import logging
from datetime import timedelta

from django.db.models.signals import post_save, post_delete, pre_save
from django.dispatch import receiver
from django.utils import timezone

from .models import (
    Postmortem, KnowledgeBaseArticle, IncidentRecommendation,
    LearningPattern, KnowledgeBaseUsage, AutomatedPostmortemGeneration
)
from incident_intelligence.models import Incident

logger = logging.getLogger(__name__)


@receiver(post_save, sender=Incident)
def incident_resolved_handler(sender, instance, created, **kwargs):
    """Handle incident resolution and trigger postmortem generation"""
    # Only process if the incident was just resolved (not on creation)
    if not created and instance.status in ['RESOLVED', 'CLOSED']:
        try:
            # Check whether the incident severity warrants automatic postmortem generation
            if instance.severity in ['HIGH', 'CRITICAL', 'EMERGENCY']:
                from .services.postmortem_generator import PostmortemGenerator
                generator = PostmortemGenerator()

                # Generate the postmortem asynchronously (in production, use Celery)
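                # A Celery-based dispatch (hypothetical task name) might look like:
                #   generate_postmortem_task.delay(incident_id=str(instance.id))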
                try:
                    result = generator.generate_postmortem_for_incident(
                        incident_id=str(instance.id),
                        include_timeline=True,
                        include_logs=True,
                        trigger='incident_resolved'
                    )
                    logger.info(f"Generated postmortem for incident {instance.id}: {result['postmortem_id']}")
                except Exception as e:
                    logger.error(f"Failed to generate postmortem for incident {instance.id}: {str(e)}")

                # Generate recommendations for similar incidents
                from .services.recommendation_engine import RecommendationEngine
                recommendation_engine = RecommendationEngine()
                try:
                    recommendations = recommendation_engine.generate_recommendations(
                        incident_id=str(instance.id),
                        max_recommendations=3,
                        min_confidence=0.6
                    )
                    logger.info(f"Generated {len(recommendations)} recommendations for incident {instance.id}")
                except Exception as e:
                    logger.error(f"Failed to generate recommendations for incident {instance.id}: {str(e)}")
        except Exception as e:
            logger.error(f"Error in incident_resolved_handler: {str(e)}")


@receiver(post_save, sender=Postmortem)
def postmortem_saved_handler(sender, instance, created, **kwargs):
    """Handle postmortem creation and updates"""
    if created:
        # Set a default due date if not provided
        if not instance.due_date:
            instance.due_date = timezone.now() + timedelta(days=7)
            instance.save(update_fields=['due_date'])
        logger.info(f"Created postmortem {instance.id} for incident {instance.incident.id}")

    # If the postmortem is published, create knowledge base articles
    if instance.status == 'PUBLISHED' and not created:
        try:
            create_knowledge_articles_from_postmortem(instance)
        except Exception as e:
            logger.error(f"Failed to create knowledge articles from postmortem {instance.id}: {str(e)}")


@receiver(post_save, sender=KnowledgeBaseArticle)
def knowledge_article_saved_handler(sender, instance, created, **kwargs):
    """Handle knowledge base article creation and updates"""
    if created:
        # Set a default review date if not provided
        if not instance.next_review_due:
            instance.next_review_due = timezone.now() + timedelta(days=90)  # Review in 3 months
            instance.save(update_fields=['next_review_due'])
        logger.info(f"Created knowledge base article {instance.id}: {instance.title}")

        # Update search keywords if not provided
        if not instance.search_keywords:
            instance.search_keywords = extract_keywords_from_article(instance)
            instance.save(update_fields=['search_keywords'])


@receiver(post_save, sender=IncidentRecommendation)
def recommendation_applied_handler(sender, instance, created, **kwargs):
    """Handle recommendation application and learning pattern updates"""
    if instance.is_applied and not created:
        # Update learning patterns based on applied recommendations
        try:
            update_learning_patterns_from_recommendation(instance)
        except Exception as e:
            logger.error(f"Failed to update learning patterns from recommendation {instance.id}: {str(e)}")
        logger.info(f"Applied recommendation {instance.id} for incident {instance.incident.id}")


@receiver(post_save, sender=KnowledgeBaseUsage)
def knowledge_usage_handler(sender, instance, created, **kwargs):
    """Handle knowledge base usage tracking"""
    if created:
        # Update article popularity metrics
        if instance.knowledge_article and instance.usage_type == 'VIEW':
            # Increment view count (this is also handled in the model method)
            pass
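            # A race-safe increment (assuming the article model has a `view_count`
            # field) could look like:
            #   from django.db.models import F
            #   KnowledgeBaseArticle.objects.filter(pk=instance.knowledge_article.pk).update(
            #       view_count=F('view_count') + 1)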

        # Track recommendation effectiveness
        if instance.recommendation and instance.usage_type == 'APPLY':
            # This could trigger learning pattern updates
            pass

        logger.debug(f"Recorded knowledge usage: {instance.usage_type} by {instance.user.username}")


@receiver(post_save, sender=LearningPattern)
def learning_pattern_updated_handler(sender, instance, created, **kwargs):
    """Handle learning pattern updates"""
    if created:
        logger.info(f"Created learning pattern {instance.id}: {instance.name}")
    else:
        # If the pattern is validated, update related recommendations
        if instance.is_validated:
            try:
                update_recommendations_from_validated_pattern(instance)
            except Exception as e:
                logger.error(f"Failed to update recommendations from pattern {instance.id}: {str(e)}")


def create_knowledge_articles_from_postmortem(postmortem: Postmortem):
    """Create knowledge base articles from postmortem content"""
    articles_created = []

    # Create a runbook from the action items
    if postmortem.action_items:
        runbook_content = f"""
# Runbook: {postmortem.title}
## Overview
This runbook was created from postmortem analysis of incident: {postmortem.incident.title}
## Root Cause
{postmortem.root_cause_analysis}
## Action Items
"""
        for i, action_item in enumerate(postmortem.action_items, 1):
            runbook_content += f"\n### {i}. {action_item.get('title', 'Action Item')}\n"
            runbook_content += f"{action_item.get('description', '')}\n"
            if action_item.get('priority'):
                runbook_content += f"**Priority:** {action_item['priority']}\n"

        runbook = KnowledgeBaseArticle.objects.create(
            title=f"Runbook: {postmortem.incident.category} - {postmortem.incident.title}",
            slug=f"runbook-{postmortem.incident.category.lower()}-{postmortem.id}",
            content=runbook_content,
            summary=f"Runbook created from postmortem analysis of {postmortem.incident.title}",
            article_type='RUNBOOK',
            category=postmortem.incident.category,
            subcategory=postmortem.incident.subcategory,
            tags=[postmortem.incident.category, 'runbook', 'postmortem'],
            related_services=postmortem.affected_services,
            status='DRAFT',
            author=postmortem.owner
        )
        # source_postmortems and related_incidents appear to be many-to-many
        # relations (cf. the LearningPattern usage below), so assign them after create()
        runbook.source_postmortems.set([postmortem])
        runbook.related_incidents.set([postmortem.incident])
        articles_created.append(runbook)

    # Create a troubleshooting guide from the lessons learned
    if postmortem.lessons_learned:
        troubleshooting_content = f"""
# Troubleshooting Guide: {postmortem.incident.category}
## Overview
This troubleshooting guide was created from lessons learned in incident: {postmortem.incident.title}
## Lessons Learned
{postmortem.lessons_learned}
## Common Issues and Solutions
Based on the incident analysis, here are common issues and their solutions:
### Issue: {postmortem.incident.title}
**Symptoms:** {postmortem.incident.description[:200]}...
**Solution:** {postmortem.root_cause_analysis[:300]}...
**Prevention:** {postmortem.lessons_learned[:300]}...
"""
        troubleshooting = KnowledgeBaseArticle.objects.create(
            title=f"Troubleshooting: {postmortem.incident.category} Issues",
            slug=f"troubleshooting-{postmortem.incident.category.lower()}-{postmortem.id}",
            content=troubleshooting_content,
            summary=f"Troubleshooting guide for {postmortem.incident.category} issues based on incident analysis",
            article_type='TROUBLESHOOTING',
            category=postmortem.incident.category,
            subcategory=postmortem.incident.subcategory,
            tags=[postmortem.incident.category, 'troubleshooting', 'lessons-learned'],
            related_services=postmortem.affected_services,
            status='DRAFT',
            author=postmortem.owner
        )
        # Same assumption as above: assign the many-to-many relations after create()
        troubleshooting.source_postmortems.set([postmortem])
        troubleshooting.related_incidents.set([postmortem.incident])
        articles_created.append(troubleshooting)

    logger.info(f"Created {len(articles_created)} knowledge articles from postmortem {postmortem.id}")
    return articles_created


def extract_keywords_from_article(article: KnowledgeBaseArticle) -> list:
    """Extract keywords from article content for search optimization"""
    import re

    # Combine the title, summary, and content
    text = f"{article.title} {article.summary} {article.content}"

    # Extract words (simple approach - in production, use more sophisticated NLP)
    words = re.findall(r'\b[a-zA-Z]{3,}\b', text.lower())

    # Remove common stop words
    stop_words = {
        'the', 'and', 'for', 'are', 'but', 'not', 'you', 'all', 'can', 'had', 'her', 'was', 'one', 'our',
        'out', 'day', 'get', 'has', 'him', 'his', 'how', 'its', 'may', 'new', 'now', 'old', 'see', 'two',
        'who', 'boy', 'did', 'man', 'oil', 'sit', 'try', 'use', 'way', 'will', 'with', 'this', 'that',
        'from', 'they', 'know', 'want', 'been', 'good', 'much', 'some', 'time', 'very', 'when', 'come',
        'here', 'just', 'like', 'long', 'make', 'many', 'over', 'such', 'take', 'than', 'them', 'well',
        'were', 'what', 'your', 'about', 'after', 'again', 'before', 'could', 'every', 'first', 'great',
        'little', 'might', 'never', 'other', 'place', 'right', 'should', 'still', 'think', 'under',
        'water', 'where', 'while', 'world', 'years', 'being', 'called', 'found', 'going', 'having',
        'known', 'large', 'looked', 'making', 'number', 'people', 'seemed', 'small', 'taken', 'turned',
        'wanted', 'without', 'working'
    }

    # Filter out stop words and rank the remaining words by frequency
    from collections import Counter
    word_counts = Counter(word for word in words if word not in stop_words)

    # Return the 20 most common keywords
    return [word for word, _ in word_counts.most_common(20)]


def update_learning_patterns_from_recommendation(recommendation: IncidentRecommendation):
    """Update learning patterns based on applied recommendations"""
    # Find or create a learning pattern based on the recommendation type and incident category
    pattern_name = f"{recommendation.recommendation_type} Pattern for {recommendation.incident.category}"

    pattern, created = LearningPattern.objects.get_or_create(
        name=pattern_name,
        pattern_type=recommendation.recommendation_type,
        defaults={
            'description': f"Pattern learned from applied recommendations for {recommendation.incident.category} incidents",
            'frequency': 1,
            'success_rate': 1.0,  # Initial success rate
            'confidence_score': recommendation.confidence_score,
            'triggers': [recommendation.incident.category],
            'actions': recommendation.suggested_actions,
            'outcomes': [recommendation.expected_outcome] if recommendation.expected_outcome else []
        }
    )

    if not created:
        # Update the existing pattern (the source incident is added below)
        pattern.frequency += 1

        # Update the success rate based on the effectiveness rating
        if recommendation.effectiveness_rating:
            # Simple moving average for the success rate
            current_success = (recommendation.effectiveness_rating - 1) / 4  # Convert a 1-5 rating to 0-1
            pattern.success_rate = ((pattern.success_rate * (pattern.frequency - 1)) + current_success) / pattern.frequency
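            # e.g. a rating of 4 maps to (4 - 1) / 4 = 0.75, which is then folded
            # into the running average across all applications of this pattern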

        # Update the confidence score
        pattern.confidence_score = max(pattern.confidence_score, recommendation.confidence_score)
        pattern.save()

    # Add the source incident and any related postmortems if available
    pattern.source_incidents.add(recommendation.incident)
    if hasattr(recommendation.incident, 'postmortems'):
        for postmortem in recommendation.incident.postmortems.all():
            pattern.source_postmortems.add(postmortem)


def update_recommendations_from_validated_pattern(pattern: LearningPattern):
    """Update existing recommendations based on validated learning patterns"""
    # Find recommendations that could benefit from this validated pattern
    related_recommendations = IncidentRecommendation.objects.filter(
        recommendation_type=pattern.pattern_type,
        confidence_score__lt=pattern.confidence_score
    )

    for recommendation in related_recommendations:
        # Update the confidence score if the pattern is more confident
        if pattern.confidence_score > recommendation.confidence_score:
            recommendation.confidence_score = pattern.confidence_score
            recommendation.confidence_level = get_confidence_level(pattern.confidence_score)
            recommendation.save(update_fields=['confidence_score', 'confidence_level'])


def get_confidence_level(score: float) -> str:
    """Convert a confidence score to a confidence level"""
    if score >= 0.8:
        return 'VERY_HIGH'
    elif score >= 0.6:
        return 'HIGH'
    elif score >= 0.4:
        return 'MEDIUM'
    else:
        return 'LOW'


# Additional signal handlers for cleanup and maintenance
@receiver(post_delete, sender=KnowledgeBaseArticle)
def knowledge_article_deleted_handler(sender, instance, **kwargs):
    """Handle knowledge base article deletion"""
    logger.info(f"Deleted knowledge base article {instance.id}: {instance.title}")


@receiver(post_delete, sender=Postmortem)
def postmortem_deleted_handler(sender, instance, **kwargs):
    """Handle postmortem deletion"""
    logger.info(f"Deleted postmortem {instance.id} for incident {instance.incident.id}")


@receiver(pre_save, sender=KnowledgeBaseArticle)
def knowledge_article_pre_save_handler(sender, instance, **kwargs):
    """Handle knowledge base article processing before save"""
    # Auto-generate a slug from the title if not provided
    if not instance.slug and instance.title:
        import re
        slug = re.sub(r'[^\w\s-]', '', instance.title.lower())
        slug = re.sub(r'[-\s]+', '-', slug)
        instance.slug = slug[:50]  # Limit the slug length
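        # e.g. "Database Failover: EU-West!" becomes "database-failover-eu-west"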

    # Detect content changes on updates (not on creation)
    if instance.pk:
        try:
            old_instance = KnowledgeBaseArticle.objects.get(pk=instance.pk)
            if (old_instance.content != instance.content or
                    old_instance.title != instance.title or
                    old_instance.summary != instance.summary):
                # Content changed, but last_updated_by cannot be set here because
                # the acting user is not available in the signal context
                pass
        except KnowledgeBaseArticle.DoesNotExist:
            pass