371 lines
16 KiB
Python
371 lines
16 KiB
Python
import logging
|
|
from django.db.models.signals import post_save, post_delete, pre_save
|
|
from django.dispatch import receiver
|
|
from django.utils import timezone
|
|
from datetime import timedelta
|
|
|
|
from .models import (
|
|
Postmortem, KnowledgeBaseArticle, IncidentRecommendation,
|
|
LearningPattern, KnowledgeBaseUsage, AutomatedPostmortemGeneration
|
|
)
|
|
from incident_intelligence.models import Incident
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@receiver(post_save, sender=Incident)
def incident_resolved_handler(sender, instance, created, **kwargs):
    """Trigger postmortem and recommendation generation for a resolved incident.

    Runs after every ``Incident`` save. Work is only done when the row already
    existed (``created`` is false), its status is ``RESOLVED`` or ``CLOSED``,
    and the save could have written ``status``.

    Args:
        sender: The model class that sent the signal (``Incident``).
        instance: The saved incident.
        created: True when the save inserted a new row.
        **kwargs: Remaining ``post_save`` arguments; ``update_fields`` is read.

    Failures are logged with their traceback and never propagated, so a
    failing generator cannot make ``Incident.save()`` raise.
    """
    if created or instance.status not in ('RESOLVED', 'CLOSED'):
        return

    # A partial save that did not write ``status`` cannot be the save that
    # resolved the incident; skipping it avoids regenerating the postmortem
    # and recommendations on unrelated field updates.
    update_fields = kwargs.get('update_fields')
    if update_fields is not None and 'status' not in update_fields:
        return

    try:
        # Only the higher severities get an automatic postmortem.
        if instance.severity in ('HIGH', 'CRITICAL', 'EMERGENCY'):
            # Imported lazily, as in the rest of this module.
            from .services.postmortem_generator import PostmortemGenerator

            generator = PostmortemGenerator()

            # Runs inline in the saving thread (in production, use Celery).
            try:
                result = generator.generate_postmortem_for_incident(
                    incident_id=str(instance.id),
                    include_timeline=True,
                    include_logs=True,
                    trigger='incident_resolved',
                )
                logger.info(f"Generated postmortem for incident {instance.id}: {result['postmortem_id']}")
            except Exception as e:
                logger.exception(f"Failed to generate postmortem for incident {instance.id}: {str(e)}")

        # NOTE(review): the flattened source does not show whether this step
        # was nested under the severity check; it is treated here as applying
        # to every resolved incident - confirm against the original file.
        from .services.recommendation_engine import RecommendationEngine

        recommendation_engine = RecommendationEngine()

        try:
            recommendations = recommendation_engine.generate_recommendations(
                incident_id=str(instance.id),
                max_recommendations=3,
                min_confidence=0.6,
            )
            logger.info(f"Generated {len(recommendations)} recommendations for incident {instance.id}")
        except Exception as e:
            logger.exception(f"Failed to generate recommendations for incident {instance.id}: {str(e)}")

    except Exception as e:
        # Catches failures outside the inner blocks, e.g. the lazy imports.
        logger.exception(f"Error in incident_resolved_handler: {str(e)}")
|
|
|
|
|
|
@receiver(post_save, sender=Postmortem)
def postmortem_saved_handler(sender, instance, created, **kwargs):
    """Default the due date of new postmortems and publish knowledge articles.

    On creation: a missing ``due_date`` is set to seven days from now and the
    creation is logged. On later saves: when the status is ``PUBLISHED``,
    knowledge base articles are derived from the postmortem; failures there
    are logged with their traceback and not propagated.

    Args:
        sender: The model class that sent the signal (``Postmortem``).
        instance: The saved postmortem.
        created: True when the save inserted a new row.
        **kwargs: Remaining ``post_save`` arguments (unused).
    """
    if created:
        if not instance.due_date:
            instance.due_date = timezone.now() + timedelta(days=7)
            # Persist through the queryset rather than instance.save():
            # save() would dispatch post_save again and re-enter this handler
            # with created=False, which made a postmortem created as PUBLISHED
            # generate articles only when it happened to lack a due date.
            Postmortem.objects.filter(pk=instance.pk).update(due_date=instance.due_date)

        logger.info(f"Created postmortem {instance.id} for incident {instance.incident.id}")
        return

    # Updates only: a published postmortem feeds the knowledge base.
    if instance.status == 'PUBLISHED':
        try:
            create_knowledge_articles_from_postmortem(instance)
        except Exception as e:
            logger.exception(f"Failed to create knowledge articles from postmortem {instance.id}: {str(e)}")
|
|
|
|
|
|
@receiver(post_save, sender=KnowledgeBaseArticle)
def knowledge_article_saved_handler(sender, instance, created, **kwargs):
    """Fill in the review date and search keywords of a saved article.

    On creation a missing ``next_review_due`` is set to 90 days from now and
    the creation is logged. On every save, missing ``search_keywords`` are
    derived from the article text.

    Args:
        sender: The model class that sent the signal (``KnowledgeBaseArticle``).
        instance: The saved article.
        created: True when the save inserted a new row.
        **kwargs: Remaining ``post_save`` arguments (unused).
    """
    pending = {}

    if created:
        if not instance.next_review_due:
            pending['next_review_due'] = timezone.now() + timedelta(days=90)  # Review in 3 months

        logger.info(f"Created knowledge base article {instance.id}: {instance.title}")

    if not instance.search_keywords:
        keywords = extract_keywords_from_article(instance)
        # An empty result is not written: it would leave the field falsy, and
        # with the previous nested save() that re-triggered this handler
        # without end.
        if keywords:
            pending['search_keywords'] = keywords

    if not pending:
        return

    for field_name, value in pending.items():
        setattr(instance, field_name, value)

    # One queryset update instead of up to two instance.save() calls: it does
    # not dispatch pre_save/post_save again, so the handler cannot re-enter
    # itself.
    KnowledgeBaseArticle.objects.filter(pk=instance.pk).update(**pending)
|
|
|
|
|
|
@receiver(post_save, sender=IncidentRecommendation)
def recommendation_applied_handler(sender, instance, created, **kwargs):
    """Feed an applied recommendation into the learning patterns.

    Acts only on updates of recommendations whose ``is_applied`` flag is set.
    Partial saves are considered only when they wrote ``is_applied`` or
    ``effectiveness_rating``.

    Args:
        sender: The model class that sent the signal (``IncidentRecommendation``).
        instance: The saved recommendation.
        created: True when the save inserted a new row.
        **kwargs: Remaining ``post_save`` arguments; ``update_fields`` is read.
    """
    if created or not instance.is_applied:
        return

    # Saves that only touch other columns (for example the confidence refresh
    # in update_recommendations_from_validated_pattern) are not a new
    # application and must not bump the pattern frequency again.
    update_fields = kwargs.get('update_fields')
    if update_fields is not None and not {'is_applied', 'effectiveness_rating'} & set(update_fields):
        return

    try:
        update_learning_patterns_from_recommendation(instance)
    except Exception as e:
        logger.exception(f"Failed to update learning patterns from recommendation {instance.id}: {str(e)}")

    # Logged even when the pattern update failed, as before.
    logger.info(f"Applied recommendation {instance.id} for incident {instance.incident.id}")
|
|
|
|
|
|
@receiver(post_save, sender=KnowledgeBaseUsage)
def knowledge_usage_handler(sender, instance, created, **kwargs):
    """Record a debug log entry for each newly created usage row.

    The two usage-type branches are placeholders that currently do nothing;
    updates of existing rows are ignored entirely.
    """
    if not created:
        return

    # Article views: the view counter is maintained by the model method, so
    # there is nothing to do here yet.
    viewed_article = instance.knowledge_article and instance.usage_type == 'VIEW'
    if viewed_article:
        pass

    # Applied recommendations: placeholder for future learning pattern updates.
    applied_recommendation = instance.recommendation and instance.usage_type == 'APPLY'
    if applied_recommendation:
        pass

    logger.debug(f"Recorded knowledge usage: {instance.usage_type} by {instance.user.username}")
|
|
|
|
|
|
@receiver(post_save, sender=LearningPattern)
def learning_pattern_updated_handler(sender, instance, created, **kwargs):
    """Log new learning patterns and propagate validated ones.

    A newly created pattern is only logged. An updated pattern that is marked
    validated is pushed to the related recommendations; errors from that step
    are logged and swallowed.
    """
    if created:
        logger.info(f"Created learning pattern {instance.id}: {instance.name}")
        return

    if not instance.is_validated:
        return

    # Validated pattern: refresh the recommendations that depend on it.
    try:
        update_recommendations_from_validated_pattern(instance)
    except Exception as e:
        logger.error(f"Failed to update recommendations from pattern {instance.id}: {str(e)}")
|
|
|
|
|
|
def _create_postmortem_article(postmortem, slug, **fields):
    """Create one draft article for ``postmortem`` unless ``slug`` already exists.

    Returns the new article, or ``None`` when an article with that slug is
    already stored (the postmortem was processed on an earlier save).
    """
    if KnowledgeBaseArticle.objects.filter(slug=slug).exists():
        return None

    incident = postmortem.incident
    article = KnowledgeBaseArticle.objects.create(
        slug=slug,
        category=incident.category,
        subcategory=incident.subcategory,
        related_services=postmortem.affected_services,
        status='DRAFT',
        author=postmortem.owner,
        **fields,
    )
    # NOTE(review): assumes source_postmortems / related_incidents are
    # many-to-many relations (LearningPattern uses the same names with
    # .add()). Django rejects such values as create() keyword arguments, so
    # they are linked after the row exists - confirm against the model.
    article.source_postmortems.add(postmortem)
    article.related_incidents.add(incident)
    return article


def create_knowledge_articles_from_postmortem(postmortem: Postmortem):
    """Create draft knowledge base articles from a postmortem.

    A runbook is built from ``action_items`` and a troubleshooting guide from
    ``lessons_learned``; either is skipped when its source field is empty or
    when an article with the same slug already exists, so calling this again
    for the same postmortem does not create duplicates.

    Args:
        postmortem: The postmortem to derive articles from.

    Returns:
        list: The articles created by this call (possibly empty).
    """
    incident = postmortem.incident
    # The category is only guarded for the slug; elsewhere it is used as-is.
    slug_category = (incident.category or '').lower()
    articles_created = []

    # Create runbook from action items
    if postmortem.action_items:
        # Built line by line so the markdown headings start at column 0
        # regardless of how this function is indented.
        runbook_parts = ["\n".join([
            "",
            f"# Runbook: {postmortem.title}",
            "",
            "## Overview",
            f"This runbook was created from postmortem analysis of incident: {incident.title}",
            "",
            "## Root Cause",
            f"{postmortem.root_cause_analysis}",
            "",
            "## Action Items",
            "",
        ])]
        for i, action_item in enumerate(postmortem.action_items, 1):
            runbook_parts.append(f"\n### {i}. {action_item.get('title', 'Action Item')}\n")
            runbook_parts.append(f"{action_item.get('description', '')}\n")
            if action_item.get('priority'):
                runbook_parts.append(f"**Priority:** {action_item['priority']}\n")

        runbook = _create_postmortem_article(
            postmortem,
            slug=f"runbook-{slug_category}-{postmortem.id}",
            title=f"Runbook: {incident.category} - {incident.title}",
            content="".join(runbook_parts),
            summary=f"Runbook created from postmortem analysis of {incident.title}",
            article_type='RUNBOOK',
            tags=[incident.category, 'runbook', 'postmortem'],
        )
        if runbook is not None:
            articles_created.append(runbook)

    # Create troubleshooting guide from lessons learned
    if postmortem.lessons_learned:
        troubleshooting_content = "\n".join([
            "",
            f"# Troubleshooting Guide: {incident.category}",
            "",
            "## Overview",
            f"This troubleshooting guide was created from lessons learned in incident: {incident.title}",
            "",
            "## Lessons Learned",
            f"{postmortem.lessons_learned}",
            "",
            "## Common Issues and Solutions",
            "Based on the incident analysis, here are common issues and their solutions:",
            "",
            f"### Issue: {incident.title}",
            # Excerpts tolerate missing text instead of failing on None[:n].
            f"**Symptoms:** {(incident.description or '')[:200]}...",
            f"**Solution:** {(postmortem.root_cause_analysis or '')[:300]}...",
            f"**Prevention:** {postmortem.lessons_learned[:300]}...",
            "",
        ])

        troubleshooting = _create_postmortem_article(
            postmortem,
            slug=f"troubleshooting-{slug_category}-{postmortem.id}",
            title=f"Troubleshooting: {incident.category} Issues",
            content=troubleshooting_content,
            summary=f"Troubleshooting guide for {incident.category} issues based on incident analysis",
            article_type='TROUBLESHOOTING',
            tags=[incident.category, 'troubleshooting', 'lessons-learned'],
        )
        if troubleshooting is not None:
            articles_created.append(troubleshooting)

    logger.info(f"Created {len(articles_created)} knowledge articles from postmortem {postmortem.id}")
    return articles_created
|
|
|
|
|
|
def extract_keywords_from_article(article: "KnowledgeBaseArticle") -> list:
    """Extract up to 20 search keywords from an article's text.

    The title, summary and content are lower-cased and split into alphabetic
    words of at least three letters; common English stop words are dropped.
    The remaining words are ranked by how often they occur, ties keeping the
    order of first appearance, so the result is deterministic and really is
    the "top 20" rather than an arbitrary slice of a set.

    Args:
        article: Object exposing ``title``, ``summary`` and ``content``.

    Returns:
        list: At most 20 distinct lower-case keywords, most frequent first.
    """
    import re
    from collections import Counter

    # Skip empty/None fields so a missing summary does not add the word "none".
    text = " ".join(
        str(part) for part in (article.title, article.summary, article.content) if part
    )

    # Extract words (simple approach - in production, use more sophisticated NLP)
    words = re.findall(r'\b[a-zA-Z]{3,}\b', text.lower())

    stop_words = {
        'the', 'and', 'for', 'are', 'but', 'not', 'you', 'all', 'can', 'had', 'her', 'was', 'one', 'our',
        'out', 'day', 'get', 'has', 'him', 'his', 'how', 'its', 'may', 'new', 'now', 'old', 'see', 'two',
        'who', 'boy', 'did', 'man', 'oil', 'sit', 'try', 'use', 'way', 'will', 'with', 'this', 'that',
        'from', 'they', 'know', 'want', 'been', 'good', 'much', 'some', 'time', 'very', 'when', 'come',
        'here', 'just', 'like', 'long', 'make', 'many', 'over', 'such', 'take', 'than', 'them', 'well',
        'were', 'what', 'your', 'about', 'after', 'again', 'before', 'could', 'every', 'first', 'great',
        'little', 'might', 'never', 'other', 'place', 'right', 'should', 'still', 'think', 'under',
        'water', 'where', 'while', 'world', 'years', 'being', 'called', 'found', 'going', 'having',
        'known', 'large', 'looked', 'making', 'number', 'people', 'seemed', 'small', 'taken', 'turned',
        'wanted', 'without', 'working',
    }

    # Counter keeps insertion order, and most_common() orders equal counts by
    # first occurrence, which gives a stable ranking.
    frequencies = Counter(word for word in words if word not in stop_words)

    # Limit to top 20 keywords
    return [word for word, _count in frequencies.most_common(20)]
|
|
|
|
|
|
def update_learning_patterns_from_recommendation(recommendation: IncidentRecommendation):
    """Record an applied recommendation in its learning pattern.

    The pattern is looked up (or created) by a name built from the
    recommendation type and the incident category. An existing pattern has
    its frequency incremented, its success rate folded into a running average
    when the recommendation carries an effectiveness rating, and its
    confidence raised to the recommendation's if that is higher. The incident
    and its postmortems are then linked to the pattern.

    Args:
        recommendation: The applied recommendation to learn from.
    """
    incident = recommendation.incident

    # Find or create learning pattern based on recommendation type and incident category
    pattern_name = f"{recommendation.recommendation_type} Pattern for {incident.category}"

    pattern, created = LearningPattern.objects.get_or_create(
        name=pattern_name,
        pattern_type=recommendation.recommendation_type,
        defaults={
            'description': f"Pattern learned from applied recommendations for {incident.category} incidents",
            'frequency': 1,
            'success_rate': 1.0,  # Initial success rate
            'confidence_score': recommendation.confidence_score,
            'triggers': [incident.category],
            'actions': recommendation.suggested_actions,
            'outcomes': [recommendation.expected_outcome] if recommendation.expected_outcome else [],
        },
    )

    if not created:
        pattern.frequency += 1

        if recommendation.effectiveness_rating:
            # Running average: the previous rate weighted by the earlier
            # occurrences plus this rating rescaled from 1-5 to 0-1.
            current_success = (recommendation.effectiveness_rating - 1) / 4
            pattern.success_rate = (
                (pattern.success_rate * (pattern.frequency - 1)) + current_success
            ) / pattern.frequency

        # NOTE(review): taken to apply to every update, not only rated ones;
        # the flattened source does not show the nesting - confirm.
        pattern.confidence_score = max(pattern.confidence_score, recommendation.confidence_score)

        pattern.save()

    # Link the sources once for both the created and the updated case (the
    # update branch previously added the incident a second time).
    pattern.source_incidents.add(incident)
    if hasattr(incident, 'postmortems'):
        # A single add() call for all postmortems instead of one per row.
        pattern.source_postmortems.add(*incident.postmortems.all())
|
|
|
|
|
|
def update_recommendations_from_validated_pattern(pattern: LearningPattern):
    """Raise the confidence of recommendations covered by a validated pattern.

    Every recommendation of the pattern's type whose confidence score is below
    the pattern's gets the pattern's score and the matching confidence level.

    Args:
        pattern: The validated learning pattern.
    """
    # The filter already selects only less-confident rows, so no per-row
    # comparison is needed. A single queryset update replaces the save() loop:
    # it issues one statement and sends no post_save signals, so refreshing
    # confidence is not mistaken for a newly applied recommendation by the
    # IncidentRecommendation handler.
    IncidentRecommendation.objects.filter(
        recommendation_type=pattern.pattern_type,
        confidence_score__lt=pattern.confidence_score,
    ).update(
        confidence_score=pattern.confidence_score,
        confidence_level=get_confidence_level(pattern.confidence_score),
    )
|
|
|
|
|
|
def get_confidence_level(
    score: float,
    thresholds=((0.8, 'VERY_HIGH'), (0.6, 'HIGH'), (0.4, 'MEDIUM')),
    default: str = 'LOW',
) -> str:
    """Convert a confidence score to a confidence level label.

    Args:
        score: Numeric confidence score.
        thresholds: ``(minimum, label)`` pairs, checked in the given order; the
            first pair whose minimum is less than or equal to ``score`` wins.
            List them from highest to lowest minimum. The default reproduces
            the fixed 0.8 / 0.6 / 0.4 cut-offs.
        default: Label returned when no threshold is reached.

    Returns:
        str: The matching label, ``'LOW'`` by default for scores under 0.4.
    """
    for minimum, level in thresholds:
        if score >= minimum:
            return level
    return default
|
|
|
|
|
|
# Additional signal handlers for cleanup and maintenance
|
|
|
|
@receiver(post_delete, sender=KnowledgeBaseArticle)
def knowledge_article_deleted_handler(sender, instance, **kwargs):
    """Write an info log entry after a knowledge base article is deleted."""
    deletion_notice = f"Deleted knowledge base article {instance.id}: {instance.title}"
    logger.info(deletion_notice)
|
|
|
|
|
|
@receiver(post_delete, sender=Postmortem)
def postmortem_deleted_handler(sender, instance, **kwargs):
    """Write an info log entry after a postmortem is deleted.

    Args:
        sender: The model class that sent the signal (``Postmortem``).
        instance: The deleted postmortem (still populated in memory).
        **kwargs: Remaining ``post_delete`` arguments (unused).
    """
    # Read the stored key instead of instance.incident.id: that would fetch
    # the related incident from the database just to log its id, and would
    # raise if the relation is empty or the incident is already gone.
    # NOTE(review): assumes ``incident`` is a ForeignKey/OneToOne field, so
    # Django provides ``incident_id`` - confirm against the model.
    logger.info(f"Deleted postmortem {instance.id} for incident {instance.incident_id}")
|
|
|
|
|
|
@receiver(pre_save, sender=KnowledgeBaseArticle)
def knowledge_article_pre_save_handler(sender, instance, **kwargs):
    """Derive a slug from the title before an article without one is saved.

    The title is lower-cased, characters other than word characters,
    whitespace and hyphens are removed, runs of whitespace/hyphens collapse
    to a single hyphen, and the result is cut to 50 characters with no
    leading or trailing hyphen. An existing slug is never overwritten.

    Args:
        sender: The model class that sent the signal (``KnowledgeBaseArticle``).
        instance: The article about to be saved.
        **kwargs: Remaining ``pre_save`` arguments (unused).
    """
    # Auto-generate slug if not provided
    if not instance.slug and instance.title:
        import re

        slug = re.sub(r'[^\w\s-]', '', instance.title.lower())
        slug = re.sub(r'[-\s]+', '-', slug).strip('-')
        # Limit slug length; truncation may expose a hyphen at the cut.
        instance.slug = slug[:50].rstrip('-')

    # last_updated_by cannot be maintained here: the signal carries no
    # information about the acting user. The former per-save lookup of the
    # stored row only compared fields and then did nothing, so it was removed
    # to avoid a wasted query on every update.
|