214 lines
9.3 KiB
Python
214 lines
9.3 KiB
Python
"""
|
|
Management command to set up incident intelligence module
|
|
"""
|
|
from django.core.management.base import BaseCommand
|
|
from django.db import transaction
|
|
from django.contrib.auth import get_user_model
|
|
from incident_intelligence.models import Incident, IncidentPattern
|
|
|
|
# Resolve the user model through django.contrib.auth.get_user_model() rather
# than importing a concrete class.
# NOTE(review): `User` is not referenced anywhere in the visible part of this
# module — confirm whether it is still needed.
User = get_user_model()
|
|
|
|
|
|
class Command(BaseCommand):
    """Set up the incident intelligence module.

    Every step is opt-in through its own command-line flag:

    * ``--create-sample-data`` inserts the sample incidents.
    * ``--create-patterns`` inserts the sample incident patterns.
    * ``--run-ai-analysis`` queues incidents with ``ai_processed=False``
      for AI analysis.

    Running the command without any flag performs no work; a warning is
    printed in that case.
    """

    help = 'Set up the incident intelligence module with sample data and configurations'

    def add_arguments(self, parser):
        """Register the three boolean flags that select the setup steps."""
        parser.add_argument(
            '--create-sample-data',
            action='store_true',
            help='Create sample incidents for testing',
        )
        parser.add_argument(
            '--create-patterns',
            action='store_true',
            help='Create sample patterns',
        )
        parser.add_argument(
            '--run-ai-analysis',
            action='store_true',
            help='Run AI analysis on existing incidents',
        )

    def handle(self, *args, **options):
        """Run the selected setup steps in a fixed order.

        Order: sample incidents, then sample patterns, then AI analysis.
        """
        self.stdout.write(
            self.style.SUCCESS('Setting up Incident Intelligence module...')
        )

        # Pair each flag with its step so the order stays explicit.
        steps = (
            ('create_sample_data', self.create_sample_data),
            ('create_patterns', self.create_sample_patterns),
            ('run_ai_analysis', self.run_ai_analysis),
        )
        selected = [step for flag, step in steps if options[flag]]

        if not selected:
            # Without this hint the command would claim success after doing
            # nothing at all.
            self.stdout.write(
                self.style.WARNING(
                    'No setup step selected; use --create-sample-data, '
                    '--create-patterns and/or --run-ai-analysis.'
                )
            )

        for step in selected:
            step()

        self.stdout.write(
            self.style.SUCCESS('Incident Intelligence module setup completed!')
        )

    def _get_or_create_all(self, model, lookup_field, records, label):
        """Create one row per record unless a matching row already exists.

        Args:
            model: Model class whose default manager is used.
            lookup_field: Key of each record used as the ``get_or_create``
                lookup; the whole record is passed as ``defaults``.
            records: Iterable of dicts with the field values to insert.
            label: Lower-case noun used in the progress messages.

        Returns:
            The number of rows that were actually created.
        """
        created_count = 0
        # A single transaction so a failure part-way through leaves no
        # partially seeded data behind.
        with transaction.atomic():
            for record in records:
                obj, created = model.objects.get_or_create(
                    defaults=record,
                    **{lookup_field: record[lookup_field]},
                )
                value = getattr(obj, lookup_field)
                if created:
                    created_count += 1
                    self.stdout.write(f' Created {label}: {value}')
                else:
                    self.stdout.write(
                        f' {label.capitalize()} already exists: {value}'
                    )
        return created_count

    def create_sample_data(self):
        """Create sample incidents for testing.

        Incidents are matched by title, so an entry whose title is already
        present (in the database or earlier in the list) is not inserted
        again. The summary reports only the rows that were really created.
        """
        self.stdout.write('Creating sample incidents...')

        sample_incidents = [
            {
                'title': 'Database Connection Timeout',
                'description': 'Users are experiencing timeouts when trying to access the database. The issue started around 2 PM and affects all users.',
                'free_text': 'Database is down, can\'t connect, getting timeout errors',
                'severity': 'HIGH',
                'affected_users': 150,
                'business_impact': 'Critical business operations are affected. Users cannot access their data.',
            },
            {
                'title': 'API Response Slow',
                'description': 'The user service API is responding slowly, causing delays in user authentication and profile updates.',
                'free_text': 'API is slow, taking forever to respond, users complaining',
                'severity': 'MEDIUM',
                'affected_users': 50,
                'business_impact': 'User experience is degraded but core functionality still works.',
            },
            {
                'title': 'Payment Gateway Error',
                'description': 'Payment processing is failing with 500 errors. Customers cannot complete purchases.',
                'free_text': 'Payment not working, getting errors, customers can\'t buy',
                'severity': 'CRITICAL',
                'affected_users': 200,
                'business_impact': 'Revenue is directly impacted. Customers cannot make purchases.',
            },
            {
                'title': 'Email Service Down',
                'description': 'Email notifications are not being sent. Users are not receiving order confirmations and password reset emails.',
                'free_text': 'Emails not sending, notifications broken, users not getting emails',
                'severity': 'MEDIUM',
                'affected_users': 75,
                'business_impact': 'Communication with customers is disrupted.',
            },
            {
                'title': 'Mobile App Crash',
                'description': 'The mobile application is crashing on iOS devices when users try to view their order history.',
                'free_text': 'App crashing on iPhone, can\'t see orders, keeps closing',
                'severity': 'HIGH',
                'affected_users': 100,
                'business_impact': 'Mobile users cannot access their order information.',
            },
            {
                # Same title as the first entry: with the title-based lookup
                # this one is reported as already existing.
                'title': 'Database Connection Timeout',
                'description': 'Users are experiencing timeouts when trying to access the database. The issue started around 3 PM and affects all users.',
                'free_text': 'Database is down, can\'t connect, getting timeout errors',
                'severity': 'HIGH',
                'affected_users': 150,
                'business_impact': 'Critical business operations are affected. Users cannot access their data.',
            },
            {
                'title': 'Load Balancer Issue',
                'description': 'The load balancer is not distributing traffic evenly, causing some servers to be overloaded.',
                'free_text': 'Load balancer not working properly, servers overloaded',
                'severity': 'HIGH',
                'affected_users': 300,
                'business_impact': 'System performance is degraded across multiple services.',
            },
            {
                'title': 'Cache Miss Rate High',
                'description': 'Redis cache is experiencing high miss rates, causing increased database load.',
                'free_text': 'Cache not working, database overloaded, slow responses',
                'severity': 'MEDIUM',
                'affected_users': 0,
                'business_impact': 'System performance is degraded but not directly visible to users.',
            },
        ]

        created_count = self._get_or_create_all(
            Incident, 'title', sample_incidents, 'incident'
        )

        self.stdout.write(
            self.style.SUCCESS(f'Created {created_count} sample incidents')
        )

    def create_sample_patterns(self):
        """Create sample patterns.

        Patterns are matched by name; existing ones are left untouched and
        are not included in the reported count.
        """
        self.stdout.write('Creating sample patterns...')

        sample_patterns = [
            {
                'name': 'Database Connectivity Issues',
                'pattern_type': 'RECURRING',
                'description': 'Recurring database connection problems affecting multiple services',
                'frequency': 'Weekly',
                'affected_services': ['user-service', 'order-service', 'payment-service'],
                'common_keywords': ['database', 'connection', 'timeout', 'error'],
                'confidence_score': 0.85,
                'is_active': True,
                'is_resolved': False,
            },
            {
                'name': 'API Performance Degradation',
                'pattern_type': 'TREND',
                'description': 'Gradual degradation in API response times across services',
                'frequency': 'Daily',
                'affected_services': ['api-gateway', 'user-service', 'order-service'],
                'common_keywords': ['slow', 'performance', 'latency', 'timeout'],
                'confidence_score': 0.75,
                'is_active': True,
                'is_resolved': False,
            },
            {
                'name': 'Mobile App Crashes',
                'pattern_type': 'RECURRING',
                'description': 'Frequent crashes in mobile applications, particularly on iOS',
                'frequency': 'Bi-weekly',
                'affected_services': ['mobile-app', 'ios-app'],
                'common_keywords': ['crash', 'mobile', 'ios', 'app'],
                'confidence_score': 0.90,
                'is_active': True,
                'is_resolved': False,
            },
        ]

        created_count = self._get_or_create_all(
            IncidentPattern, 'name', sample_patterns, 'pattern'
        )

        self.stdout.write(
            self.style.SUCCESS(f'Created {created_count} sample patterns')
        )

    def run_ai_analysis(self):
        """Run AI analysis on existing incidents.

        Collects the ids of all incidents with ``ai_processed=False`` and
        hands them, as strings, to ``batch_process_incidents_ai.delay``.
        Any failure is reported on the command output instead of being
        raised, so the remaining setup output is still printed.
        """
        self.stdout.write('Running AI analysis on existing incidents...')

        try:
            # Imported inside the try block so an import failure is reported
            # like any other analysis failure.
            from incident_intelligence.tasks import batch_process_incidents_ai

            # Fetch only the primary keys; full model instances are not
            # needed just to build the id list.
            unprocessed_ids = Incident.objects.filter(
                ai_processed=False
            ).values_list('id', flat=True)
            incident_ids = [str(incident_id) for incident_id in unprocessed_ids]

            if incident_ids:
                # NOTE(review): `.delay` suggests an asynchronous task queue
                # (presumably Celery) — confirm against the tasks module.
                batch_process_incidents_ai.delay(incident_ids)
                self.stdout.write(
                    self.style.SUCCESS(f'Queued {len(incident_ids)} incidents for AI analysis')
                )
            else:
                self.stdout.write('No unprocessed incidents found')

        except Exception as e:
            # Deliberately broad: this step is best-effort and must not
            # abort the whole setup command.
            self.stdout.write(
                self.style.ERROR(f'Failed to run AI analysis: {e}')
            )
|