434 lines
17 KiB
Python
434 lines
17 KiB
Python
"""
Management command to set up automation & orchestration module
"""
|
|
from django.core.management.base import BaseCommand
|
|
from django.contrib.auth import get_user_model
|
|
from datetime import timedelta, datetime
|
|
from django.utils import timezone
|
|
from automation_orchestration.models import (
|
|
Runbook,
|
|
Integration,
|
|
ChatOpsIntegration,
|
|
AutoRemediation,
|
|
MaintenanceWindow,
|
|
WorkflowTemplate,
|
|
)
|
|
|
|
# Resolve the user model through Django's helper rather than importing a
# concrete class, so the command works with whichever user model is active.
User = get_user_model()
|
|
|
|
|
|
class Command(BaseCommand):
    """Populate the automation & orchestration module with sample data.

    Each ``create_sample_*`` method uses ``get_or_create`` keyed on the
    object's ``name``, so running the command repeatedly does not duplicate
    the sample rows. Passing ``--reset`` deletes every row of the six
    automation models before the samples are (re)created.
    """

    help = 'Set up automation & orchestration module with sample data'

    def add_arguments(self, parser):
        """Register the optional ``--reset`` flag on the argument parser."""
        parser.add_argument(
            '--reset',
            action='store_true',
            help='Reset existing data before creating new data',
        )

    def handle(self, *args, **options):
        """Run the setup: optionally reset, then create every sample set."""
        if options['reset']:
            self.stdout.write('Resetting existing automation data...')
            self.reset_data()

        self.stdout.write('Setting up automation & orchestration module...')

        # Runbooks go first, matching the original ordering of the command.
        self.create_sample_runbooks()
        self.create_sample_integrations()
        self.create_sample_chatops_integrations()
        self.create_sample_auto_remediations()
        self.create_sample_maintenance_windows()
        self.create_sample_workflow_templates()

        self.stdout.write(
            self.style.SUCCESS('Successfully set up automation & orchestration module!')
        )

    def reset_data(self):
        """Delete all rows of every automation model handled by this command."""
        Runbook.objects.all().delete()
        Integration.objects.all().delete()
        ChatOpsIntegration.objects.all().delete()
        AutoRemediation.objects.all().delete()
        MaintenanceWindow.objects.all().delete()
        WorkflowTemplate.objects.all().delete()

    def _get_admin_user(self):
        """Return a superuser to own the sample data, creating one if needed.

        The first existing superuser is reused. When none exists, an
        ``admin`` account is created with a fixed sample password and a
        warning is printed, because that password is publicly known.
        """
        admin_user = User.objects.filter(is_superuser=True).first()
        if admin_user:
            return admin_user

        admin_user = User.objects.create_superuser(
            username='admin',
            email='admin@example.com',
            password='admin123',
        )
        # NOTE(security): the password above is a hard-coded sample default.
        # It is kept for backward compatibility, but must never reach a
        # real deployment unchanged.
        self.stdout.write(
            self.style.WARNING(
                " Created default superuser 'admin' with a well-known sample "
                "password; change it before exposing this system."
            )
        )
        return admin_user

    def _report_created(self, label, results, exists_message):
        """Print one line per newly created object, or a single fallback line.

        ``results`` is an iterable of ``(obj, created)`` pairs as returned by
        ``get_or_create``. Tracking each pair separately avoids the earlier
        problem where only the last ``created`` flag decided the message for
        the whole group.
        """
        any_created = False
        for obj, created in results:
            if created:
                any_created = True
                self.stdout.write(f' Created {label}: {obj.name}')
        if not any_created:
            self.stdout.write(exists_message)

    def create_sample_runbooks(self):
        """Create sample runbooks"""
        self.stdout.write('Creating sample runbooks...')

        # Get or create a superuser for sample data
        admin_user = self._get_admin_user()

        # Sample runbook 1: Database restart
        database_restart = Runbook.objects.get_or_create(
            name='Database Service Restart',
            defaults={
                'description': 'Automated runbook for restarting database services',
                'version': '1.0',
                'trigger_type': 'AUTOMATIC',
                'trigger_conditions': {
                    'severity': ['CRITICAL', 'EMERGENCY'],
                    'category': 'database',
                    'keywords': ['database', 'connection', 'timeout'],
                },
                'steps': [
                    {
                        'name': 'Check database status',
                        'action': 'check_service_status',
                        'timeout': 30,
                        'parameters': {'service': 'postgresql'},
                    },
                    {
                        'name': 'Stop database service',
                        'action': 'stop_service',
                        'timeout': 60,
                        'parameters': {'service': 'postgresql'},
                    },
                    {
                        'name': 'Start database service',
                        'action': 'start_service',
                        'timeout': 120,
                        'parameters': {'service': 'postgresql'},
                    },
                    {
                        'name': 'Verify database connectivity',
                        'action': 'verify_connectivity',
                        'timeout': 30,
                        'parameters': {'host': 'localhost', 'port': 5432},
                    },
                ],
                'estimated_duration': timedelta(minutes=5),
                'category': 'database',
                'tags': ['database', 'restart', 'automation'],
                'status': 'ACTIVE',
                'is_public': True,
                'created_by': admin_user,
            },
        )

        # Sample runbook 2: Web server scaling
        web_scale_up = Runbook.objects.get_or_create(
            name='Web Server Scale Up',
            defaults={
                'description': 'Automated runbook for scaling up web servers',
                'version': '1.0',
                'trigger_type': 'AUTOMATIC',
                'trigger_conditions': {
                    'severity': ['HIGH', 'CRITICAL'],
                    'category': 'performance',
                    'metrics': {'cpu_usage': '>80', 'response_time': '>2000'},
                },
                'steps': [
                    {
                        'name': 'Check current load',
                        'action': 'check_metrics',
                        'timeout': 30,
                        'parameters': {'metrics': ['cpu', 'memory', 'response_time']},
                    },
                    {
                        'name': 'Scale up instances',
                        'action': 'scale_instances',
                        'timeout': 300,
                        'parameters': {'count': 2, 'instance_type': 'web'},
                    },
                    {
                        'name': 'Update load balancer',
                        'action': 'update_load_balancer',
                        'timeout': 60,
                        'parameters': {'new_instances': True},
                    },
                    {
                        'name': 'Verify scaling',
                        'action': 'verify_scaling',
                        'timeout': 120,
                        'parameters': {'expected_instances': '+2'},
                    },
                ],
                'estimated_duration': timedelta(minutes=10),
                'category': 'scaling',
                'tags': ['scaling', 'performance', 'web'],
                'status': 'ACTIVE',
                'is_public': True,
                'created_by': admin_user,
            },
        )

        self._report_created(
            'runbook',
            [database_restart, web_scale_up],
            ' Sample runbooks already exist',
        )

    def create_sample_integrations(self):
        """Create sample integrations"""
        self.stdout.write('Creating sample integrations...')

        admin_user = self._get_admin_user()

        # Jira integration
        jira_integration = Integration.objects.get_or_create(
            name='Jira Production',
            defaults={
                'integration_type': 'JIRA',
                'description': 'Jira integration for production environment',
                'configuration': {
                    'base_url': 'https://company.atlassian.net',
                    'project_key': 'PROD',
                    'issue_type': 'Bug',
                },
                'authentication_config': {
                    'auth_type': 'basic',
                    'username': 'jira_user',
                    'api_token': 'encrypted_token_here',
                },
                'status': 'ACTIVE',
                'health_status': 'HEALTHY',
                'created_by': admin_user,
            },
        )

        # GitHub integration
        github_integration = Integration.objects.get_or_create(
            name='GitHub Main Repository',
            defaults={
                'integration_type': 'GITHUB',
                'description': 'GitHub integration for main repository',
                'configuration': {
                    'repository': 'company/main-repo',
                    'branch': 'main',
                    'webhook_secret': 'webhook_secret_here',
                },
                'authentication_config': {
                    'auth_type': 'token',
                    'access_token': 'encrypted_token_here',
                },
                'status': 'ACTIVE',
                'health_status': 'HEALTHY',
                'created_by': admin_user,
            },
        )

        self._report_created(
            'integration',
            [jira_integration, github_integration],
            ' Sample integrations already exist',
        )

    def create_sample_chatops_integrations(self):
        """Create sample ChatOps integrations"""
        self.stdout.write('Creating sample ChatOps integrations...')

        admin_user = self._get_admin_user()

        # Slack integration (webhook URL, token and IDs are placeholders)
        slack_integration = ChatOpsIntegration.objects.get_or_create(
            name='Production Slack',
            defaults={
                'platform': 'SLACK',
                'webhook_url': 'https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX',
                'bot_token': 'xoxb-0000000000000-0000000000000-XXXXXXXXXXXXXXXXXXXXXXXX',
                'channel_id': 'C0000000000',
                'command_prefix': '!',
                'available_commands': [
                    {
                        'name': 'incident',
                        'description': 'Create or manage incidents',
                        'usage': '!incident create "title" "description"',
                    },
                    {
                        'name': 'status',
                        'description': 'Check system status',
                        'usage': '!status [service]',
                    },
                    {
                        'name': 'runbook',
                        'description': 'Execute runbooks',
                        'usage': '!runbook execute <runbook_name>',
                    },
                ],
                'allowed_users': ['U0000000000', 'U0000000001'],
                'allowed_channels': ['C0000000000', 'C0000000001'],
                'is_active': True,
                'created_by': admin_user,
            },
        )

        self._report_created(
            'ChatOps integration',
            [slack_integration],
            ' Sample ChatOps integrations already exist',
        )

    def create_sample_auto_remediations(self):
        """Create sample auto-remediations"""
        self.stdout.write('Creating sample auto-remediations...')

        admin_user = self._get_admin_user()

        # Service restart remediation
        service_restart = AutoRemediation.objects.get_or_create(
            name='Auto Restart Database Service',
            defaults={
                'description': 'Automatically restart database service when connection issues are detected',
                'remediation_type': 'SERVICE_RESTART',
                'trigger_conditions': {
                    'severity': ['CRITICAL', 'EMERGENCY'],
                    'category': 'database',
                    'error_patterns': ['connection timeout', 'connection refused', 'database unavailable'],
                },
                'trigger_condition_type': 'CATEGORY',
                'remediation_config': {
                    'service_name': 'postgresql',
                    'restart_command': 'systemctl restart postgresql',
                    'verify_command': 'systemctl is-active postgresql',
                    'max_restart_attempts': 3,
                },
                'timeout_seconds': 300,
                'requires_approval': False,
                'max_executions_per_incident': 1,
                'is_active': True,
                'created_by': admin_user,
            },
        )

        # Deployment rollback remediation
        rollback_remediation = AutoRemediation.objects.get_or_create(
            name='Auto Rollback Failed Deployment',
            defaults={
                'description': 'Automatically rollback deployment when critical errors are detected',
                'remediation_type': 'DEPLOYMENT_ROLLBACK',
                'trigger_conditions': {
                    'severity': ['CRITICAL', 'EMERGENCY'],
                    'category': 'deployment',
                    'error_rate_threshold': 0.1,
                    'time_window_minutes': 5,
                },
                'trigger_condition_type': 'SEVERITY',
                'remediation_config': {
                    'rollback_to_version': 'previous',
                    'rollback_command': 'kubectl rollout undo deployment/web-app',
                    'verify_command': 'kubectl get pods -l app=web-app',
                    'notification_channels': ['slack', 'email'],
                },
                'timeout_seconds': 600,
                'requires_approval': True,
                'max_executions_per_incident': 1,
                'is_active': True,
                'created_by': admin_user,
            },
        )

        self._report_created(
            'auto-remediation',
            [service_restart, rollback_remediation],
            ' Sample auto-remediations already exist',
        )

    def create_sample_maintenance_windows(self):
        """Create sample maintenance windows"""
        self.stdout.write('Creating sample maintenance windows...')

        admin_user = self._get_admin_user()

        # Take the clock reading once so the window is exactly two hours
        # long; two separate now() calls would drift by a few microseconds.
        window_start = timezone.now() + timedelta(days=1)
        window_end = window_start + timedelta(hours=2)

        # Weekly maintenance window
        weekly_maintenance = MaintenanceWindow.objects.get_or_create(
            name='Weekly System Maintenance',
            defaults={
                'description': 'Weekly maintenance window for system updates and patches',
                'start_time': window_start,
                'end_time': window_end,
                'timezone': 'UTC',
                'affected_services': ['web-app', 'api-service', 'database'],
                'affected_components': ['load-balancer', 'cache', 'monitoring'],
                'suppress_incident_creation': True,
                'suppress_notifications': True,
                'suppress_escalations': True,
                'status': 'SCHEDULED',
                'created_by': admin_user,
            },
        )

        self._report_created(
            'maintenance window',
            [weekly_maintenance],
            ' Sample maintenance windows already exist',
        )

    def create_sample_workflow_templates(self):
        """Create sample workflow templates"""
        self.stdout.write('Creating sample workflow templates...')

        admin_user = self._get_admin_user()

        # Incident response workflow
        incident_workflow = WorkflowTemplate.objects.get_or_create(
            name='Standard Incident Response',
            defaults={
                'description': 'Standard workflow for incident response and resolution',
                'template_type': 'INCIDENT_RESPONSE',
                'workflow_steps': [
                    {
                        'name': 'Initial Assessment',
                        'action': 'assess_incident',
                        'conditions': {'severity': ['HIGH', 'CRITICAL', 'EMERGENCY']},
                        'timeout': 300,
                    },
                    {
                        'name': 'Notify Stakeholders',
                        'action': 'notify_stakeholders',
                        'conditions': {'auto_notify': True},
                        'timeout': 60,
                    },
                    {
                        'name': 'Execute Runbook',
                        'action': 'execute_runbook',
                        'conditions': {'has_runbook': True},
                        'timeout': 1800,
                    },
                    {
                        'name': 'Update Status',
                        'action': 'update_incident_status',
                        'conditions': {'always': True},
                        'timeout': 30,
                    },
                ],
                'input_parameters': [
                    {'name': 'incident_id', 'type': 'string', 'required': True},
                    {'name': 'severity', 'type': 'string', 'required': True},
                    {'name': 'category', 'type': 'string', 'required': False},
                ],
                'output_schema': {
                    'type': 'object',
                    'properties': {
                        'status': {'type': 'string'},
                        'resolution_time': {'type': 'string'},
                        'actions_taken': {'type': 'array'},
                    },
                },
                'is_public': True,
                'created_by': admin_user,
            },
        )

        self._report_created(
            'workflow template',
            [incident_workflow],
            ' Sample workflow templates already exist',
        )