# File: ETB/ETB-API/automation_orchestration/management/commands/setup_automation.py
# Last commit: 6b247e5b9f "Updates" by Iliyan Angelov, 2025-09-19 11:58:53 +03:00
# Size: 434 lines, 17 KiB (Python)

"""
Management command to set up automation & orchestration module
"""
from django.core.management.base import BaseCommand
from django.contrib.auth import get_user_model
from datetime import timedelta, datetime
from django.utils import timezone
from automation_orchestration.models import (
Runbook,
Integration,
ChatOpsIntegration,
AutoRemediation,
MaintenanceWindow,
WorkflowTemplate,
)
# Resolve the user model through Django's get_user_model() rather than
# importing a concrete User class; the sample data below uses it to look up
# (or create) the superuser recorded as `created_by`.
User = get_user_model()
class Command(BaseCommand):
    """Populate the automation & orchestration module with sample data.

    Running the command creates (if they do not already exist, matched by
    ``name``) sample runbooks, integrations, ChatOps integrations,
    auto-remediations, maintenance windows and workflow templates.

    Pass ``--reset`` to delete all existing rows of those models first.
    """

    help = 'Set up automation & orchestration module with sample data'

    def add_arguments(self, parser):
        """Register the optional ``--reset`` flag on the argument parser."""
        parser.add_argument(
            '--reset',
            action='store_true',
            help='Reset existing data before creating new data',
        )

    def handle(self, *args, **options):
        """Entry point: optionally reset, then create every sample data set."""
        if options['reset']:
            self.stdout.write('Resetting existing automation data...')
            self.reset_data()

        self.stdout.write('Setting up automation & orchestration module...')

        # Create sample runbooks
        self.create_sample_runbooks()
        # Create sample integrations
        self.create_sample_integrations()
        # Create sample ChatOps integrations
        self.create_sample_chatops_integrations()
        # Create sample auto-remediations
        self.create_sample_auto_remediations()
        # Create sample maintenance windows
        self.create_sample_maintenance_windows()
        # Create sample workflow templates
        self.create_sample_workflow_templates()

        self.stdout.write(
            self.style.SUCCESS('Successfully set up automation & orchestration module!')
        )

    def reset_data(self):
        """Reset existing automation data.

        Deletes every row of each sample-data model, in the same order the
        models are listed here.
        """
        models_to_clear = (
            Runbook,
            Integration,
            ChatOpsIntegration,
            AutoRemediation,
            MaintenanceWindow,
            WorkflowTemplate,
        )
        for model in models_to_clear:
            model.objects.all().delete()

    def _get_admin_user(self):
        """Return a superuser to own the sample data, creating one if needed.

        The first existing superuser is reused. Only when none exists is a
        default ``admin`` account created.
        """
        admin_user = User.objects.filter(is_superuser=True).first()
        if admin_user is None:
            # NOTE(security): this fallback account uses a well-known sample
            # password. It is kept for backward compatibility with existing
            # setups, but must be changed before any non-local use.
            admin_user = User.objects.create_superuser(
                username='admin',
                email='admin@example.com',
                password='admin123'
            )
            self.stdout.write(
                self.style.WARNING(
                    " Created default superuser 'admin' with a sample password; "
                    "change it before using this environment."
                )
            )
        return admin_user

    def _create_samples(self, model, samples, label):
        """Get-or-create each sample and report the outcome per object.

        Args:
            model: Model class whose default manager is used.
            samples: Iterable of ``(name, defaults)`` pairs passed to
                ``get_or_create(name=name, defaults=defaults)``.
            label: Singular noun used in the progress messages.
        """
        any_created = False
        for name, defaults in samples:
            obj, created = model.objects.get_or_create(name=name, defaults=defaults)
            # Track each object separately so one result cannot mask another.
            if created:
                any_created = True
                self.stdout.write(f' Created {label}: {obj.name}')
        if not any_created:
            self.stdout.write(f' Sample {label}s already exist')

    def create_sample_runbooks(self):
        """Create sample runbooks"""
        self.stdout.write('Creating sample runbooks...')
        # Get or create a superuser for sample data
        admin_user = self._get_admin_user()

        samples = [
            # Sample runbook 1: Database restart
            (
                'Database Service Restart',
                {
                    'description': 'Automated runbook for restarting database services',
                    'version': '1.0',
                    'trigger_type': 'AUTOMATIC',
                    'trigger_conditions': {
                        'severity': ['CRITICAL', 'EMERGENCY'],
                        'category': 'database',
                        'keywords': ['database', 'connection', 'timeout']
                    },
                    'steps': [
                        {
                            'name': 'Check database status',
                            'action': 'check_service_status',
                            'timeout': 30,
                            'parameters': {'service': 'postgresql'}
                        },
                        {
                            'name': 'Stop database service',
                            'action': 'stop_service',
                            'timeout': 60,
                            'parameters': {'service': 'postgresql'}
                        },
                        {
                            'name': 'Start database service',
                            'action': 'start_service',
                            'timeout': 120,
                            'parameters': {'service': 'postgresql'}
                        },
                        {
                            'name': 'Verify database connectivity',
                            'action': 'verify_connectivity',
                            'timeout': 30,
                            'parameters': {'host': 'localhost', 'port': 5432}
                        }
                    ],
                    'estimated_duration': timedelta(minutes=5),
                    'category': 'database',
                    'tags': ['database', 'restart', 'automation'],
                    'status': 'ACTIVE',
                    'is_public': True,
                    'created_by': admin_user
                },
            ),
            # Sample runbook 2: Web server scaling
            (
                'Web Server Scale Up',
                {
                    'description': 'Automated runbook for scaling up web servers',
                    'version': '1.0',
                    'trigger_type': 'AUTOMATIC',
                    'trigger_conditions': {
                        'severity': ['HIGH', 'CRITICAL'],
                        'category': 'performance',
                        'metrics': {'cpu_usage': '>80', 'response_time': '>2000'}
                    },
                    'steps': [
                        {
                            'name': 'Check current load',
                            'action': 'check_metrics',
                            'timeout': 30,
                            'parameters': {'metrics': ['cpu', 'memory', 'response_time']}
                        },
                        {
                            'name': 'Scale up instances',
                            'action': 'scale_instances',
                            'timeout': 300,
                            'parameters': {'count': 2, 'instance_type': 'web'}
                        },
                        {
                            'name': 'Update load balancer',
                            'action': 'update_load_balancer',
                            'timeout': 60,
                            'parameters': {'new_instances': True}
                        },
                        {
                            'name': 'Verify scaling',
                            'action': 'verify_scaling',
                            'timeout': 120,
                            'parameters': {'expected_instances': '+2'}
                        }
                    ],
                    'estimated_duration': timedelta(minutes=10),
                    'category': 'scaling',
                    'tags': ['scaling', 'performance', 'web'],
                    'status': 'ACTIVE',
                    'is_public': True,
                    'created_by': admin_user
                },
            ),
        ]
        self._create_samples(Runbook, samples, 'runbook')

    def create_sample_integrations(self):
        """Create sample integrations"""
        self.stdout.write('Creating sample integrations...')
        admin_user = self._get_admin_user()

        samples = [
            # Jira integration
            (
                'Jira Production',
                {
                    'integration_type': 'JIRA',
                    'description': 'Jira integration for production environment',
                    'configuration': {
                        'base_url': 'https://company.atlassian.net',
                        'project_key': 'PROD',
                        'issue_type': 'Bug'
                    },
                    'authentication_config': {
                        'auth_type': 'basic',
                        'username': 'jira_user',
                        'api_token': 'encrypted_token_here'
                    },
                    'status': 'ACTIVE',
                    'health_status': 'HEALTHY',
                    'created_by': admin_user
                },
            ),
            # GitHub integration
            (
                'GitHub Main Repository',
                {
                    'integration_type': 'GITHUB',
                    'description': 'GitHub integration for main repository',
                    'configuration': {
                        'repository': 'company/main-repo',
                        'branch': 'main',
                        'webhook_secret': 'webhook_secret_here'
                    },
                    'authentication_config': {
                        'auth_type': 'token',
                        'access_token': 'encrypted_token_here'
                    },
                    'status': 'ACTIVE',
                    'health_status': 'HEALTHY',
                    'created_by': admin_user
                },
            ),
        ]
        self._create_samples(Integration, samples, 'integration')

    def create_sample_chatops_integrations(self):
        """Create sample ChatOps integrations"""
        self.stdout.write('Creating sample ChatOps integrations...')
        admin_user = self._get_admin_user()

        samples = [
            # Slack integration (placeholder webhook URL, token and IDs)
            (
                'Production Slack',
                {
                    'platform': 'SLACK',
                    'webhook_url': 'https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX',
                    'bot_token': 'xoxb-0000000000000-0000000000000-XXXXXXXXXXXXXXXXXXXXXXXX',
                    'channel_id': 'C0000000000',
                    'command_prefix': '!',
                    'available_commands': [
                        {
                            'name': 'incident',
                            'description': 'Create or manage incidents',
                            'usage': '!incident create "title" "description"'
                        },
                        {
                            'name': 'status',
                            'description': 'Check system status',
                            'usage': '!status [service]'
                        },
                        {
                            'name': 'runbook',
                            'description': 'Execute runbooks',
                            'usage': '!runbook execute <runbook_name>'
                        }
                    ],
                    'allowed_users': ['U0000000000', 'U0000000001'],
                    'allowed_channels': ['C0000000000', 'C0000000001'],
                    'is_active': True,
                    'created_by': admin_user
                },
            ),
        ]
        self._create_samples(ChatOpsIntegration, samples, 'ChatOps integration')

    def create_sample_auto_remediations(self):
        """Create sample auto-remediations"""
        self.stdout.write('Creating sample auto-remediations...')
        admin_user = self._get_admin_user()

        samples = [
            # Service restart remediation
            (
                'Auto Restart Database Service',
                {
                    'description': 'Automatically restart database service when connection issues are detected',
                    'remediation_type': 'SERVICE_RESTART',
                    'trigger_conditions': {
                        'severity': ['CRITICAL', 'EMERGENCY'],
                        'category': 'database',
                        'error_patterns': ['connection timeout', 'connection refused', 'database unavailable']
                    },
                    'trigger_condition_type': 'CATEGORY',
                    'remediation_config': {
                        'service_name': 'postgresql',
                        'restart_command': 'systemctl restart postgresql',
                        'verify_command': 'systemctl is-active postgresql',
                        'max_restart_attempts': 3
                    },
                    'timeout_seconds': 300,
                    'requires_approval': False,
                    'max_executions_per_incident': 1,
                    'is_active': True,
                    'created_by': admin_user
                },
            ),
            # Deployment rollback remediation
            (
                'Auto Rollback Failed Deployment',
                {
                    'description': 'Automatically rollback deployment when critical errors are detected',
                    'remediation_type': 'DEPLOYMENT_ROLLBACK',
                    'trigger_conditions': {
                        'severity': ['CRITICAL', 'EMERGENCY'],
                        'category': 'deployment',
                        'error_rate_threshold': 0.1,
                        'time_window_minutes': 5
                    },
                    'trigger_condition_type': 'SEVERITY',
                    'remediation_config': {
                        'rollback_to_version': 'previous',
                        'rollback_command': 'kubectl rollout undo deployment/web-app',
                        'verify_command': 'kubectl get pods -l app=web-app',
                        'notification_channels': ['slack', 'email']
                    },
                    'timeout_seconds': 600,
                    'requires_approval': True,
                    'max_executions_per_incident': 1,
                    'is_active': True,
                    'created_by': admin_user
                },
            ),
        ]
        self._create_samples(AutoRemediation, samples, 'auto-remediation')

    def create_sample_maintenance_windows(self):
        """Create sample maintenance windows"""
        self.stdout.write('Creating sample maintenance windows...')
        admin_user = self._get_admin_user()

        # Derive the end from the start so the window is exactly two hours;
        # two separate timezone.now() calls would drift by a few microseconds.
        window_start = timezone.now() + timedelta(days=1)
        window_end = window_start + timedelta(hours=2)

        samples = [
            # Weekly maintenance window
            (
                'Weekly System Maintenance',
                {
                    'description': 'Weekly maintenance window for system updates and patches',
                    'start_time': window_start,
                    'end_time': window_end,
                    'timezone': 'UTC',
                    'affected_services': ['web-app', 'api-service', 'database'],
                    'affected_components': ['load-balancer', 'cache', 'monitoring'],
                    'suppress_incident_creation': True,
                    'suppress_notifications': True,
                    'suppress_escalations': True,
                    'status': 'SCHEDULED',
                    'created_by': admin_user
                },
            ),
        ]
        self._create_samples(MaintenanceWindow, samples, 'maintenance window')

    def create_sample_workflow_templates(self):
        """Create sample workflow templates"""
        self.stdout.write('Creating sample workflow templates...')
        admin_user = self._get_admin_user()

        samples = [
            # Incident response workflow
            (
                'Standard Incident Response',
                {
                    'description': 'Standard workflow for incident response and resolution',
                    'template_type': 'INCIDENT_RESPONSE',
                    'workflow_steps': [
                        {
                            'name': 'Initial Assessment',
                            'action': 'assess_incident',
                            'conditions': {'severity': ['HIGH', 'CRITICAL', 'EMERGENCY']},
                            'timeout': 300
                        },
                        {
                            'name': 'Notify Stakeholders',
                            'action': 'notify_stakeholders',
                            'conditions': {'auto_notify': True},
                            'timeout': 60
                        },
                        {
                            'name': 'Execute Runbook',
                            'action': 'execute_runbook',
                            'conditions': {'has_runbook': True},
                            'timeout': 1800
                        },
                        {
                            'name': 'Update Status',
                            'action': 'update_incident_status',
                            'conditions': {'always': True},
                            'timeout': 30
                        }
                    ],
                    'input_parameters': [
                        {'name': 'incident_id', 'type': 'string', 'required': True},
                        {'name': 'severity', 'type': 'string', 'required': True},
                        {'name': 'category', 'type': 'string', 'required': False}
                    ],
                    'output_schema': {
                        'type': 'object',
                        'properties': {
                            'status': {'type': 'string'},
                            'resolution_time': {'type': 'string'},
                            'actions_taken': {'type': 'array'}
                        }
                    },
                    'is_public': True,
                    'created_by': admin_user
                },
            ),
        ]
        self._create_samples(WorkflowTemplate, samples, 'workflow template')