# ETB/ETB-API/monitoring/services/alerting.py

"""
Alerting service for monitoring system
"""
import json
import logging
import operator
import urllib.request
from datetime import datetime, timedelta
from typing import Dict, Any, List, Optional
from urllib.parse import urlsplit

from django.conf import settings
from django.contrib.auth import get_user_model
from django.core.mail import send_mail
from django.utils import timezone

from monitoring.models import AlertRule, Alert, SystemMetric, MetricMeasurement, MonitoringTarget

User = get_user_model()
logger = logging.getLogger(__name__)


class AlertEvaluator:
    """Evaluate alert rules and create ``Alert`` rows for the ones that fire.

    Every rule carries a ``condition`` dict whose ``type`` key selects the
    evaluation strategy: ``THRESHOLD``, ``ANOMALY``, ``AVAILABILITY`` or
    ``PATTERN``. Unknown types are logged and treated as "not triggered".
    """

    # Comparison functions keyed by the ``operator`` symbol of a THRESHOLD condition.
    _COMPARATORS = {
        '>': operator.gt,
        '>=': operator.ge,
        '<': operator.lt,
        '<=': operator.le,
        '==': operator.eq,
        '!=': operator.ne,
    }

    # Anomaly detection is skipped until at least this many samples exist.
    _MIN_ANOMALY_SAMPLES = 10

    # Number of standard deviations used when an ANOMALY condition has no threshold.
    _DEFAULT_ANOMALY_SIGMAS = 2.0

    def __init__(self):
        self.aggregator = None  # Will be imported to avoid circular imports

    def evaluate_alert_rules(self) -> List[Dict[str, Any]]:
        """Evaluate every active, enabled rule and create alerts for those that fire.

        Returns:
            One summary dict (see ``_create_alert``) per alert that was created.
        """
        triggered_alerts = []

        active_rules = AlertRule.objects.filter(
            status='ACTIVE',
            is_enabled=True
        )

        for rule in active_rules:
            try:
                if self._evaluate_rule(rule):
                    triggered_alerts.append(self._create_alert(rule))
            except Exception as e:
                # A single broken rule must not prevent the remaining rules from
                # being evaluated; keep the traceback so the cause is diagnosable.
                logger.exception(f"Failed to evaluate alert rule {rule.name}: {e}")

        return triggered_alerts

    def _evaluate_rule(self, rule: AlertRule) -> bool:
        """Return True when the rule's condition is currently met."""
        # A rule without a condition cannot trigger; treat it like an unknown type.
        condition = rule.condition or {}
        condition_type = condition.get('type')

        handlers = {
            'THRESHOLD': self._evaluate_threshold_condition,
            'ANOMALY': self._evaluate_anomaly_condition,
            'AVAILABILITY': self._evaluate_availability_condition,
            'PATTERN': self._evaluate_pattern_condition,
        }
        handler = handlers.get(condition_type) if isinstance(condition_type, str) else None

        if handler is None:
            logger.warning(f"Unknown condition type: {condition_type}")
            return False

        return handler(rule, condition)

    def _evaluate_threshold_condition(self, rule: AlertRule, condition: Dict[str, Any]) -> bool:
        """Compare the metric's most recent measurement against the condition threshold.

        Returns False when the rule has no metric, the metric has no
        measurements, or the condition's operator/threshold is unusable.
        """
        if not rule.metric:
            return False

        latest_measurement = MetricMeasurement.objects.filter(
            metric=rule.metric
        ).order_by('-timestamp').first()

        if latest_measurement is None:
            return False

        return self._compare_threshold(
            float(latest_measurement.value),
            condition.get('operator', '>'),
            condition.get('threshold'),
        )

    @classmethod
    def _compare_threshold(cls, current_value: float, operator_symbol: Any, threshold: Any) -> bool:
        """Apply ``operator_symbol`` to ``current_value`` and ``threshold``.

        An unknown operator or a threshold that cannot be converted to a
        float is logged as a warning and yields False instead of raising.
        """
        compare = cls._COMPARATORS.get(operator_symbol) if isinstance(operator_symbol, str) else None
        if compare is None:
            logger.warning(f"Unknown operator: {operator_symbol}")
            return False

        try:
            threshold_value = float(threshold)
        except (TypeError, ValueError):
            # Covers a missing threshold (None) and non-numeric configuration.
            logger.warning(f"Invalid threshold value: {threshold!r}")
            return False

        return compare(current_value, threshold_value)

    def _evaluate_anomaly_condition(self, rule: AlertRule, condition: Dict[str, Any]) -> bool:
        """Flag the latest measurement when it deviates strongly from recent history.

        Looks at up to 100 measurements from the last 24 hours, newest first.
        """
        # This would integrate with anomaly detection models
        # For now, implement a simple statistical anomaly detection
        if not rule.metric:
            return False

        since = timezone.now() - timedelta(hours=24)
        recent_measurements = MetricMeasurement.objects.filter(
            metric=rule.metric,
            timestamp__gte=since
        ).order_by('-timestamp')[:100]  # Last 100 measurements

        values = [float(measurement.value) for measurement in recent_measurements]
        return self._is_anomalous(values, condition.get('threshold'))

    @classmethod
    def _is_anomalous(cls, values: List[float], sigma_threshold: Any = None) -> bool:
        """Return True when ``values[0]`` lies more than N sigmas from the mean.

        Args:
            values: Samples ordered newest first; ``values[0]`` is the one tested.
                The mean and population standard deviation are computed over
                all samples, including the tested one.
            sigma_threshold: Number of standard deviations; None selects the default.
        """
        if len(values) < cls._MIN_ANOMALY_SAMPLES:  # Need minimum data points
            return False

        if sigma_threshold is None:
            sigma_threshold = cls._DEFAULT_ANOMALY_SIGMAS
        try:
            sigmas = float(sigma_threshold)
        except (TypeError, ValueError):
            logger.warning(f"Invalid anomaly threshold: {sigma_threshold!r}")
            return False

        mean = sum(values) / len(values)
        variance = sum((value - mean) ** 2 for value in values) / len(values)
        std_dev = variance ** 0.5

        return abs(values[0] - mean) > (sigmas * std_dev)

    def _evaluate_availability_condition(self, rule: AlertRule, condition: Dict[str, Any]) -> bool:
        """Return True when the rule's target reports a ``CRITICAL`` last status."""
        if not rule.target:
            return False

        return rule.target.last_status == 'CRITICAL'

    def _evaluate_pattern_condition(self, rule: AlertRule, condition: Dict[str, Any]) -> bool:
        """Placeholder for pattern-based conditions; never triggers."""
        # This would integrate with pattern detection algorithms
        return False

    def _create_alert(self, rule: AlertRule) -> Dict[str, Any]:
        """Persist an ``Alert`` for ``rule`` and return a summary of it.

        Returns:
            Dict with ``alert_id``, ``rule_id``, ``rule_name``, ``severity``,
            ``title``, ``description``, ``current_value`` and ``threshold_value``.
        """
        current_value = None
        threshold_value = None

        if rule.metric:
            latest_measurement = MetricMeasurement.objects.filter(
                metric=rule.metric
            ).order_by('-timestamp').first()
            if latest_measurement:
                current_value = float(latest_measurement.value)
                # NOTE(review): this records the metric's critical threshold, which
                # may differ from the threshold in rule.condition that actually
                # triggered the alert - confirm which one should be reported.
                threshold_value = rule.metric.critical_threshold

        alert = Alert.objects.create(
            rule=rule,
            title=f"{rule.name} - {rule.severity}",
            description=self._generate_alert_description(rule, current_value, threshold_value),
            severity=rule.severity,
            triggered_value=current_value,
            threshold_value=threshold_value,
            context_data={
                'rule_id': str(rule.id),
                'metric_name': rule.metric.name if rule.metric else None,
                'target_name': rule.target.name if rule.target else None,
                'condition': rule.condition
            }
        )

        return {
            'alert_id': str(alert.id),
            # The notification service looks the rule up by this key to find
            # the configured channels; without it no notification is sent.
            'rule_id': str(rule.id),
            'rule_name': rule.name,
            'severity': rule.severity,
            'title': alert.title,
            'description': alert.description,
            'current_value': current_value,
            'threshold_value': threshold_value
        }

    def _generate_alert_description(self, rule: AlertRule, current_value: Optional[float], threshold_value: Optional[float]) -> str:
        """Build the multi-line, human-readable description stored on the alert."""
        parts = [f"Alert rule '{rule.name}' has been triggered.\n"]

        if rule.metric and current_value is not None:
            parts.append(f"Current value: {current_value} {rule.metric.unit}\n")

        if threshold_value is not None:
            parts.append(f"Threshold: {threshold_value} {rule.metric.unit if rule.metric else ''}\n")

        if rule.target:
            parts.append(f"Target: {rule.target.name}\n")

        parts.append(f"Severity: {rule.severity}\n")
        parts.append(f"Time: {timezone.now().strftime('%Y-%m-%d %H:%M:%S')}")

        return "".join(parts)


class NotificationService:
    """Deliver alert notifications over the channels configured on the alert's rule.

    Supported channel types are ``EMAIL``, ``SLACK`` and ``WEBHOOK``. Every
    sender returns a result dict: ``{'status': 'sent', ...}`` on success or
    ``{'error': <message>}`` on failure; senders do not raise.
    """

    # Seconds to wait for a Slack/webhook endpoint before the delivery is failed.
    HTTP_TIMEOUT_SECONDS = 10

    def __init__(self):
        self.evaluator = AlertEvaluator()

    def send_alert_notifications(self, alert_data: Dict[str, Any]) -> Dict[str, Any]:
        """Send ``alert_data`` to every notification channel of its rule.

        Args:
            alert_data: Alert summary. The rule is located through ``rule_id``
                or, when that is absent, through the alert named by ``alert_id``.

        Returns:
            ``{'error': ...}`` when the rule cannot be determined, otherwise a
            dict mapping each channel type to that channel's result dict.
        """
        rule_id = alert_data.get('rule_id')
        alert_id = alert_data.get('alert_id')
        if not rule_id and not alert_id:
            return {'error': 'No rule ID provided'}

        try:
            if rule_id:
                rule = AlertRule.objects.get(id=rule_id)
            else:
                # Alert summaries may carry only the alert id; follow the
                # alert's foreign key to reach its rule.
                rule = Alert.objects.select_related('rule').get(id=alert_id).rule
        except (AlertRule.DoesNotExist, Alert.DoesNotExist):
            return {'error': 'Alert rule not found'}

        senders = {
            'EMAIL': self._send_email_notification,
            'SLACK': self._send_slack_notification,
            'WEBHOOK': self._send_webhook_notification,
        }

        results = {}
        for channel in rule.notification_channels or []:
            if not isinstance(channel, dict):
                results['UNKNOWN'] = {'error': 'Invalid notification channel configuration'}
                continue

            # Resolve the type once, up front, so a channel without a 'type'
            # key cannot raise again inside the error handler below.
            channel_type = str(channel.get('type', 'UNKNOWN'))
            sender = senders.get(channel_type)

            try:
                if sender is None:
                    result = {'error': f'Unknown notification channel type: {channel_type}'}
                else:
                    result = sender(alert_data, channel)
            except Exception as e:
                logger.error(f"Failed to send {channel_type} notification: {e}")
                result = {'error': str(e)}

            results[channel_type] = result

        return results

    def _send_email_notification(self, alert_data: Dict[str, Any], channel: Dict[str, Any]) -> Dict[str, Any]:
        """Email the alert to ``channel['recipients']`` via Django's ``send_mail``."""
        try:
            recipients = channel.get('recipients', [])
            if not recipients:
                return {'error': 'No email recipients configured'}

            send_mail(
                subject=f"[{alert_data.get('severity', 'ALERT')}] {alert_data.get('title', 'System Alert')}",
                message=alert_data.get('description', ''),
                from_email=settings.DEFAULT_FROM_EMAIL,
                recipient_list=recipients,
                fail_silently=False
            )

            return {'status': 'sent', 'recipients': recipients}

        except Exception as e:
            return {'error': str(e)}

    def _send_slack_notification(self, alert_data: Dict[str, Any], channel: Dict[str, Any]) -> Dict[str, Any]:
        """POST the alert as a Slack message to ``channel['webhook_url']``."""
        try:
            webhook_url = channel.get('webhook_url')
            if not webhook_url:
                return {'error': 'No Slack webhook URL configured'}

            slack_message = {
                "text": alert_data.get('title', 'System Alert'),
                "attachments": [
                    {
                        "color": self._get_slack_color(alert_data.get('severity', 'MEDIUM')),
                        "fields": [
                            {
                                "title": "Description",
                                "value": alert_data.get('description', ''),
                                "short": False
                            },
                            {
                                "title": "Severity",
                                "value": alert_data.get('severity', 'UNKNOWN'),
                                "short": True
                            },
                            {
                                "title": "Time",
                                "value": timezone.now().strftime('%Y-%m-%d %H:%M:%S'),
                                "short": True
                            }
                        ]
                    }
                ]
            }

            # Only report 'sent' after the endpoint accepted the request.
            self._post_json(webhook_url, slack_message)

            return {'status': 'sent', 'channel': channel.get('channel', '#alerts')}

        except Exception as e:
            return {'error': str(e)}

    def _send_webhook_notification(self, alert_data: Dict[str, Any], channel: Dict[str, Any]) -> Dict[str, Any]:
        """POST the alert as JSON to the generic webhook at ``channel['url']``."""
        try:
            webhook_url = channel.get('url')
            if not webhook_url:
                return {'error': 'No webhook URL configured'}

            payload = {
                'alert': alert_data,
                'timestamp': timezone.now().isoformat(),
                'source': 'ETB-API-Monitoring'
            }

            # Only report 'sent' after the endpoint accepted the request.
            self._post_json(webhook_url, payload)

            return {'status': 'sent', 'url': webhook_url}

        except Exception as e:
            return {'error': str(e)}

    def _post_json(self, url: str, payload: Dict[str, Any]) -> int:
        """POST ``payload`` as a JSON document to ``url`` and return the HTTP status.

        Raises:
            ValueError: if ``url`` is not an http(s) URL. ``urlopen`` would
                otherwise also accept schemes such as ``file://``.
            urllib.error.URLError: on connection failures or non-success
                responses (raised by ``urlopen``).
        """
        scheme = urlsplit(url).scheme.lower()
        if scheme not in ('http', 'https'):
            raise ValueError(f'Unsupported webhook URL scheme: {scheme!r}')

        # default=str is deliberate: alert payloads may carry values that are
        # not JSON-native (for example a threshold copied from a model field).
        body = json.dumps(payload, default=str).encode('utf-8')
        request = urllib.request.Request(
            url,
            data=body,
            headers={'Content-Type': 'application/json'},
            method='POST',
        )
        with urllib.request.urlopen(request, timeout=self.HTTP_TIMEOUT_SECONDS) as response:
            return response.status

    def _get_slack_color(self, severity: str) -> str:
        """Map an alert severity to a Slack attachment color (default ``warning``)."""
        color_map = {
            'LOW': 'good',
            'MEDIUM': 'warning',
            'HIGH': 'danger',
            'CRITICAL': 'danger'
        }
        return color_map.get(severity, 'warning')


class AlertingService:
    """Coordinate alert evaluation, notification delivery and alert lifecycle changes."""

    def __init__(self):
        self.evaluator = AlertEvaluator()
        self.notification_service = NotificationService()

    def run_alert_evaluation(self) -> Dict[str, Any]:
        """Evaluate all rules and notify for every alert that was triggered.

        Returns:
            Dict with ``evaluated_rules``, ``triggered_alerts``,
            ``notifications_sent`` (number of channel deliveries that reported
            ``'sent'``) and ``errors`` (list of error messages).
        """
        results = {
            'evaluated_rules': 0,
            'triggered_alerts': 0,
            'notifications_sent': 0,
            'errors': []
        }

        try:
            triggered_alerts = self.evaluator.evaluate_alert_rules()
            results['triggered_alerts'] = len(triggered_alerts)

            for alert_data in triggered_alerts:
                alert_id = alert_data.get('alert_id')
                try:
                    outcome = self.notification_service.send_alert_notifications(alert_data)
                except Exception as e:
                    logger.error(f"Failed to send notifications for alert {alert_id}: {e}")
                    results['errors'].append(str(e))
                    continue

                # The notification service reports failures in its return
                # value rather than by raising, so inspect the outcome instead
                # of counting every call as a success.
                sent_count, failures = self._summarize_notification_outcome(alert_id, outcome)
                results['notifications_sent'] += sent_count
                results['errors'].extend(failures)

            results['evaluated_rules'] = AlertRule.objects.filter(
                status='ACTIVE',
                is_enabled=True
            ).count()

        except Exception as e:
            logger.error(f"Alert evaluation failed: {e}")
            results['errors'].append(str(e))

        return results

    @staticmethod
    def _summarize_notification_outcome(alert_id: Any, outcome: Any):
        """Split a notification result into (deliveries sent, error messages).

        ``outcome`` is either ``{'error': <str>}`` when nothing could be
        dispatched, or a dict mapping channel type to a per-channel result dict.
        """
        if not isinstance(outcome, dict):
            return 0, []

        top_level_error = outcome.get('error')
        if isinstance(top_level_error, str):
            return 0, [f"Alert {alert_id}: {top_level_error}"]

        sent_count = 0
        failures = []
        for channel_type, channel_result in outcome.items():
            if not isinstance(channel_result, dict):
                continue
            if channel_result.get('status') == 'sent':
                sent_count += 1
            elif 'error' in channel_result:
                failures.append(f"Alert {alert_id} [{channel_type}]: {channel_result['error']}")

        return sent_count, failures

    def acknowledge_alert(self, alert_id: str, user: User) -> Dict[str, Any]:
        """Mark an alert as acknowledged by ``user``.

        Returns:
            ``{'status': 'success' | 'error', 'message': ...}``. An alert that
            is already resolved is left untouched and reported as an error.
        """
        try:
            alert = Alert.objects.get(id=alert_id)

            # Acknowledging must not move a resolved alert back to an open state.
            if alert.status == 'RESOLVED':
                return {
                    'status': 'error',
                    'message': f'Alert {alert_id} is already resolved'
                }

            alert.status = 'ACKNOWLEDGED'
            alert.acknowledged_by = user
            alert.acknowledged_at = timezone.now()
            alert.save()

            return {
                'status': 'success',
                'message': f'Alert {alert_id} acknowledged by {user.username}'
            }

        except Alert.DoesNotExist:
            return {
                'status': 'error',
                'message': f'Alert {alert_id} not found'
            }
        except Exception as e:
            return {
                'status': 'error',
                'message': str(e)
            }

    def resolve_alert(self, alert_id: str, user: User) -> Dict[str, Any]:
        """Mark an alert as resolved by ``user``.

        Returns:
            ``{'status': 'success' | 'error', 'message': ...}``.
        """
        try:
            alert = Alert.objects.get(id=alert_id)
            alert.status = 'RESOLVED'
            alert.resolved_by = user
            alert.resolved_at = timezone.now()
            alert.save()

            return {
                'status': 'success',
                'message': f'Alert {alert_id} resolved by {user.username}'
            }

        except Alert.DoesNotExist:
            return {
                'status': 'error',
                'message': f'Alert {alert_id} not found'
            }
        except Exception as e:
            return {
                'status': 'error',
                'message': str(e)
            }

    def get_active_alerts(self, severity: Optional[str] = None) -> List[Dict[str, Any]]:
        """Return alerts in ``TRIGGERED`` status, newest first.

        Args:
            severity: When given, only alerts with this severity are returned.
        """
        # select_related avoids one extra query per alert for alert.rule.name.
        alerts = Alert.objects.filter(status='TRIGGERED').select_related('rule')

        if severity:
            alerts = alerts.filter(severity=severity)

        return [
            {
                'id': str(alert.id),
                'title': alert.title,
                'description': alert.description,
                'severity': alert.severity,
                'triggered_at': alert.triggered_at,
                'rule_name': alert.rule.name,
                'current_value': self._float_or_none(alert.triggered_value),
                'threshold_value': self._float_or_none(alert.threshold_value)
            }
            for alert in alerts.order_by('-triggered_at')
        ]

    @staticmethod
    def _float_or_none(value: Any) -> Optional[float]:
        """Convert ``value`` to float, keeping None as None.

        Uses an explicit None check so a legitimate value of 0 is preserved.
        """
        return None if value is None else float(value)