"""
Correlation engine for linking related incidents and problem detection.
"""
|
|
import re
import time
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Dict, List, Tuple, Optional

from django.utils import timezone

from .classification import IncidentClassifier
|
|
|
|
|
|
@dataclass
class CorrelationResult:
    """Result of incident correlation analysis"""

    # Kind of relationship detected (e.g. 'SAME_SERVICE', 'TEMPORAL', 'CASCADE').
    correlation_type: str
    # Confidence in the correlation, in [0, 1].
    confidence_score: float
    # Bucketed label derived from the confidence score ('WEAK' .. 'VERY_STRONG').
    correlation_strength: str
    # Keywords appearing in both incidents' text (at most 10).
    shared_keywords: List[str]
    # Absolute gap between the two incidents' creation times.
    time_difference: timedelta
    # Overall weighted similarity score, in [0, 1].
    similarity_score: float
    # Whether this correlation looks like evidence of a wider problem.
    is_problem_indicator: bool
    # Human-readable description of the suspected problem, if any.
    problem_description: Optional[str]
|
|
|
|
|
|
class IncidentCorrelationEngine:
    """
    AI-driven correlation engine for linking related incidents
    """
|
|
|
|
def __init__(self):
    """Initialize model metadata, the text classifier, strength thresholds,
    and the problem-detection pattern catalog."""
    self.model_version = "v1.0"
    self.classifier = IncidentClassifier()

    # Correlation thresholds
    # Minimum confidence score required for each strength label.
    self.correlation_thresholds = {
        'VERY_STRONG': 0.9,
        'STRONG': 0.7,
        'MODERATE': 0.5,
        'WEAK': 0.3
    }

    # Problem detection patterns
    # Each entry pairs trigger keywords with a time window and a minimum
    # incident count that would suggest a systemic problem.
    # NOTE(review): _detect_problem_patterns only consults 'keywords';
    # 'time_window' and 'min_incidents' appear intended for cluster
    # detection — confirm before relying on them.
    self.problem_patterns = {
        'CASCADE_FAILURE': {
            'keywords': ['cascade', 'chain', 'reaction', 'domino', 'ripple', 'effect'],
            'time_window': timedelta(hours=2),
            'min_incidents': 3
        },
        'RECURRING_ISSUE': {
            'keywords': ['same', 'again', 'recurring', 'repeated', 'similar', 'identical'],
            'time_window': timedelta(days=7),
            'min_incidents': 2
        },
        'SERVICE_DEPENDENCY': {
            'keywords': ['dependency', 'dependent', 'downstream', 'upstream', 'service', 'api'],
            'time_window': timedelta(hours=1),
            'min_incidents': 2
        },
        'INFRASTRUCTURE_PATTERN': {
            'keywords': ['server', 'database', 'network', 'storage', 'infrastructure'],
            'time_window': timedelta(hours=4),
            'min_incidents': 3
        }
    }
|
|
|
|
def correlate_incidents(self, incident_a: Dict, incident_b: Dict) -> Optional[CorrelationResult]:
    """Correlate two incidents; return a CorrelationResult, or None when unrelated.

    Blends text (40%), temporal (20%), service (20%) and category (20%)
    similarity into an overall score; pairs scoring below 0.3 are
    treated as uncorrelated.
    """
    text_sim = self._calculate_text_similarity(incident_a, incident_b)
    time_sim = self._calculate_temporal_similarity(incident_a, incident_b)
    svc_sim = self._calculate_service_similarity(incident_a, incident_b)
    cat_sim = self._calculate_category_similarity(incident_a, incident_b)

    # Weighted blend of the individual similarity signals.
    overall = (
        text_sim * 0.4 +
        time_sim * 0.2 +
        svc_sim * 0.2 +
        cat_sim * 0.2
    )

    # Below this floor the pair is considered uncorrelated.
    if overall < 0.3:
        return None

    corr_type = self._determine_correlation_type(
        incident_a, incident_b, text_sim, time_sim, svc_sim
    )
    confidence = self._calculate_confidence_score(
        overall, corr_type, incident_a, incident_b
    )
    strength = self._determine_correlation_strength(confidence)
    is_problem, problem_desc = self._detect_problem_patterns(
        incident_a, incident_b, corr_type, confidence
    )

    return CorrelationResult(
        correlation_type=corr_type,
        confidence_score=confidence,
        correlation_strength=strength,
        shared_keywords=self._extract_shared_keywords(incident_a, incident_b),
        time_difference=self._calculate_time_difference(incident_a, incident_b),
        similarity_score=overall,
        is_problem_indicator=is_problem,
        problem_description=problem_desc
    )
|
|
|
|
def _calculate_text_similarity(self, incident_a: Dict, incident_b: Dict) -> float:
|
|
"""Calculate text similarity between two incidents"""
|
|
# Combine text fields
|
|
text_a = f"{incident_a.get('title', '')} {incident_a.get('description', '')} {incident_a.get('free_text', '')}".lower()
|
|
text_b = f"{incident_b.get('title', '')} {incident_b.get('description', '')} {incident_b.get('free_text', '')}".lower()
|
|
|
|
# Extract keywords
|
|
keywords_a = set(self.classifier._extract_keywords(text_a))
|
|
keywords_b = set(self.classifier._extract_keywords(text_b))
|
|
|
|
if not keywords_a or not keywords_b:
|
|
return 0.0
|
|
|
|
# Calculate Jaccard similarity
|
|
intersection = len(keywords_a.intersection(keywords_b))
|
|
union = len(keywords_a.union(keywords_b))
|
|
|
|
jaccard_similarity = intersection / union if union > 0 else 0.0
|
|
|
|
# Also check for exact phrase matches
|
|
phrase_similarity = self._calculate_phrase_similarity(text_a, text_b)
|
|
|
|
# Combine similarities
|
|
return (jaccard_similarity * 0.7 + phrase_similarity * 0.3)
|
|
|
|
def _calculate_phrase_similarity(self, text_a: str, text_b: str) -> float:
|
|
"""Calculate similarity based on common phrases"""
|
|
# Extract 2-3 word phrases
|
|
phrases_a = set()
|
|
phrases_b = set()
|
|
|
|
words_a = text_a.split()
|
|
words_b = text_b.split()
|
|
|
|
# Extract 2-word phrases
|
|
for i in range(len(words_a) - 1):
|
|
phrases_a.add(f"{words_a[i]} {words_a[i+1]}")
|
|
|
|
for i in range(len(words_b) - 1):
|
|
phrases_b.add(f"{words_b[i]} {words_b[i+1]}")
|
|
|
|
# Extract 3-word phrases
|
|
for i in range(len(words_a) - 2):
|
|
phrases_a.add(f"{words_a[i]} {words_a[i+1]} {words_a[i+2]}")
|
|
|
|
for i in range(len(words_b) - 2):
|
|
phrases_b.add(f"{words_b[i]} {words_b[i+1]} {words_b[i+2]}")
|
|
|
|
if not phrases_a or not phrases_b:
|
|
return 0.0
|
|
|
|
intersection = len(phrases_a.intersection(phrases_b))
|
|
union = len(phrases_a.union(phrases_b))
|
|
|
|
return intersection / union if union > 0 else 0.0
|
|
|
|
def _calculate_temporal_similarity(self, incident_a: Dict, incident_b: Dict) -> float:
|
|
"""Calculate temporal similarity between incidents"""
|
|
created_a = incident_a.get('created_at')
|
|
created_b = incident_b.get('created_at')
|
|
|
|
if not created_a or not created_b:
|
|
return 0.0
|
|
|
|
# Convert to datetime if needed
|
|
if isinstance(created_a, str):
|
|
created_a = datetime.fromisoformat(created_a.replace('Z', '+00:00'))
|
|
if isinstance(created_b, str):
|
|
created_b = datetime.fromisoformat(created_b.replace('Z', '+00:00'))
|
|
|
|
time_diff = abs((created_a - created_b).total_seconds())
|
|
|
|
# Calculate similarity based on time difference
|
|
# Incidents within 1 hour: high similarity
|
|
# Incidents within 24 hours: medium similarity
|
|
# Incidents within 7 days: low similarity
|
|
if time_diff <= 3600: # 1 hour
|
|
return 1.0
|
|
elif time_diff <= 86400: # 24 hours
|
|
return 0.7
|
|
elif time_diff <= 604800: # 7 days
|
|
return 0.3
|
|
else:
|
|
return 0.0
|
|
|
|
def _calculate_service_similarity(self, incident_a: Dict, incident_b: Dict) -> float:
|
|
"""Calculate service/component similarity"""
|
|
# Extract service/component information from text
|
|
text_a = f"{incident_a.get('title', '')} {incident_a.get('description', '')}".lower()
|
|
text_b = f"{incident_b.get('title', '')} {incident_b.get('description', '')}".lower()
|
|
|
|
# Common service/component keywords
|
|
service_keywords = [
|
|
'api', 'service', 'database', 'server', 'application', 'website', 'mobile',
|
|
'frontend', 'backend', 'microservice', 'gateway', 'load balancer', 'cache',
|
|
'queue', 'message', 'notification', 'email', 'sms', 'payment', 'auth'
|
|
]
|
|
|
|
services_a = set()
|
|
services_b = set()
|
|
|
|
for keyword in service_keywords:
|
|
if keyword in text_a:
|
|
services_a.add(keyword)
|
|
if keyword in text_b:
|
|
services_b.add(keyword)
|
|
|
|
if not services_a or not services_b:
|
|
return 0.0
|
|
|
|
intersection = len(services_a.intersection(services_b))
|
|
union = len(services_a.union(services_b))
|
|
|
|
return intersection / union if union > 0 else 0.0
|
|
|
|
def _calculate_category_similarity(self, incident_a: Dict, incident_b: Dict) -> float:
|
|
"""Calculate category similarity"""
|
|
category_a = incident_a.get('category', '')
|
|
category_b = incident_b.get('category', '')
|
|
|
|
if not category_a or not category_b:
|
|
return 0.0
|
|
|
|
if category_a == category_b:
|
|
return 1.0
|
|
|
|
# Check for related categories
|
|
related_categories = {
|
|
'INFRASTRUCTURE': ['APPLICATION', 'SECURITY'],
|
|
'APPLICATION': ['INFRASTRUCTURE', 'USER_EXPERIENCE'],
|
|
'SECURITY': ['INFRASTRUCTURE', 'APPLICATION'],
|
|
'USER_EXPERIENCE': ['APPLICATION', 'DATA'],
|
|
'DATA': ['USER_EXPERIENCE', 'INTEGRATION'],
|
|
'INTEGRATION': ['DATA', 'APPLICATION']
|
|
}
|
|
|
|
if category_b in related_categories.get(category_a, []):
|
|
return 0.5
|
|
|
|
return 0.0
|
|
|
|
def _determine_correlation_type(self, incident_a: Dict, incident_b: Dict,
|
|
text_similarity: float, temporal_similarity: float,
|
|
service_similarity: float) -> str:
|
|
"""Determine the type of correlation between incidents"""
|
|
|
|
# Same service correlation
|
|
if service_similarity > 0.7:
|
|
return 'SAME_SERVICE'
|
|
|
|
# Same component correlation
|
|
if text_similarity > 0.6 and service_similarity > 0.4:
|
|
return 'SAME_COMPONENT'
|
|
|
|
# Temporal correlation
|
|
if temporal_similarity > 0.7 and text_similarity > 0.3:
|
|
return 'TEMPORAL'
|
|
|
|
# Pattern match
|
|
if text_similarity > 0.5:
|
|
return 'PATTERN'
|
|
|
|
# Dependency correlation
|
|
if service_similarity > 0.4 and temporal_similarity > 0.5:
|
|
return 'DEPENDENCY'
|
|
|
|
# Cascade effect
|
|
if temporal_similarity > 0.8 and text_similarity > 0.4:
|
|
return 'CASCADE'
|
|
|
|
return 'PATTERN' # Default
|
|
|
|
def _calculate_confidence_score(self, overall_similarity: float, correlation_type: str,
|
|
incident_a: Dict, incident_b: Dict) -> float:
|
|
"""Calculate confidence score for the correlation"""
|
|
base_confidence = overall_similarity
|
|
|
|
# Adjust based on correlation type
|
|
type_adjustments = {
|
|
'SAME_SERVICE': 0.1,
|
|
'SAME_COMPONENT': 0.15,
|
|
'TEMPORAL': 0.05,
|
|
'PATTERN': 0.0,
|
|
'DEPENDENCY': 0.1,
|
|
'CASCADE': 0.2
|
|
}
|
|
|
|
base_confidence += type_adjustments.get(correlation_type, 0.0)
|
|
|
|
# Adjust based on incident characteristics
|
|
if incident_a.get('severity') == incident_b.get('severity'):
|
|
base_confidence += 0.05
|
|
|
|
if incident_a.get('status') == incident_b.get('status'):
|
|
base_confidence += 0.03
|
|
|
|
return min(base_confidence, 1.0)
|
|
|
|
def _determine_correlation_strength(self, confidence_score: float) -> str:
|
|
"""Determine correlation strength based on confidence score"""
|
|
if confidence_score >= self.correlation_thresholds['VERY_STRONG']:
|
|
return 'VERY_STRONG'
|
|
elif confidence_score >= self.correlation_thresholds['STRONG']:
|
|
return 'STRONG'
|
|
elif confidence_score >= self.correlation_thresholds['MODERATE']:
|
|
return 'MODERATE'
|
|
else:
|
|
return 'WEAK'
|
|
|
|
def _extract_shared_keywords(self, incident_a: Dict, incident_b: Dict) -> List[str]:
|
|
"""Extract keywords shared between incidents"""
|
|
text_a = f"{incident_a.get('title', '')} {incident_a.get('description', '')}".lower()
|
|
text_b = f"{incident_b.get('title', '')} {incident_b.get('description', '')}".lower()
|
|
|
|
keywords_a = set(self.classifier._extract_keywords(text_a))
|
|
keywords_b = set(self.classifier._extract_keywords(text_b))
|
|
|
|
shared = list(keywords_a.intersection(keywords_b))
|
|
return shared[:10] # Return top 10 shared keywords
|
|
|
|
def _calculate_time_difference(self, incident_a: Dict, incident_b: Dict) -> timedelta:
|
|
"""Calculate time difference between incidents"""
|
|
created_a = incident_a.get('created_at')
|
|
created_b = incident_b.get('created_at')
|
|
|
|
if not created_a or not created_b:
|
|
return timedelta(0)
|
|
|
|
# Convert to datetime if needed
|
|
if isinstance(created_a, str):
|
|
created_a = datetime.fromisoformat(created_a.replace('Z', '+00:00'))
|
|
if isinstance(created_b, str):
|
|
created_b = datetime.fromisoformat(created_b.replace('Z', '+00:00'))
|
|
|
|
return abs(created_a - created_b)
|
|
|
|
def _detect_problem_patterns(self, incident_a: Dict, incident_b: Dict,
|
|
correlation_type: str, confidence_score: float) -> Tuple[bool, Optional[str]]:
|
|
"""Detect if correlation indicates a larger problem"""
|
|
|
|
# High confidence correlations are more likely to indicate problems
|
|
if confidence_score < 0.6:
|
|
return False, None
|
|
|
|
# Check for specific problem patterns
|
|
text_a = f"{incident_a.get('title', '')} {incident_a.get('description', '')}".lower()
|
|
text_b = f"{incident_b.get('title', '')} {incident_b.get('description', '')}".lower()
|
|
combined_text = f"{text_a} {text_b}"
|
|
|
|
for pattern_name, pattern_data in self.problem_patterns.items():
|
|
# Check for pattern keywords
|
|
keyword_matches = sum(1 for keyword in pattern_data['keywords'] if keyword in combined_text)
|
|
|
|
if keyword_matches >= 2: # At least 2 keywords match
|
|
return True, f"Potential {pattern_name.replace('_', ' ').lower()} detected"
|
|
|
|
# Check for cascade effects
|
|
if correlation_type == 'CASCADE' and confidence_score > 0.7:
|
|
return True, "Potential cascade failure detected"
|
|
|
|
# Check for recurring issues
|
|
if correlation_type == 'SAME_SERVICE' and confidence_score > 0.8:
|
|
return True, "Potential recurring service issue detected"
|
|
|
|
return False, None
|
|
|
|
def find_related_incidents(self, target_incident: Dict, all_incidents: List[Dict],
                           limit: int = 10) -> List[Tuple[Dict, CorrelationResult]]:
    """Return up to *limit* incidents correlated with the target,
    strongest (highest confidence) first."""
    matches = [
        (candidate, result)
        for candidate in all_incidents
        if candidate['id'] != target_incident['id']
        and (result := self.correlate_incidents(target_incident, candidate)) is not None
    ]

    ranked = sorted(matches, key=lambda pair: pair[1].confidence_score, reverse=True)
    return ranked[:limit]
|
|
|
|
def detect_problem_clusters(self, incidents: List[Dict],
                            min_incidents: int = 3,
                            time_window: timedelta = timedelta(hours=24)) -> List[Dict]:
    """Detect clusters of related incidents that might indicate larger problems.

    For each not-yet-processed incident, gathers other incidents created
    within *time_window* whose pairwise correlation confidence exceeds
    0.5. When at least *min_incidents* incidents (seed included) are
    found, emits a cluster dict with keys: 'incidents', 'correlations',
    'problem_type', 'confidence' (mean pairwise confidence), and
    'time_span'. O(n^2) pairwise comparisons over *incidents*.

    NOTE(review): clusters are greedy and order-dependent — members of
    an emitted cluster are excluded from later seeds.
    """
    clusters = []
    processed_incidents = set()

    for incident in incidents:
        if incident['id'] in processed_incidents:
            continue

        # Find related incidents within time window
        related_incidents = []
        incident_time = incident.get('created_at')

        # ISO-8601 with trailing 'Z' normalized to an explicit UTC offset.
        if isinstance(incident_time, str):
            incident_time = datetime.fromisoformat(incident_time.replace('Z', '+00:00'))

        for other_incident in incidents:
            # Skip self-comparison and incidents already claimed by a cluster.
            if other_incident['id'] == incident['id'] or other_incident['id'] in processed_incidents:
                continue

            other_time = other_incident.get('created_at')
            if isinstance(other_time, str):
                other_time = datetime.fromisoformat(other_time.replace('Z', '+00:00'))

            # Check if within time window
            if abs((incident_time - other_time).total_seconds()) <= time_window.total_seconds():
                correlation = self.correlate_incidents(incident, other_incident)
                # Only confident correlations (> 0.5) count toward a cluster.
                if correlation and correlation.confidence_score > 0.5:
                    related_incidents.append((other_incident, correlation))

        # If we found enough related incidents, create a cluster
        if len(related_incidents) >= min_incidents - 1:  # -1 because we include the original incident
            cluster = {
                'incidents': [incident] + [inc[0] for inc in related_incidents],
                'correlations': [inc[1] for inc in related_incidents],
                'problem_type': self._classify_problem_type(incident, related_incidents),
                # Mean confidence across the seed's pairwise correlations.
                'confidence': sum(inc[1].confidence_score for inc in related_incidents) / len(related_incidents),
                'time_span': self._calculate_cluster_time_span([incident] + [inc[0] for inc in related_incidents])
            }
            clusters.append(cluster)

        # Mark incidents as processed
        # NOTE(review): marking happens whether or not a cluster was emitted,
        # so near-miss groups are never revisited — confirm this is intended.
        processed_incidents.add(incident['id'])
        for related_incident, _ in related_incidents:
            processed_incidents.add(related_incident['id'])

    return clusters
|
|
|
|
def _classify_problem_type(self, incident: Dict, related_incidents: List[Tuple[Dict, CorrelationResult]]) -> str:
    """Label the cluster's problem type from its correlation types."""
    # Priority order: cascade > same-service > temporal > fallback.
    seen = {corr.correlation_type for _, corr in related_incidents}

    if 'CASCADE' in seen:
        return 'CASCADE_FAILURE'
    if 'SAME_SERVICE' in seen:
        return 'SERVICE_OUTAGE'
    if 'TEMPORAL' in seen:
        return 'RECURRING_ISSUE'
    return 'PATTERN_BASED_PROBLEM'
|
|
|
|
def _calculate_cluster_time_span(self, incidents: List[Dict]) -> timedelta:
|
|
"""Calculate the time span of a cluster of incidents"""
|
|
times = []
|
|
for incident in incidents:
|
|
created_at = incident.get('created_at')
|
|
if isinstance(created_at, str):
|
|
created_at = datetime.fromisoformat(created_at.replace('Z', '+00:00'))
|
|
times.append(created_at)
|
|
|
|
if len(times) < 2:
|
|
return timedelta(0)
|
|
|
|
return max(times) - min(times)
|