Iliyan Angelov
2025-09-19 11:58:53 +03:00
parent 306b20e24a
commit 6b247e5b9f
11423 changed files with 1500615 additions and 778 deletions

@@ -0,0 +1 @@
# ML components for analytics and predictive insights

@@ -0,0 +1,491 @@
"""
ML-based anomaly detection for incident management
Implements various anomaly detection algorithms for identifying unusual patterns
"""
import numpy as np
import pandas as pd
from typing import Dict, List, Tuple, Optional, Any
from datetime import datetime, timedelta
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
import logging
from django.utils import timezone
from django.db.models import Q, Avg, Count, Sum
from incident_intelligence.models import Incident
from ..models import AnomalyDetection, PredictiveModel
logger = logging.getLogger(__name__)
class AnomalyDetector:
"""Base class for anomaly detection algorithms"""
    def __init__(self, model_config: Optional[Dict[str, Any]] = None):
self.model_config = model_config or {}
self.scaler = StandardScaler()
self.is_fitted = False
def fit(self, data: pd.DataFrame) -> None:
"""Fit the anomaly detection model"""
raise NotImplementedError
def predict(self, data: pd.DataFrame) -> np.ndarray:
"""Predict anomalies in the data"""
raise NotImplementedError
def get_anomaly_scores(self, data: pd.DataFrame) -> np.ndarray:
"""Get anomaly scores for the data"""
raise NotImplementedError
class StatisticalAnomalyDetector(AnomalyDetector):
"""Statistical anomaly detection using z-score and IQR methods"""
    def __init__(self, model_config: Optional[Dict[str, Any]] = None):
super().__init__(model_config)
self.z_threshold = self.model_config.get('z_threshold', 3.0)
self.iqr_multiplier = self.model_config.get('iqr_multiplier', 1.5)
self.stats_cache = {}
def fit(self, data: pd.DataFrame) -> None:
"""Calculate statistical parameters for anomaly detection"""
for column in data.columns:
if data[column].dtype in ['int64', 'float64']:
values = data[column].dropna()
if len(values) > 0:
self.stats_cache[column] = {
'mean': values.mean(),
'std': values.std(),
'q1': values.quantile(0.25),
'q3': values.quantile(0.75),
'iqr': values.quantile(0.75) - values.quantile(0.25)
}
self.is_fitted = True
def predict(self, data: pd.DataFrame) -> np.ndarray:
"""Predict anomalies using statistical methods"""
if not self.is_fitted:
raise ValueError("Model must be fitted before prediction")
anomaly_flags = np.zeros(len(data), dtype=bool)
for column in data.columns:
if column in self.stats_cache and data[column].dtype in ['int64', 'float64']:
values = data[column].dropna()
if len(values) > 0:
stats = self.stats_cache[column]
                    # Z-score method (epsilon guards zero-variance columns)
                    z_scores = np.abs((values - stats['mean']) / (stats['std'] + 1e-8))
                    z_anomalies = z_scores > self.z_threshold
# IQR method
lower_bound = stats['q1'] - self.iqr_multiplier * stats['iqr']
upper_bound = stats['q3'] + self.iqr_multiplier * stats['iqr']
iqr_anomalies = (values < lower_bound) | (values > upper_bound)
                    # Combine both methods; label indices align positionally because
                    # the prepared frame uses a default RangeIndex
                    column_anomalies = (z_anomalies | iqr_anomalies).to_numpy()
                    anomaly_flags[values.index] |= column_anomalies
return anomaly_flags
def get_anomaly_scores(self, data: pd.DataFrame) -> np.ndarray:
"""Get anomaly scores based on z-scores"""
if not self.is_fitted:
raise ValueError("Model must be fitted before prediction")
scores = np.zeros(len(data))
for column in data.columns:
if column in self.stats_cache and data[column].dtype in ['int64', 'float64']:
values = data[column].dropna()
if len(values) > 0:
stats = self.stats_cache[column]
                    z_scores = np.abs((values - stats['mean']) / (stats['std'] + 1e-8))
scores[values.index] += z_scores
return scores
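# Usage sketch (illustrative, not part of the module): the detector works on any
# numeric frame; in this made-up series the 2500 ms spike is caught by the IQR rule.
#
#     history = pd.DataFrame({'latency_ms': [110, 95, 102, 99, 2500]})
#     detector = StatisticalAnomalyDetector({'z_threshold': 2.5})
#     detector.fit(history)
#     flags = detector.predict(history)             # True at the spike's position
#     scores = detector.get_anomaly_scores(history)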
class IsolationForestAnomalyDetector(AnomalyDetector):
"""Isolation Forest anomaly detection"""
    def __init__(self, model_config: Optional[Dict[str, Any]] = None):
super().__init__(model_config)
self.contamination = self.model_config.get('contamination', 0.1)
self.n_estimators = self.model_config.get('n_estimators', 100)
self.model = IsolationForest(
contamination=self.contamination,
n_estimators=self.n_estimators,
random_state=42
)
def fit(self, data: pd.DataFrame) -> None:
"""Fit the Isolation Forest model"""
# Select numeric columns only
numeric_data = data.select_dtypes(include=[np.number])
if numeric_data.empty:
raise ValueError("No numeric columns found in data")
# Handle missing values
numeric_data = numeric_data.fillna(numeric_data.median())
# Scale the data
scaled_data = self.scaler.fit_transform(numeric_data)
# Fit the model
self.model.fit(scaled_data)
self.is_fitted = True
def predict(self, data: pd.DataFrame) -> np.ndarray:
"""Predict anomalies using Isolation Forest"""
if not self.is_fitted:
raise ValueError("Model must be fitted before prediction")
# Select numeric columns only
numeric_data = data.select_dtypes(include=[np.number])
if numeric_data.empty:
return np.zeros(len(data), dtype=bool)
# Handle missing values
numeric_data = numeric_data.fillna(numeric_data.median())
# Scale the data
scaled_data = self.scaler.transform(numeric_data)
# Predict anomalies (-1 for anomalies, 1 for normal)
predictions = self.model.predict(scaled_data)
return predictions == -1
def get_anomaly_scores(self, data: pd.DataFrame) -> np.ndarray:
"""Get anomaly scores from Isolation Forest"""
if not self.is_fitted:
raise ValueError("Model must be fitted before prediction")
# Select numeric columns only
numeric_data = data.select_dtypes(include=[np.number])
if numeric_data.empty:
return np.zeros(len(data))
# Handle missing values
numeric_data = numeric_data.fillna(numeric_data.median())
# Scale the data
scaled_data = self.scaler.transform(numeric_data)
# Get anomaly scores
scores = self.model.decision_function(scaled_data)
# Convert to positive scores (higher = more anomalous)
return -scores
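# Usage sketch (illustrative): Isolation Forest only uses numeric columns and
# scores every row; 'contamination' sets the expected anomaly fraction.
#
#     detector = IsolationForestAnomalyDetector({'contamination': 0.05, 'n_estimators': 200})
#     detector.fit(metrics_df)                      # metrics_df: any numeric feature frame
#     flags = detector.predict(metrics_df)          # True where the forest returns -1
#     scores = detector.get_anomaly_scores(metrics_df)   # higher = more anomalous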
class TemporalAnomalyDetector(AnomalyDetector):
"""Temporal anomaly detection for time series data"""
    def __init__(self, model_config: Optional[Dict[str, Any]] = None):
super().__init__(model_config)
        self.window_size = self.model_config.get('window_size', 24)  # rolling window length in samples (hourly data assumed)
self.threshold_multiplier = self.model_config.get('threshold_multiplier', 2.0)
self.temporal_stats = {}
def fit(self, data: pd.DataFrame) -> None:
"""Calculate temporal statistics for anomaly detection"""
if 'timestamp' not in data.columns:
raise ValueError("Timestamp column is required for temporal anomaly detection")
# Sort by timestamp
data_sorted = data.sort_values('timestamp')
# Calculate rolling statistics
for column in data_sorted.columns:
if column != 'timestamp' and data_sorted[column].dtype in ['int64', 'float64']:
# Calculate rolling mean and std
rolling_mean = data_sorted[column].rolling(window=self.window_size, min_periods=1).mean()
rolling_std = data_sorted[column].rolling(window=self.window_size, min_periods=1).std()
self.temporal_stats[column] = {
'rolling_mean': rolling_mean,
'rolling_std': rolling_std
}
self.is_fitted = True
def predict(self, data: pd.DataFrame) -> np.ndarray:
"""Predict temporal anomalies"""
if not self.is_fitted:
raise ValueError("Model must be fitted before prediction")
if 'timestamp' not in data.columns:
return np.zeros(len(data), dtype=bool)
        # Sort by timestamp; keep the flags indexed so they can be mapped back
        # to the caller's original row order before returning
        data_sorted = data.sort_values('timestamp')
        anomaly_flags = pd.Series(False, index=data_sorted.index)
        for column in data_sorted.columns:
            if column in self.temporal_stats and column != 'timestamp':
                values = data_sorted[column]
                rolling_mean = self.temporal_stats[column]['rolling_mean']
                rolling_std = self.temporal_stats[column]['rolling_std']
                # Calculate z-scores based on rolling statistics
                z_scores = np.abs((values - rolling_mean) / (rolling_std + 1e-8))
                anomaly_flags |= (z_scores > self.threshold_multiplier)
        return anomaly_flags.reindex(data.index).to_numpy()
def get_anomaly_scores(self, data: pd.DataFrame) -> np.ndarray:
"""Get temporal anomaly scores"""
if not self.is_fitted:
raise ValueError("Model must be fitted before prediction")
if 'timestamp' not in data.columns:
return np.zeros(len(data))
        # Sort by timestamp; scores are mapped back to the caller's row order
        data_sorted = data.sort_values('timestamp')
        scores = pd.Series(0.0, index=data_sorted.index)
        for column in data_sorted.columns:
            if column in self.temporal_stats and column != 'timestamp':
                values = data_sorted[column]
                rolling_mean = self.temporal_stats[column]['rolling_mean']
                rolling_std = self.temporal_stats[column]['rolling_std']
                # Calculate z-scores based on rolling statistics
                z_scores = np.abs((values - rolling_mean) / (rolling_std + 1e-8))
                scores += z_scores
        return scores.reindex(data.index).to_numpy()
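# Usage sketch (illustrative): the rolling statistics are captured in fit(), so
# score the same window that was fitted; a 'timestamp' column is required.
#
#     detector = TemporalAnomalyDetector({'window_size': 24, 'threshold_multiplier': 2.5})
#     detector.fit(hourly_df)                       # hourly_df includes a 'timestamp' column
#     flags = detector.predict(hourly_df)           # flags come back in hourly_df's row order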
class AnomalyDetectionEngine:
"""Main engine for anomaly detection"""
def __init__(self):
self.detectors = {
'statistical': StatisticalAnomalyDetector,
'isolation_forest': IsolationForestAnomalyDetector,
'temporal': TemporalAnomalyDetector
}
    def create_detector(self, algorithm_type: str, model_config: Optional[Dict[str, Any]] = None) -> AnomalyDetector:
"""Create an anomaly detector instance"""
if algorithm_type not in self.detectors:
raise ValueError(f"Unknown algorithm type: {algorithm_type}")
return self.detectors[algorithm_type](model_config)
def prepare_incident_data(self, time_window_hours: int = 24) -> pd.DataFrame:
"""Prepare incident data for anomaly detection"""
end_time = timezone.now()
start_time = end_time - timedelta(hours=time_window_hours)
# Get incidents from the time window
incidents = Incident.objects.filter(
created_at__gte=start_time,
created_at__lte=end_time
).values(
'id', 'created_at', 'severity', 'category', 'subcategory',
'affected_users', 'estimated_downtime', 'status'
)
if not incidents:
return pd.DataFrame()
# Convert to DataFrame
df = pd.DataFrame(list(incidents))
# Convert datetime to timestamp
df['timestamp'] = pd.to_datetime(df['created_at']).astype('int64') // 10**9
# Encode categorical variables
severity_mapping = {'LOW': 1, 'MEDIUM': 2, 'HIGH': 3, 'CRITICAL': 4, 'EMERGENCY': 5}
df['severity_encoded'] = df['severity'].map(severity_mapping).fillna(0)
# Convert estimated_downtime to hours
df['downtime_hours'] = df['estimated_downtime'].apply(
lambda x: x.total_seconds() / 3600 if x else 0
)
# Create time-based features
df['hour_of_day'] = pd.to_datetime(df['created_at']).dt.hour
df['day_of_week'] = pd.to_datetime(df['created_at']).dt.dayofweek
return df
def detect_anomalies(self, model: PredictiveModel, time_window_hours: int = 24) -> List[Dict[str, Any]]:
"""Detect anomalies using the specified model"""
try:
# Prepare data
data = self.prepare_incident_data(time_window_hours)
if data.empty:
logger.warning("No incident data found for anomaly detection")
return []
# Create detector
detector = self.create_detector(
model.algorithm_type,
model.model_config
)
# Fit the model
detector.fit(data)
# Predict anomalies
anomaly_flags = detector.predict(data)
anomaly_scores = detector.get_anomaly_scores(data)
# Process results
anomalies = []
for idx, is_anomaly in enumerate(anomaly_flags):
if is_anomaly:
incident_data = data.iloc[idx]
anomaly_data = {
'model': model,
'anomaly_type': self._determine_anomaly_type(model.algorithm_type),
'severity': self._determine_severity(anomaly_scores[idx]),
'confidence_score': min(1.0, max(0.0, anomaly_scores[idx] / 10.0)),
'anomaly_score': float(anomaly_scores[idx]),
'threshold_used': self._get_threshold(model.algorithm_type, model.model_config),
'time_window_start': timezone.now() - timedelta(hours=time_window_hours),
'time_window_end': timezone.now(),
'description': self._generate_description(incident_data, anomaly_scores[idx]),
'affected_services': [incident_data.get('category', 'Unknown')],
'affected_metrics': ['incident_frequency', 'severity_distribution'],
'metadata': {
'incident_id': str(incident_data['id']),
'detection_algorithm': model.algorithm_type,
'time_window_hours': time_window_hours
}
}
anomalies.append(anomaly_data)
return anomalies
except Exception as e:
logger.error(f"Error in anomaly detection: {str(e)}")
return []
def _determine_anomaly_type(self, algorithm_type: str) -> str:
"""Determine anomaly type based on algorithm"""
mapping = {
'statistical': 'STATISTICAL',
'isolation_forest': 'PATTERN',
'temporal': 'TEMPORAL'
}
return mapping.get(algorithm_type, 'STATISTICAL')
def _determine_severity(self, anomaly_score: float) -> str:
"""Determine severity based on anomaly score"""
if anomaly_score >= 5.0:
return 'CRITICAL'
elif anomaly_score >= 3.0:
return 'HIGH'
elif anomaly_score >= 2.0:
return 'MEDIUM'
else:
return 'LOW'
def _get_threshold(self, algorithm_type: str, model_config: Dict[str, Any]) -> float:
"""Get threshold used for anomaly detection"""
if algorithm_type == 'statistical':
return model_config.get('z_threshold', 3.0)
elif algorithm_type == 'isolation_forest':
return model_config.get('contamination', 0.1)
elif algorithm_type == 'temporal':
return model_config.get('threshold_multiplier', 2.0)
return 1.0
def _generate_description(self, incident_data: pd.Series, anomaly_score: float) -> str:
"""Generate description for the anomaly"""
severity = incident_data.get('severity', 'Unknown')
category = incident_data.get('category', 'Unknown')
affected_users = incident_data.get('affected_users', 0)
return f"Anomalous incident detected: {severity} severity incident in {category} category affecting {affected_users} users. Anomaly score: {anomaly_score:.2f}"
class AnomalyDetectionService:
"""Service for managing anomaly detection"""
def __init__(self):
self.engine = AnomalyDetectionEngine()
def run_anomaly_detection(self, model_id: str = None) -> int:
"""Run anomaly detection for all active models or a specific model"""
if model_id:
models = PredictiveModel.objects.filter(
id=model_id,
model_type='ANOMALY_DETECTION',
status='ACTIVE'
)
else:
models = PredictiveModel.objects.filter(
model_type='ANOMALY_DETECTION',
status='ACTIVE'
)
total_anomalies = 0
for model in models:
try:
# Detect anomalies
anomalies = self.engine.detect_anomalies(model)
# Save anomalies to database
for anomaly_data in anomalies:
AnomalyDetection.objects.create(**anomaly_data)
total_anomalies += 1
logger.info(f"Detected {len(anomalies)} anomalies using model {model.name}")
except Exception as e:
logger.error(f"Error running anomaly detection for model {model.name}: {str(e)}")
return total_anomalies
def get_anomaly_summary(self, time_window_hours: int = 24) -> Dict[str, Any]:
"""Get summary of recent anomalies"""
end_time = timezone.now()
start_time = end_time - timedelta(hours=time_window_hours)
anomalies = AnomalyDetection.objects.filter(
detected_at__gte=start_time,
detected_at__lte=end_time
)
return {
'total_anomalies': anomalies.count(),
'critical_anomalies': anomalies.filter(severity='CRITICAL').count(),
'high_anomalies': anomalies.filter(severity='HIGH').count(),
'medium_anomalies': anomalies.filter(severity='MEDIUM').count(),
'low_anomalies': anomalies.filter(severity='LOW').count(),
'unresolved_anomalies': anomalies.filter(
status__in=['DETECTED', 'INVESTIGATING']
).count(),
'false_positive_rate': self._calculate_false_positive_rate(anomalies),
'average_confidence': anomalies.aggregate(
avg=Avg('confidence_score')
)['avg'] or 0.0
}
def _calculate_false_positive_rate(self, anomalies) -> float:
"""Calculate false positive rate"""
total_anomalies = anomalies.count()
if total_anomalies == 0:
return 0.0
false_positives = anomalies.filter(status='FALSE_POSITIVE').count()
return (false_positives / total_anomalies) * 100
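# Usage sketch (illustrative), e.g. from a periodic task:
#
#     service = AnomalyDetectionService()
#     detected = service.run_anomaly_detection()    # all ACTIVE anomaly-detection models
#     summary = service.get_anomaly_summary(time_window_hours=24)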

@@ -0,0 +1,684 @@
"""
ML-based predictive models for incident management
Implements predictive algorithms for incident occurrence, severity, resolution time, and cost impact
"""
import numpy as np
import pandas as pd
from typing import Dict, List, Tuple, Optional, Any, Union
from datetime import datetime, timedelta
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error, r2_score
import joblib
import logging
from django.utils import timezone
from django.db.models import Q, Avg, Count, Sum, Max, Min
from incident_intelligence.models import Incident
from ..models import PredictiveModel, PredictiveInsight, CostImpactAnalysis
logger = logging.getLogger(__name__)
class BasePredictiveModel:
"""Base class for predictive models"""
    def __init__(self, model_config: Optional[Dict[str, Any]] = None):
self.model_config = model_config or {}
self.scaler = StandardScaler()
self.label_encoders = {}
self.is_fitted = False
self.feature_columns = []
self.target_column = None
def prepare_features(self, data: pd.DataFrame) -> pd.DataFrame:
"""Prepare features for model training/prediction"""
raise NotImplementedError
def fit(self, X: pd.DataFrame, y: pd.Series) -> Dict[str, float]:
"""Fit the model and return performance metrics"""
raise NotImplementedError
def predict(self, X: pd.DataFrame) -> np.ndarray:
"""Make predictions"""
raise NotImplementedError
def get_feature_importance(self) -> Dict[str, float]:
"""Get feature importance scores"""
raise NotImplementedError
class IncidentPredictionModel(BasePredictiveModel):
"""Model for predicting incident occurrence"""
    def __init__(self, model_config: Optional[Dict[str, Any]] = None):
super().__init__(model_config)
self.model = RandomForestClassifier(
n_estimators=self.model_config.get('n_estimators', 100),
max_depth=self.model_config.get('max_depth', 10),
random_state=42
)
def prepare_features(self, data: pd.DataFrame) -> pd.DataFrame:
"""Prepare features for incident prediction"""
features = pd.DataFrame()
# Time-based features
if 'timestamp' in data.columns:
timestamp = pd.to_datetime(data['timestamp'])
features['hour_of_day'] = timestamp.dt.hour
features['day_of_week'] = timestamp.dt.dayofweek
features['day_of_month'] = timestamp.dt.day
features['month'] = timestamp.dt.month
features['is_weekend'] = (timestamp.dt.dayofweek >= 5).astype(int)
features['is_business_hours'] = ((timestamp.dt.hour >= 9) & (timestamp.dt.hour <= 17)).astype(int)
# Historical incident features
if 'incident_count_1h' in data.columns:
features['incident_count_1h'] = data['incident_count_1h']
if 'incident_count_24h' in data.columns:
features['incident_count_24h'] = data['incident_count_24h']
if 'avg_severity_24h' in data.columns:
features['avg_severity_24h'] = data['avg_severity_24h']
# System metrics (if available)
system_metrics = ['cpu_usage', 'memory_usage', 'disk_usage', 'network_usage']
for metric in system_metrics:
if metric in data.columns:
features[metric] = data[metric]
# Service-specific features
if 'service_name' in data.columns:
# Encode service names
if 'service_name' not in self.label_encoders:
self.label_encoders['service_name'] = LabelEncoder()
features['service_encoded'] = self.label_encoders['service_name'].fit_transform(data['service_name'])
else:
features['service_encoded'] = self.label_encoders['service_name'].transform(data['service_name'])
return features
def fit(self, X: pd.DataFrame, y: pd.Series) -> Dict[str, float]:
"""Fit the incident prediction model"""
# Prepare features
X_processed = self.prepare_features(X)
self.feature_columns = X_processed.columns.tolist()
# Scale features
X_scaled = self.scaler.fit_transform(X_processed)
# Split data for validation
X_train, X_val, y_train, y_val = train_test_split(
X_scaled, y, test_size=0.2, random_state=42, stratify=y
)
# Fit model
self.model.fit(X_train, y_train)
        # Evaluate model
        y_pred = self.model.predict(X_val)
metrics = {
'accuracy': accuracy_score(y_val, y_pred),
'precision': precision_score(y_val, y_pred, average='weighted'),
'recall': recall_score(y_val, y_pred, average='weighted'),
'f1_score': f1_score(y_val, y_pred, average='weighted')
}
self.is_fitted = True
return metrics
def predict(self, X: pd.DataFrame) -> np.ndarray:
"""Predict incident probability"""
if not self.is_fitted:
raise ValueError("Model must be fitted before prediction")
X_processed = self.prepare_features(X)
X_scaled = self.scaler.transform(X_processed)
# Return probability of incident occurrence
return self.model.predict_proba(X_scaled)[:, 1]
def get_feature_importance(self) -> Dict[str, float]:
"""Get feature importance scores"""
if not self.is_fitted:
return {}
importance_scores = self.model.feature_importances_
return dict(zip(self.feature_columns, importance_scores))
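# Usage sketch (illustrative): X carries a 'timestamp' column plus optional
# rolling counters and system metrics; y holds 0/1 incident-occurrence labels.
#
#     model = IncidentPredictionModel({'n_estimators': 200, 'max_depth': 8})
#     metrics = model.fit(X, y)                     # accuracy/precision/recall/f1
#     probs = model.predict(X_new)                  # P(incident) per row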
class SeverityPredictionModel(BasePredictiveModel):
"""Model for predicting incident severity"""
    def __init__(self, model_config: Optional[Dict[str, Any]] = None):
super().__init__(model_config)
self.model = RandomForestClassifier(
n_estimators=self.model_config.get('n_estimators', 100),
max_depth=self.model_config.get('max_depth', 10),
random_state=42
)
self.severity_mapping = {
'LOW': 1, 'MEDIUM': 2, 'HIGH': 3, 'CRITICAL': 4, 'EMERGENCY': 5
}
self.reverse_severity_mapping = {v: k for k, v in self.severity_mapping.items()}
def prepare_features(self, data: pd.DataFrame) -> pd.DataFrame:
"""Prepare features for severity prediction"""
features = pd.DataFrame()
# Text-based features
if 'title' in data.columns:
features['title_length'] = data['title'].str.len()
features['title_word_count'] = data['title'].str.split().str.len()
if 'description' in data.columns:
features['description_length'] = data['description'].str.len()
features['description_word_count'] = data['description'].str.split().str.len()
# Categorical features
if 'category' in data.columns:
if 'category' not in self.label_encoders:
self.label_encoders['category'] = LabelEncoder()
features['category_encoded'] = self.label_encoders['category'].fit_transform(data['category'])
else:
features['category_encoded'] = self.label_encoders['category'].transform(data['category'])
if 'subcategory' in data.columns:
if 'subcategory' not in self.label_encoders:
self.label_encoders['subcategory'] = LabelEncoder()
features['subcategory_encoded'] = self.label_encoders['subcategory'].fit_transform(data['subcategory'])
else:
features['subcategory_encoded'] = self.label_encoders['subcategory'].transform(data['subcategory'])
# Impact features
if 'affected_users' in data.columns:
features['affected_users'] = data['affected_users']
features['affected_users_log'] = np.log1p(data['affected_users'])
# Time-based features
if 'created_at' in data.columns:
timestamp = pd.to_datetime(data['created_at'])
features['hour_of_day'] = timestamp.dt.hour
features['day_of_week'] = timestamp.dt.dayofweek
features['is_weekend'] = (timestamp.dt.dayofweek >= 5).astype(int)
features['is_business_hours'] = ((timestamp.dt.hour >= 9) & (timestamp.dt.hour <= 17)).astype(int)
# Historical features
if 'reporter_id' in data.columns:
# Count of previous incidents by reporter
features['reporter_incident_count'] = data.groupby('reporter_id')['reporter_id'].transform('count')
return features
def fit(self, X: pd.DataFrame, y: pd.Series) -> Dict[str, float]:
"""Fit the severity prediction model"""
# Prepare features
X_processed = self.prepare_features(X)
self.feature_columns = X_processed.columns.tolist()
# Encode target variable
y_encoded = y.map(self.severity_mapping)
# Scale features
X_scaled = self.scaler.fit_transform(X_processed)
# Split data for validation
X_train, X_val, y_train, y_val = train_test_split(
X_scaled, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)
# Fit model
self.model.fit(X_train, y_train)
# Evaluate model
y_pred = self.model.predict(X_val)
metrics = {
'accuracy': accuracy_score(y_val, y_pred),
'precision': precision_score(y_val, y_pred, average='weighted'),
'recall': recall_score(y_val, y_pred, average='weighted'),
'f1_score': f1_score(y_val, y_pred, average='weighted')
}
self.is_fitted = True
return metrics
def predict(self, X: pd.DataFrame) -> np.ndarray:
"""Predict incident severity"""
if not self.is_fitted:
raise ValueError("Model must be fitted before prediction")
X_processed = self.prepare_features(X)
X_scaled = self.scaler.transform(X_processed)
# Get predicted severity levels
y_pred_encoded = self.model.predict(X_scaled)
# Convert back to severity labels
return np.array([self.reverse_severity_mapping.get(level, 'MEDIUM') for level in y_pred_encoded])
def get_feature_importance(self) -> Dict[str, float]:
"""Get feature importance scores"""
if not self.is_fitted:
return {}
importance_scores = self.model.feature_importances_
return dict(zip(self.feature_columns, importance_scores))
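# Usage sketch (illustrative): the target is the raw severity label; predictions
# come back as strings such as 'HIGH' or 'CRITICAL'.
#
#     model = SeverityPredictionModel()
#     metrics = model.fit(train_df, train_df['severity'])
#     labels = model.predict(new_incidents_df)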
class ResolutionTimePredictionModel(BasePredictiveModel):
"""Model for predicting incident resolution time"""
    def __init__(self, model_config: Optional[Dict[str, Any]] = None):
super().__init__(model_config)
self.model = RandomForestRegressor(
n_estimators=self.model_config.get('n_estimators', 100),
max_depth=self.model_config.get('max_depth', 10),
random_state=42
)
def prepare_features(self, data: pd.DataFrame) -> pd.DataFrame:
"""Prepare features for resolution time prediction"""
features = pd.DataFrame()
# Severity features
if 'severity' in data.columns:
severity_mapping = {'LOW': 1, 'MEDIUM': 2, 'HIGH': 3, 'CRITICAL': 4, 'EMERGENCY': 5}
features['severity_encoded'] = data['severity'].map(severity_mapping).fillna(2)
# Categorical features
if 'category' in data.columns:
if 'category' not in self.label_encoders:
self.label_encoders['category'] = LabelEncoder()
features['category_encoded'] = self.label_encoders['category'].fit_transform(data['category'])
else:
features['category_encoded'] = self.label_encoders['category'].transform(data['category'])
# Impact features
if 'affected_users' in data.columns:
features['affected_users'] = data['affected_users']
features['affected_users_log'] = np.log1p(data['affected_users'])
# Time-based features
if 'created_at' in data.columns:
timestamp = pd.to_datetime(data['created_at'])
features['hour_of_day'] = timestamp.dt.hour
features['day_of_week'] = timestamp.dt.dayofweek
features['is_weekend'] = (timestamp.dt.dayofweek >= 5).astype(int)
features['is_business_hours'] = ((timestamp.dt.hour >= 9) & (timestamp.dt.hour <= 17)).astype(int)
        # Historical features (requires resolution_time_hours, which is present at training time)
        if 'assigned_to' in data.columns and 'resolution_time_hours' in data.columns:
            # Average resolution time for assignee (fall back to the overall mean
            # for unassigned incidents)
            features['assignee_avg_resolution_time'] = (
                data.groupby('assigned_to')['resolution_time_hours']
                .transform('mean')
                .fillna(data['resolution_time_hours'].mean())
            )
# Text features
if 'title' in data.columns:
features['title_length'] = data['title'].str.len()
if 'description' in data.columns:
features['description_length'] = data['description'].str.len()
return features
def fit(self, X: pd.DataFrame, y: pd.Series) -> Dict[str, float]:
"""Fit the resolution time prediction model"""
# Prepare features
X_processed = self.prepare_features(X)
self.feature_columns = X_processed.columns.tolist()
# Scale features
X_scaled = self.scaler.fit_transform(X_processed)
# Split data for validation
X_train, X_val, y_train, y_val = train_test_split(
X_scaled, y, test_size=0.2, random_state=42
)
# Fit model
self.model.fit(X_train, y_train)
# Evaluate model
y_pred = self.model.predict(X_val)
metrics = {
'mse': mean_squared_error(y_val, y_pred),
'rmse': np.sqrt(mean_squared_error(y_val, y_pred)),
'r2_score': r2_score(y_val, y_pred)
}
self.is_fitted = True
return metrics
def predict(self, X: pd.DataFrame) -> np.ndarray:
"""Predict resolution time in hours"""
if not self.is_fitted:
raise ValueError("Model must be fitted before prediction")
X_processed = self.prepare_features(X)
X_scaled = self.scaler.transform(X_processed)
return self.model.predict(X_scaled)
def get_feature_importance(self) -> Dict[str, float]:
"""Get feature importance scores"""
if not self.is_fitted:
return {}
importance_scores = self.model.feature_importances_
return dict(zip(self.feature_columns, importance_scores))
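# Usage sketch (illustrative): a regression model, so fit() reports MSE/RMSE/R^2
# and predict() returns estimated resolution time in hours.
#
#     model = ResolutionTimePredictionModel()
#     metrics = model.fit(train_df, train_df['resolution_time_hours'])
#     hours = model.predict(open_incidents_df)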
class CostPredictionModel(BasePredictiveModel):
"""Model for predicting incident cost impact"""
    def __init__(self, model_config: Optional[Dict[str, Any]] = None):
super().__init__(model_config)
self.model = RandomForestRegressor(
n_estimators=self.model_config.get('n_estimators', 100),
max_depth=self.model_config.get('max_depth', 10),
random_state=42
)
def prepare_features(self, data: pd.DataFrame) -> pd.DataFrame:
"""Prepare features for cost prediction"""
features = pd.DataFrame()
# Severity features
if 'severity' in data.columns:
severity_mapping = {'LOW': 1, 'MEDIUM': 2, 'HIGH': 3, 'CRITICAL': 4, 'EMERGENCY': 5}
features['severity_encoded'] = data['severity'].map(severity_mapping).fillna(2)
# Impact features
if 'affected_users' in data.columns:
features['affected_users'] = data['affected_users']
features['affected_users_log'] = np.log1p(data['affected_users'])
if 'downtime_hours' in data.columns:
features['downtime_hours'] = data['downtime_hours']
features['downtime_hours_log'] = np.log1p(data['downtime_hours'])
# Categorical features
if 'category' in data.columns:
if 'category' not in self.label_encoders:
self.label_encoders['category'] = LabelEncoder()
features['category_encoded'] = self.label_encoders['category'].fit_transform(data['category'])
else:
features['category_encoded'] = self.label_encoders['category'].transform(data['category'])
# Business context
if 'business_unit' in data.columns:
if 'business_unit' not in self.label_encoders:
self.label_encoders['business_unit'] = LabelEncoder()
features['business_unit_encoded'] = self.label_encoders['business_unit'].fit_transform(data['business_unit'])
else:
features['business_unit_encoded'] = self.label_encoders['business_unit'].transform(data['business_unit'])
# Time-based features
if 'created_at' in data.columns:
timestamp = pd.to_datetime(data['created_at'])
features['hour_of_day'] = timestamp.dt.hour
features['day_of_week'] = timestamp.dt.dayofweek
features['is_weekend'] = (timestamp.dt.dayofweek >= 5).astype(int)
features['is_business_hours'] = ((timestamp.dt.hour >= 9) & (timestamp.dt.hour <= 17)).astype(int)
return features
def fit(self, X: pd.DataFrame, y: pd.Series) -> Dict[str, float]:
"""Fit the cost prediction model"""
# Prepare features
X_processed = self.prepare_features(X)
self.feature_columns = X_processed.columns.tolist()
# Scale features
X_scaled = self.scaler.fit_transform(X_processed)
# Split data for validation
X_train, X_val, y_train, y_val = train_test_split(
X_scaled, y, test_size=0.2, random_state=42
)
# Fit model
self.model.fit(X_train, y_train)
# Evaluate model
y_pred = self.model.predict(X_val)
metrics = {
'mse': mean_squared_error(y_val, y_pred),
'rmse': np.sqrt(mean_squared_error(y_val, y_pred)),
'r2_score': r2_score(y_val, y_pred)
}
self.is_fitted = True
return metrics
def predict(self, X: pd.DataFrame) -> np.ndarray:
"""Predict cost impact in USD"""
if not self.is_fitted:
raise ValueError("Model must be fitted before prediction")
X_processed = self.prepare_features(X)
X_scaled = self.scaler.transform(X_processed)
return self.model.predict(X_scaled)
def get_feature_importance(self) -> Dict[str, float]:
"""Get feature importance scores"""
if not self.is_fitted:
return {}
importance_scores = self.model.feature_importances_
return dict(zip(self.feature_columns, importance_scores))
class PredictiveModelFactory:
"""Factory for creating predictive models"""
@staticmethod
    def create_model(model_type: str, model_config: Optional[Dict[str, Any]] = None) -> BasePredictiveModel:
"""Create a predictive model instance"""
models = {
'INCIDENT_PREDICTION': IncidentPredictionModel,
'SEVERITY_PREDICTION': SeverityPredictionModel,
'RESOLUTION_TIME_PREDICTION': ResolutionTimePredictionModel,
'COST_PREDICTION': CostPredictionModel
}
if model_type not in models:
raise ValueError(f"Unknown model type: {model_type}")
return models[model_type](model_config)
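# Usage sketch (illustrative): the factory keys off PredictiveModel.model_type.
#
#     ml_model = PredictiveModelFactory.create_model('COST_PREDICTION', {'n_estimators': 150})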
class PredictiveModelService:
"""Service for managing predictive models"""
def __init__(self):
self.factory = PredictiveModelFactory()
def prepare_training_data(self, model_type: str, days_back: int = 90) -> Tuple[pd.DataFrame, pd.Series]:
"""Prepare training data for the specified model type"""
end_date = timezone.now()
start_date = end_date - timedelta(days=days_back)
# Get incidents from the time period
incidents = Incident.objects.filter(
created_at__gte=start_date,
created_at__lte=end_date
).values(
'id', 'title', 'description', 'severity', 'category', 'subcategory',
'affected_users', 'estimated_downtime', 'created_at', 'resolved_at',
'assigned_to', 'reporter', 'status'
)
if not incidents:
return pd.DataFrame(), pd.Series()
df = pd.DataFrame(list(incidents))
# Prepare target variable based on model type
if model_type == 'INCIDENT_PREDICTION':
# For incident prediction, we need to create time series data
# This is a simplified version - in practice, you'd need more sophisticated time series preparation
y = pd.Series([1] * len(df)) # Placeholder
elif model_type == 'SEVERITY_PREDICTION':
y = df['severity']
elif model_type == 'RESOLUTION_TIME_PREDICTION':
# Calculate resolution time in hours
df['resolved_at'] = pd.to_datetime(df['resolved_at'])
df['created_at'] = pd.to_datetime(df['created_at'])
df['resolution_time_hours'] = (df['resolved_at'] - df['created_at']).dt.total_seconds() / 3600
y = df['resolution_time_hours'].fillna(df['resolution_time_hours'].median())
elif model_type == 'COST_PREDICTION':
# Get cost data
cost_analyses = CostImpactAnalysis.objects.filter(
incident_id__in=df['id']
).values('incident_id', 'cost_amount')
cost_df = pd.DataFrame(list(cost_analyses))
if not cost_df.empty:
df = df.merge(cost_df, left_on='id', right_on='incident_id', how='left')
y = df['cost_amount'].fillna(df['cost_amount'].median())
else:
y = pd.Series([0] * len(df))
else:
raise ValueError(f"Unknown model type: {model_type}")
return df, y
def train_model(self, model_id: str) -> Dict[str, Any]:
"""Train a predictive model"""
try:
model = PredictiveModel.objects.get(id=model_id)
# Prepare training data
X, y = self.prepare_training_data(model.model_type, model.training_data_period_days)
if X.empty or len(y) < model.min_training_samples:
return {
'success': False,
'error': f'Insufficient training data. Need at least {model.min_training_samples} samples, got {len(y)}'
}
# Create model instance
ml_model = self.factory.create_model(model.model_type, model.model_config)
# Train the model
start_time = timezone.now()
metrics = ml_model.fit(X, y)
end_time = timezone.now()
# Update model with performance metrics
model.accuracy_score = metrics.get('accuracy', metrics.get('r2_score'))
model.precision_score = metrics.get('precision')
model.recall_score = metrics.get('recall')
model.f1_score = metrics.get('f1_score')
model.status = 'ACTIVE'
model.last_trained_at = end_time
model.training_duration_seconds = (end_time - start_time).total_seconds()
model.training_samples_count = len(y)
model.feature_columns = ml_model.feature_columns
# Save model (in a real implementation, you'd save the actual model file)
model.model_file_path = f"models/{model.id}_{model.version}.joblib"
model.save()
return {
'success': True,
'metrics': metrics,
'training_samples': len(y),
'training_duration': model.training_duration_seconds
}
except Exception as e:
logger.error(f"Error training model {model_id}: {str(e)}")
return {
'success': False,
'error': str(e)
}
def generate_predictions(self, model_id: str, prediction_horizon_hours: int = 24) -> List[Dict[str, Any]]:
"""Generate predictions using a trained model"""
try:
model = PredictiveModel.objects.get(id=model_id, status='ACTIVE')
            # Create model instance
            ml_model = self.factory.create_model(model.model_type, model.model_config)
            # In a real implementation the trained estimator would be loaded from
            # model.model_file_path; it is not persisted yet, so refit on recent
            # data to produce illustrative predictions
            X, y = self.prepare_training_data(model.model_type, 7)  # Last 7 days
            if X.empty:
                return []
            ml_model.fit(X, y)
            # Predict for the most recent incidents
            recent = X.tail(10)
            predictions = ml_model.predict(recent)
# Create insight objects
insights = []
for i, prediction in enumerate(predictions):
insight_data = {
'model': model,
'insight_type': model.model_type,
'title': f"Prediction for {model.model_type.replace('_', ' ').title()}",
'description': f"Model predicts {prediction} for upcoming incidents",
'confidence_level': 'MEDIUM', # Could be calculated based on model confidence
'confidence_score': 0.7, # Placeholder
                    'predicted_value': {'value': prediction if isinstance(prediction, str) else float(prediction)},
'prediction_horizon': prediction_horizon_hours,
'prediction_date': timezone.now() + timedelta(hours=prediction_horizon_hours),
                    'input_features': recent.iloc[i].to_dict(),
                    'supporting_evidence': [],
                    'affected_services': [recent.iloc[i].get('category', 'Unknown')],
'recommendations': self._generate_recommendations(model.model_type, prediction),
'expires_at': timezone.now() + timedelta(hours=prediction_horizon_hours * 2)
}
insights.append(insight_data)
return insights
except Exception as e:
logger.error(f"Error generating predictions for model {model_id}: {str(e)}")
return []
def _generate_recommendations(self, model_type: str, prediction: Any) -> List[str]:
"""Generate recommendations based on prediction"""
recommendations = []
if model_type == 'INCIDENT_PREDICTION':
if prediction > 0.7:
recommendations.append("High probability of incident occurrence - consider proactive monitoring")
recommendations.append("Ensure on-call team is ready for potential incidents")
elif prediction > 0.4:
recommendations.append("Moderate probability of incident - monitor system metrics closely")
elif model_type == 'SEVERITY_PREDICTION':
if prediction in ['CRITICAL', 'EMERGENCY']:
recommendations.append("High severity incident predicted - prepare escalation procedures")
recommendations.append("Ensure senior staff are available for response")
elif prediction == 'HIGH':
recommendations.append("High severity incident predicted - review response procedures")
elif model_type == 'RESOLUTION_TIME_PREDICTION':
if prediction > 24:
recommendations.append("Long resolution time predicted - consider additional resources")
recommendations.append("Review escalation procedures for complex incidents")
elif prediction > 8:
recommendations.append("Extended resolution time predicted - prepare for extended response")
elif model_type == 'COST_PREDICTION':
if prediction > 10000:
recommendations.append("High cost impact predicted - prepare cost mitigation strategies")
recommendations.append("Consider business continuity measures")
elif prediction > 5000:
recommendations.append("Significant cost impact predicted - review cost control measures")
return recommendations
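# Usage sketch (illustrative), assuming an ACTIVE PredictiveModel row exists:
#
#     service = PredictiveModelService()
#     result = service.train_model(str(model.id))
#     insights = service.generate_predictions(str(model.id), prediction_horizon_hours=24)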