updates

2025-12-01 06:50:10 +02:00
parent 91f51bc6fe
commit 62c1fe5951
4682 changed files with 544807 additions and 31208 deletions
--- a/Backend/src/shared/utils/sanitization.py
+++ b/Backend/src/shared/utils/sanitization.py
@@ -0,0 +1,168 @@
+"""
+HTML/XSS sanitization utilities using bleach library.
+Prevents stored XSS attacks by sanitizing user-generated content.
+"""
+import bleach
+from typing import Optional
+
+# Tags that rich-text content may keep. Passed to bleach.clean() as the
+# ``tags`` whitelist; anything else is escaped or stripped.
+ALLOWED_TAGS = [
+    # Paragraphs and inline emphasis
+    'p', 'br',
+    'strong', 'em', 'u', 'b', 'i', 's', 'strike',
+    # Links and lists
+    'a',
+    'ul', 'ol', 'li',
+    # Headings
+    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
+    # Quotes, preformatted text, rules and generic containers
+    'blockquote', 'pre', 'code', 'hr',
+    'div', 'span',
+    # Tables
+    'table', 'thead', 'tbody', 'tr', 'th', 'td',
+    # Images
+    'img',
+]
+
+# Per-tag attribute whitelist. A tag that is not listed here keeps none of
+# its attributes.
+ALLOWED_ATTRIBUTES = {
+    'a': ['href', 'title', 'target', 'rel'],
+    'img': ['src', 'alt', 'title', 'width', 'height'],
+    # Containers and paragraphs may only carry a CSS class
+    'div': ['class'],
+    'span': ['class'],
+    'p': ['class'],
+    'table': ['class', 'border'],
+    # Table cells may span rows/columns
+    'th': ['colspan', 'rowspan'],
+    'td': ['colspan', 'rowspan'],
+}
+
+# URL schemes accepted in link/image URLs and by sanitize_url()
+ALLOWED_PROTOCOLS = ['http', 'https', 'mailto']
+
+# Inline CSS properties that would be permitted: none. No tag above allows
+# the ``style`` attribute, so inline styles are always removed.
+ALLOWED_STYLES = []
+
+
+def sanitize_html(content: Optional[str], strip: bool = False) -> str:
+    """
+    Sanitize HTML content to prevent XSS attacks.
+
+    The content is cleaned with bleach against ALLOWED_TAGS,
+    ALLOWED_ATTRIBUTES and ALLOWED_PROTOCOLS, and HTML comments are removed.
+    If the cleaned result contains no anchor tag, plain-text URLs and e-mail
+    addresses are then converted into links.
+
+    Args:
+        content: The HTML content to sanitize (can be None). Non-string
+            values are converted with str().
+        strip: If True, remove disallowed tags instead of escaping them
+
+    Returns:
+        Sanitized HTML string; '' for None or any other falsy input
+    """
+    if not content:
+        return ''
+
+    if not isinstance(content, str):
+        content = str(content)
+
+    # Sanitize HTML
+    sanitized = bleach.clean(
+        content,
+        tags=ALLOWED_TAGS,
+        attributes=ALLOWED_ATTRIBUTES,
+        protocols=ALLOWED_PROTOCOLS,
+        strip=strip,
+        strip_comments=True
+    )
+
+    # Linkify URLs (convert plain URLs to links).
+    # Only linkify if content doesn't already contain HTML links.
+    if '<a' not in sanitized:
+        # bleach.linkify() has no ``protocols`` parameter (passing one raises
+        # TypeError). The recognised schemes are part of the URL regex, so
+        # build a Linker whose regex is limited to the allowed protocols.
+        from bleach.linkifier import Linker, build_url_re
+
+        linker = Linker(
+            url_re=build_url_re(protocols=ALLOWED_PROTOCOLS),
+            parse_email=True,
+        )
+        sanitized = linker.linkify(sanitized)
+
+    return sanitized
+
+
+def sanitize_text(content: Optional[str]) -> str:
+    """
+    Remove every HTML tag from content so that only its text remains.
+    Intended for fields that must never carry markup.
+
+    Args:
+        content: Value to clean (can be None). Non-string values are
+            converted with str() first.
+
+    Returns:
+        The content with all tags stripped; '' for None or any other
+        falsy input
+    """
+    if content:
+        text = content if isinstance(content, str) else str(content)
+        # An empty tag whitelist plus strip=True drops every tag instead of
+        # escaping it.
+        return bleach.clean(text, tags=[], strip=True)
+
+    return ''
+
+
+def _truncate_utf8(text: str, max_bytes: int) -> str:
+    """Return the longest prefix of text whose UTF-8 encoding fits max_bytes."""
+    # errors='ignore' drops a multi-byte character cut in half by the slice.
+    return text.encode('utf-8')[:max_bytes].decode('utf-8', errors='ignore')
+
+
+def sanitize_filename(filename: str) -> str:
+    """
+    Sanitize filename to prevent path traversal and other attacks.
+
+    Directory components are dropped (both '/' and '\\' count as
+    separators), every character that is not alphanumeric, '.', '-' or '_'
+    is replaced by '_', and the result is limited to 255 bytes of UTF-8
+    while keeping the extension when it fits.
+
+    Args:
+        filename: The original filename
+
+    Returns:
+        Sanitized filename safe for filesystem operations. A random
+        "<token>.bin" name is returned when the input is empty or reduces
+        to '', '.' or '..'.
+    """
+    import os
+    import secrets
+
+    # Typical per-component filesystem limit, counted in bytes
+    max_length = 255
+
+    if not filename:
+        # Generate a random filename if none provided
+        return f"{secrets.token_urlsafe(16)}.bin"
+
+    # Remove path components (prevent directory traversal). Backslashes are
+    # normalised first because os.path.basename() does not treat them as
+    # separators on POSIX, so "C:\\dir\\file.txt" would otherwise survive whole.
+    filename = os.path.basename(filename.replace('\\', '/'))
+
+    # Keep only alphanumeric characters, dots, dashes and underscores
+    filename = ''.join(
+        char if char.isalnum() or char in '._-' else '_'
+        for char in filename
+    )
+
+    # Limit length. str.isalnum() accepts non-ASCII letters, so the size is
+    # measured in encoded bytes rather than characters.
+    if len(filename.encode('utf-8')) > max_length:
+        name, ext = os.path.splitext(filename)
+        ext_size = len(ext.encode('utf-8'))
+        if ext_size >= max_length:
+            # The "extension" alone exceeds the budget; cut the whole name
+            filename = _truncate_utf8(filename, max_length)
+        else:
+            filename = _truncate_utf8(name, max_length - ext_size) + ext
+
+    # Ensure filename is not empty and not a directory reference
+    if not filename or filename in ('.', '..'):
+        filename = f"{secrets.token_urlsafe(16)}.bin"
+
+    return filename
+
+
+def sanitize_url(url: Optional[str]) -> Optional[str]:
+    """
+    Sanitize URL to ensure it uses allowed protocols.
+
+    Surrounding whitespace is removed. A URL whose scheme is listed in
+    ALLOWED_PROTOCOLS is returned as is; a URL without a scheme is
+    prefixed with "https://"; anything carrying another scheme is rejected.
+
+    Args:
+        url: The URL to sanitize (can be None). Non-string values are
+            converted with str().
+
+    Returns:
+        Sanitized URL or None if invalid
+    """
+    import re
+
+    if not url:
+        return None
+
+    if not isinstance(url, str):
+        url = str(url)
+
+    candidate = url.strip()
+    if not candidate:
+        # Whitespace-only input carries no URL at all
+        return None
+
+    # Check if URL uses allowed protocol (schemes are case-insensitive)
+    lowered = candidate.lower()
+    if lowered.startswith(tuple(f'{proto}:' for proto in ALLOWED_PROTOCOLS)):
+        return candidate
+
+    # Any other "scheme://" form is an invalid protocol
+    if '://' in candidate:
+        return None
+
+    # Protocol-relative URL ("//host/path"): supply only the scheme so the
+    # result is not "https:////host/path"
+    if candidate.startswith('//'):
+        return f'https:{candidate}'
+
+    # Schemes that need no "//" (javascript:, data:, vbscript:, ...) must be
+    # rejected as well, not turned into "https://javascript:...".
+    # The lookahead keeps "host:port" inputs such as "localhost:8080/x",
+    # where the colon is followed by a port number, out of this branch.
+    if re.match(r'[a-z][a-z0-9+\-]*:(?!\d+(?:[/?#]|$))', lowered):
+        return None
+
+    # If no protocol, assume https
+    return f'https://{candidate}'
+