""" File validation utilities for secure file uploads. Validates file types using magic bytes (file signatures) to prevent spoofing. """ from PIL import Image import io from typing import Tuple, Optional from fastapi import UploadFile, HTTPException, status # Magic bytes for common image formats IMAGE_MAGIC_BYTES = { b'\xFF\xD8\xFF': 'image/jpeg', b'\x89\x50\x4E\x47\x0D\x0A\x1A\x0A': 'image/png', b'GIF87a': 'image/gif', b'GIF89a': 'image/gif', b'RIFF': 'image/webp', # WebP files start with RIFF, need deeper check b'\x00\x00\x01\x00': 'image/x-icon', b'\x00\x00\x02\x00': 'image/x-icon', } ALLOWED_IMAGE_TYPES = {'image/jpeg', 'image/png', 'image/gif', 'image/webp'} def validate_image_file_signature(file_content: bytes, filename: str) -> Tuple[bool, str]: """ Validate file type using magic bytes (file signature). This prevents MIME type spoofing attacks. Args: file_content: The file content as bytes filename: The filename (for extension checking) Returns: Tuple of (is_valid, error_message) """ if not file_content: return False, "File is empty" # Check magic bytes for image types file_start = file_content[:16] # Check first 16 bytes detected_type = None # Check for JPEG if file_content.startswith(b'\xFF\xD8\xFF'): detected_type = 'image/jpeg' # Check for PNG elif file_content.startswith(b'\x89\x50\x4E\x47\x0D\x0A\x1A\x0A'): detected_type = 'image/png' # Check for GIF elif file_content.startswith(b'GIF87a') or file_content.startswith(b'GIF89a'): detected_type = 'image/gif' # Check for WebP (RIFF header with WEBP in bytes 8-11) elif file_content.startswith(b'RIFF') and len(file_content) > 12: if file_content[8:12] == b'WEBP': detected_type = 'image/webp' # Check for ICO elif file_content.startswith(b'\x00\x00\x01\x00') or file_content.startswith(b'\x00\x00\x02\x00'): detected_type = 'image/x-icon' # If magic bytes don't match known image types, try PIL verification if not detected_type: try: # Try to open with PIL to verify it's a valid image img = Image.open(io.BytesIO(file_content)) img.verify() # Get format from PIL img_format = img.format.lower() if img.format else None if img_format == 'jpeg': detected_type = 'image/jpeg' elif img_format == 'png': detected_type = 'image/png' elif img_format == 'gif': detected_type = 'image/gif' elif img_format == 'webp': detected_type = 'image/webp' else: return False, f"Unsupported image format: {img_format}" except Exception: return False, "File is not a valid image or is corrupted" # Verify detected type is in allowed list if detected_type not in ALLOWED_IMAGE_TYPES and detected_type != 'image/x-icon': return False, f"File type {detected_type} is not allowed. Allowed types: {', '.join(ALLOWED_IMAGE_TYPES)}" return True, detected_type async def validate_uploaded_image(file: UploadFile, max_size: int) -> bytes: """ Validate an uploaded image file completely. Args: file: FastAPI UploadFile object max_size: Maximum file size in bytes Returns: File content as bytes Raises: HTTPException if validation fails """ # Check MIME type first (quick check) if not file.content_type or not file.content_type.startswith('image/'): raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail=f'File must be an image. Received MIME type: {file.content_type}' ) # Read file content content = await file.read() # Validate file size if len(content) > max_size: raise HTTPException( status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE, detail=f'File size ({len(content)} bytes) exceeds maximum allowed size ({max_size} bytes / {max_size // 1024 // 1024}MB)' ) # Validate file signature (magic bytes) is_valid, result = validate_image_file_signature(content, file.filename or '') if not is_valid: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail=f'Invalid file type: {result}. File signature validation failed. Please upload a valid image file.' ) # Additional PIL validation to ensure image is not corrupted try: img = Image.open(io.BytesIO(content)) # Verify image integrity img.verify() # Re-open for further processing (verify() closes the image) img = Image.open(io.BytesIO(content)) # Check image dimensions to prevent decompression bombs if img.width > 10000 or img.height > 10000: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail='Image dimensions too large. Maximum dimensions: 10000x10000 pixels' ) except Exception as e: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail=f'Invalid or corrupted image file: {str(e)}' ) return content