Files
Hotel-Booking/Backend/src/utils/file_validation.py
Iliyan Angelov 312f85530c updates
2025-11-28 02:40:05 +02:00

149 lines
5.3 KiB
Python

"""
File validation utilities for secure file uploads.
Validates file types using magic bytes (file signatures) to prevent spoofing.
"""
from PIL import Image
import io
from typing import Tuple, Optional
from fastapi import UploadFile, HTTPException, status
# Leading byte signatures ("magic bytes") mapped to the MIME type they indicate.
IMAGE_MAGIC_BYTES = {
    b'\xFF\xD8\xFF': 'image/jpeg',
    b'\x89\x50\x4E\x47\x0D\x0A\x1A\x0A': 'image/png',
    b'GIF87a': 'image/gif',
    b'GIF89a': 'image/gif',
    # RIFF is only a container prefix: a WebP file additionally carries
    # b'WEBP' at bytes 8-11, so this entry alone is not conclusive.
    b'RIFF': 'image/webp',
    b'\x00\x00\x01\x00': 'image/x-icon',
    b'\x00\x00\x02\x00': 'image/x-icon',
}

# MIME types accepted for uploads. 'image/x-icon' is intentionally absent
# from this set even though a signature for it is listed above.
ALLOWED_IMAGE_TYPES = {
    'image/jpeg',
    'image/png',
    'image/gif',
    'image/webp',
}
def validate_image_file_signature(file_content: bytes, filename: str) -> Tuple[bool, str]:
    """
    Validate file type using magic bytes (file signature).

    The decision is based on the bytes of the file itself, which prevents
    MIME type spoofing attacks. Only the leading signature is inspected for
    known formats; the rest of the payload is not decoded here.

    Args:
        file_content: The file content as bytes
        filename: The filename. Accepted for interface compatibility; it is
            not consulted when deciding the file type.

    Returns:
        Tuple of (is_valid, result). When is_valid is True, result is the
        detected MIME type (e.g. 'image/png'); when is_valid is False,
        result is a human-readable error message.
    """
    if not file_content:
        return False, "File is empty"

    # Match the leading bytes against known image signatures.
    detected_type = None
    if file_content.startswith(b'\xFF\xD8\xFF'):
        detected_type = 'image/jpeg'
    elif file_content.startswith(b'\x89\x50\x4E\x47\x0D\x0A\x1A\x0A'):
        detected_type = 'image/png'
    elif file_content.startswith((b'GIF87a', b'GIF89a')):
        detected_type = 'image/gif'
    elif (
        file_content.startswith(b'RIFF')
        and len(file_content) > 12
        and file_content[8:12] == b'WEBP'
    ):
        # RIFF is a generic container; WebP is identified by bytes 8-11.
        detected_type = 'image/webp'
    elif file_content.startswith((b'\x00\x00\x01\x00', b'\x00\x00\x02\x00')):
        detected_type = 'image/x-icon'

    # No signature matched: let PIL try to identify the data instead.
    if detected_type is None:
        pil_format_to_mime = {
            'jpeg': 'image/jpeg',
            'png': 'image/png',
            'gif': 'image/gif',
            'webp': 'image/webp',
        }
        try:
            img = Image.open(io.BytesIO(file_content))
            img.verify()
            img_format = img.format.lower() if img.format else None
        except Exception:
            # Any failure to open or verify is reported uniformly as an
            # invalid image rather than propagated to the caller.
            return False, "File is not a valid image or is corrupted"
        detected_type = pil_format_to_mime.get(img_format)
        if detected_type is None:
            return False, f"Unsupported image format: {img_format}"

    # NOTE(review): 'image/x-icon' is accepted even though it is not in
    # ALLOWED_IMAGE_TYPES; this preserves existing behaviour - confirm it is
    # intended.
    if detected_type != 'image/x-icon' and detected_type not in ALLOWED_IMAGE_TYPES:
        # Sorted so the message is stable (set iteration order is not).
        allowed = ', '.join(sorted(ALLOWED_IMAGE_TYPES))
        return False, f"File type {detected_type} is not allowed. Allowed types: {allowed}"
    return True, detected_type
async def validate_uploaded_image(file: UploadFile, max_size: int) -> bytes:
    """
    Validate an uploaded image file completely.

    Checks, in order: the declared MIME type, the payload size, the file
    signature (magic bytes), PIL integrity verification, and the pixel
    dimensions.

    Args:
        file: FastAPI UploadFile object
        max_size: Maximum file size in bytes

    Returns:
        File content as bytes

    Raises:
        HTTPException: 400 if the declared MIME type is not an image type,
            the signature check fails, PIL cannot open/verify the data, or
            either dimension exceeds 10000 pixels; 413 if the content is
            larger than max_size.
    """
    # Check MIME type first (quick check on the client-declared value).
    if not file.content_type or not file.content_type.startswith('image/'):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f'File must be an image. Received MIME type: {file.content_type}'
        )

    # Read the whole upload into memory, then enforce the size limit.
    content = await file.read()
    if len(content) > max_size:
        raise HTTPException(
            status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
            detail=(
                f'File size ({len(content)} bytes) exceeds maximum allowed size '
                f'({max_size} bytes / {max_size // 1024 // 1024}MB)'
            )
        )

    # Validate file signature (magic bytes).
    is_valid, result = validate_image_file_signature(content, file.filename or '')
    if not is_valid:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=(
                f'Invalid file type: {result}. File signature validation failed. '
                'Please upload a valid image file.'
            )
        )

    # Additional PIL validation to ensure the image is not corrupted. Only
    # the PIL calls live inside the try block so that the dimension error
    # raised further down is not caught and re-wrapped by this handler.
    try:
        img = Image.open(io.BytesIO(content))
        img.verify()
        # Re-open to read the dimensions (the image cannot be reused after
        # verify()).
        img = Image.open(io.BytesIO(content))
        width, height = img.width, img.height
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f'Invalid or corrupted image file: {str(e)}'
        ) from e

    # Check image dimensions to prevent decompression bombs.
    if width > 10000 or height > 10000:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail='Image dimensions too large. Maximum dimensions: 10000x10000 pixels'
        )
    return content