149 lines
5.3 KiB
Python
149 lines
5.3 KiB
Python
"""
|
|
File validation utilities for secure file uploads.
|
|
Validates file types using magic bytes (file signatures) to prevent spoofing.
|
|
"""
|
|
from PIL import Image
|
|
import io
|
|
from typing import Tuple, Optional
|
|
from fastapi import UploadFile, HTTPException, status
|
|
|
|
# Leading-byte signatures ("magic bytes") of common image formats, each mapped
# to the MIME type it indicates.
IMAGE_MAGIC_BYTES = {
    # JPEG
    b'\xFF\xD8\xFF': 'image/jpeg',
    # PNG
    b'\x89\x50\x4E\x47\x0D\x0A\x1A\x0A': 'image/png',
    # GIF (both header variants)
    b'GIF87a': 'image/gif',
    b'GIF89a': 'image/gif',
    # WebP files start with RIFF; the prefix alone is not conclusive and a
    # deeper check is needed before trusting this entry.
    b'RIFF': 'image/webp',
    # Icon files (both header variants)
    b'\x00\x00\x01\x00': 'image/x-icon',
    b'\x00\x00\x02\x00': 'image/x-icon',
}

# MIME types on the image allow-list.
ALLOWED_IMAGE_TYPES = {
    'image/jpeg',
    'image/png',
    'image/gif',
    'image/webp',
}
|
|
|
|
def validate_image_file_signature(file_content: bytes, filename: str) -> Tuple[bool, str]:
    """
    Validate a file's type from its magic bytes (file signature).

    The leading bytes are compared against the JPEG, PNG, GIF, WebP and ICO
    signatures. If none matches, the content is handed to PIL, which must
    both verify it and report one of the supported formats. The decision is
    based only on the file content, which prevents MIME type spoofing.

    Args:
        file_content: The file content as bytes.
        filename: The filename. Not consulted by the current checks; kept so
            the signature stays compatible with existing callers.

    Returns:
        Tuple of ``(is_valid, result)``. When ``is_valid`` is True, ``result``
        is the detected MIME type; otherwise it is an error message.
    """
    if not file_content:
        return False, "File is empty"

    detected_type = None

    if file_content.startswith(b'\xFF\xD8\xFF'):
        detected_type = 'image/jpeg'
    elif file_content.startswith(b'\x89\x50\x4E\x47\x0D\x0A\x1A\x0A'):
        detected_type = 'image/png'
    elif file_content.startswith((b'GIF87a', b'GIF89a')):
        detected_type = 'image/gif'
    elif file_content.startswith(b'RIFF'):
        # RIFF is only a container prefix: the file is WebP when bytes 8-11
        # spell "WEBP". Slicing is safe on short input, so no separate length
        # check is needed (the previous `> 12` guard skipped a 12-byte header).
        if file_content[8:12] == b'WEBP':
            detected_type = 'image/webp'
    elif file_content.startswith((b'\x00\x00\x01\x00', b'\x00\x00\x02\x00')):
        detected_type = 'image/x-icon'

    # No known signature matched: let PIL decide whether this is an image.
    if detected_type is None:
        pil_format_to_mime = {
            'jpeg': 'image/jpeg',
            'png': 'image/png',
            'gif': 'image/gif',
            'webp': 'image/webp',
        }
        try:
            # The context manager releases the image once it has been checked.
            with Image.open(io.BytesIO(file_content)) as img:
                img.verify()
                img_format = img.format.lower() if img.format else None
        except Exception:
            # Any failure to open or verify means the content is not usable
            # as an image; report that rather than propagating.
            return False, "File is not a valid image or is corrupted"

        detected_type = pil_format_to_mime.get(img_format)
        if detected_type is None:
            return False, f"Unsupported image format: {img_format}"

    # NOTE(review): 'image/x-icon' is let through here even though it is not
    # listed in ALLOWED_IMAGE_TYPES - confirm this exemption is intended.
    if detected_type not in ALLOWED_IMAGE_TYPES and detected_type != 'image/x-icon':
        # Sorted so the message is stable (set iteration order is not).
        allowed = ', '.join(sorted(ALLOWED_IMAGE_TYPES))
        return False, f"File type {detected_type} is not allowed. Allowed types: {allowed}"

    return True, detected_type
|
|
|
|
|
|
async def validate_uploaded_image(file: UploadFile, max_size: int) -> bytes:
    """
    Validate an uploaded image file completely.

    Checks, in order: the declared MIME type, the content size, the file
    signature (magic bytes), PIL integrity verification, and the pixel
    dimensions.

    Args:
        file: FastAPI UploadFile object
        max_size: Maximum file size in bytes

    Returns:
        File content as bytes

    Raises:
        HTTPException: 400 if the declared MIME type is not ``image/*``, the
            signature check fails, PIL cannot open/verify the content, or
            either dimension exceeds 10000 pixels; 413 if the content is
            larger than ``max_size``.
    """
    # Check MIME type first (quick check)
    if not file.content_type or not file.content_type.startswith('image/'):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f'File must be an image. Received MIME type: {file.content_type}'
        )

    # NOTE(review): the whole upload is read into memory before its size is
    # checked; consider an earlier bound if very large uploads are a concern.
    content = await file.read()

    # Validate file size
    if len(content) > max_size:
        raise HTTPException(
            status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
            detail=f'File size ({len(content)} bytes) exceeds maximum allowed size ({max_size} bytes / {max_size // 1024 // 1024}MB)'
        )

    # Validate file signature (magic bytes)
    is_valid, result = validate_image_file_signature(content, file.filename or '')
    if not is_valid:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f'Invalid file type: {result}. File signature validation failed. Please upload a valid image file.'
        )

    # Additional PIL validation to ensure image is not corrupted. Only the PIL
    # calls live inside the try, so the handler below never catches (and
    # re-wraps) an HTTPException raised by this function itself.
    try:
        with Image.open(io.BytesIO(content)) as img:
            # Verify image integrity
            img.verify()
        # Re-open to read the dimensions: an image must be reopened after
        # verify() before it is used further.
        with Image.open(io.BytesIO(content)) as img:
            width, height = img.width, img.height
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f'Invalid or corrupted image file: {str(e)}'
        ) from e

    # Check image dimensions to prevent decompression bombs
    if width > 10000 or height > 10000:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail='Image dimensions too large. Maximum dimensions: 10000x10000 pixels'
        )

    return content
|
|
|