updates
This commit is contained in:
148
Backend/src/utils/file_validation.py
Normal file
148
Backend/src/utils/file_validation.py
Normal file
@@ -0,0 +1,148 @@
|
||||
"""
|
||||
File validation utilities for secure file uploads.
|
||||
Validates file types using magic bytes (file signatures) to prevent spoofing.
|
||||
"""
|
||||
from PIL import Image
|
||||
import io
|
||||
from typing import Tuple, Optional
|
||||
from fastapi import UploadFile, HTTPException, status
|
||||
|
||||
# Magic bytes (leading file signatures) for common image formats, mapped to
# the MIME type each signature indicates.
IMAGE_MAGIC_BYTES = {
    b'\xFF\xD8\xFF': 'image/jpeg',
    b'\x89\x50\x4E\x47\x0D\x0A\x1A\x0A': 'image/png',
    b'GIF87a': 'image/gif',
    b'GIF89a': 'image/gif',
    # RIFF is a generic container prefix: a WebP file additionally carries
    # b'WEBP' at bytes 8-11, so this prefix alone needs a deeper check.
    b'RIFF': 'image/webp',
    b'\x00\x00\x01\x00': 'image/x-icon',
    b'\x00\x00\x02\x00': 'image/x-icon',
}

# MIME types that signature validation accepts. 'image/x-icon' is not listed
# here; validate_image_file_signature special-cases it.
ALLOWED_IMAGE_TYPES = {'image/jpeg', 'image/png', 'image/gif', 'image/webp'}
|
||||
|
||||
def validate_image_file_signature(file_content: bytes, filename: str) -> Tuple[bool, str]:
    """
    Validate an image by its magic bytes (file signature).

    The leading bytes of the content are compared against known image
    signatures, so the verdict does not rely on a client-supplied MIME type.
    Content without a recognised signature is handed to PIL as a fallback.

    Args:
        file_content: The file content as bytes.
        filename: The original filename. Not consulted by the current
            implementation; kept so existing callers keep working.

    Returns:
        Tuple of (is_valid, detail). When is_valid is True, detail is the
        detected MIME type (e.g. 'image/png'); when it is False, detail is a
        human-readable error message.
    """
    if not file_content:
        return False, "File is empty"

    detected_type = None

    if file_content.startswith(b'\xFF\xD8\xFF'):
        detected_type = 'image/jpeg'
    elif file_content.startswith(b'\x89\x50\x4E\x47\x0D\x0A\x1A\x0A'):
        detected_type = 'image/png'
    elif file_content.startswith((b'GIF87a', b'GIF89a')):
        detected_type = 'image/gif'
    elif file_content.startswith(b'RIFF') and len(file_content) > 12:
        # RIFF is a generic container; only a b'WEBP' tag at bytes 8-11 makes
        # it WebP. Any other RIFF payload stays undetected and falls through
        # to the PIL check below.
        if file_content[8:12] == b'WEBP':
            detected_type = 'image/webp'
    elif file_content.startswith((b'\x00\x00\x01\x00', b'\x00\x00\x02\x00')):
        detected_type = 'image/x-icon'

    # No known signature matched: let PIL try to identify the content.
    if detected_type is None:
        pil_format_to_mime = {
            'jpeg': 'image/jpeg',
            'png': 'image/png',
            'gif': 'image/gif',
            'webp': 'image/webp',
        }
        try:
            with Image.open(io.BytesIO(file_content)) as img:
                img.verify()
                img_format = img.format.lower() if img.format else None
        except Exception:
            # Deliberately broad: this is untrusted input, and any failure
            # while parsing it means the file is rejected.
            return False, "File is not a valid image or is corrupted"

        detected_type = pil_format_to_mime.get(img_format)
        if detected_type is None:
            return False, f"Unsupported image format: {img_format}"

    # ICO is accepted even though it is not part of ALLOWED_IMAGE_TYPES.
    if detected_type != 'image/x-icon' and detected_type not in ALLOWED_IMAGE_TYPES:
        # sorted() keeps the message stable; set iteration order is not.
        allowed = ', '.join(sorted(ALLOWED_IMAGE_TYPES))
        return False, f"File type {detected_type} is not allowed. Allowed types: {allowed}"

    return True, detected_type
|
||||
|
||||
|
||||
async def validate_uploaded_image(file: UploadFile, max_size: int, max_dimension: int = 10000) -> bytes:
    """
    Validate an uploaded image file completely.

    Checks, in order: the declared MIME type, the payload size, the file
    signature (magic bytes), PIL integrity verification, and the pixel
    dimensions.

    Args:
        file: FastAPI UploadFile object.
        max_size: Maximum file size in bytes.
        max_dimension: Maximum allowed width and height in pixels.

    Returns:
        File content as bytes.

    Raises:
        HTTPException: 400 if the declared MIME type is not an image, the
            signature check fails, PIL cannot verify the image, or the
            dimensions exceed max_dimension; 413 if the content is larger
            than max_size.
    """
    # Check the declared MIME type first (quick check).
    if not file.content_type or not file.content_type.startswith('image/'):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f'File must be an image. Received MIME type: {file.content_type}'
        )

    content = await file.read()

    if len(content) > max_size:
        raise HTTPException(
            status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
            detail=f'File size ({len(content)} bytes) exceeds maximum allowed size ({max_size} bytes / {max_size // 1024 // 1024}MB)'
        )

    # Validate file signature (magic bytes).
    is_valid, result = validate_image_file_signature(content, file.filename or '')
    if not is_valid:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f'Invalid file type: {result}. File signature validation failed. Please upload a valid image file.'
        )

    # Additional PIL validation to ensure the image is not corrupted.
    try:
        with Image.open(io.BytesIO(content)) as img:
            img.verify()
        # An image must be re-opened after verify() before it is used again.
        with Image.open(io.BytesIO(content)) as img:
            width, height = img.size
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f'Invalid or corrupted image file: {str(e)}'
        ) from e

    # Kept outside the try block so this HTTPException is not caught by the
    # handler above and re-reported as a corrupted-image error.
    if width > max_dimension or height > max_dimension:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=f'Image dimensions too large. Maximum dimensions: {max_dimension}x{max_dimension} pixels'
        )

    return content
|
||||
|
||||
Reference in New Issue
Block a user