This commit is contained in:
Iliyan Angelov
2025-12-05 17:43:03 +02:00
parent e1988fe37a
commit 13c91f95f4
51 changed files with 11933 additions and 289 deletions

View File

@@ -60,6 +60,25 @@ def sanitize_html(content: Optional[str], strip: bool = False) -> str:
strip_comments=True
)
# Additional link sanitization - ensure external links have rel="noopener noreferrer"
if '<a' in sanitized:
import re
# Add rel="noopener noreferrer" to external links
def add_rel(match):
    """Return the matched anchor tag with ``noopener noreferrer`` enforced.

    Used as the replacement callback for ``re.sub``. The tag's attributes
    are tokenized instead of substring-searched, so text inside an
    attribute value (for example ``?rel=1`` in a URL, or a ``data-rel``
    attribute) cannot be mistaken for a real ``rel`` attribute.

    Args:
        match: Regex match whose group 0 is a complete opening tag,
            e.g. ``<a href="https://example.com">``.

    Returns:
        The tag unchanged when it is not an ``<a>`` element, has no
        ``href``, or its ``href`` contains no ``http://`` / ``https://``.
        Otherwise the tag with both ``noopener`` and ``noreferrer``
        present in ``rel`` (existing tokens are kept).
    """
    tag = match.group(0)

    # The calling pattern ``<a[^>]*>`` also matches <abbr>, <article>, ...;
    # only a real anchor has whitespace right after the "a".
    # ``re`` comes from the enclosing scope's import.
    if not re.match(r'<a\s', tag):
        return tag

    # name, then optionally = "value" | 'value' | bare-value
    attr_pattern = re.compile(
        r'''([^\s"'<>/=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'<>]+)))?'''
    )

    href_value = None
    rel_attr = None
    # Scan between "<a" and the closing ">"; keep the first occurrence of
    # each attribute, which is the one browsers honour.
    for attr in attr_pattern.finditer(tag, 2, len(tag) - 1):
        name = attr.group(1).lower()
        if name == 'href' and href_value is None:
            href_value = attr.group(2) or attr.group(3) or attr.group(4) or ''
        elif name == 'rel' and rel_attr is None:
            rel_attr = attr

    if href_value is None:
        return tag
    href_lower = href_value.lower()
    if 'http://' not in href_lower and 'https://' not in href_lower:
        return tag

    if rel_attr is None:
        # Insert rel attribute before the closing ">"
        return tag[:-1] + ' rel="noopener noreferrer">'

    rel_value = rel_attr.group(2) or rel_attr.group(3) or rel_attr.group(4) or ''
    tokens = rel_value.split()
    present = {token.lower() for token in tokens}
    missing = [t for t in ('noopener', 'noreferrer') if t not in present]
    if not missing:
        return tag

    # Keep the original quote style so a value containing the other quote
    # character cannot break out of the attribute.
    quote = "'" if rel_attr.group(3) is not None else '"'
    new_rel = '{}={}{}{}'.format(
        rel_attr.group(1), quote, ' '.join(missing + tokens), quote
    )
    return tag[:rel_attr.start()] + new_rel + tag[rel_attr.end():]
sanitized = re.sub(r'<a[^>]*>', add_rel, sanitized)
# Linkify URLs (convert plain URLs to links)
# Only linkify if content doesn't already contain HTML links
if '<a' not in sanitized:
@@ -76,6 +95,7 @@ def sanitize_text(content: Optional[str]) -> str:
"""
Strip all HTML tags from content, leaving only plain text.
Useful for fields that should not contain any HTML.
Alias for sanitize_text_for_html for backward compatibility.
Args:
content: The content to sanitize (can be None)
@@ -93,6 +113,21 @@ def sanitize_text(content: Optional[str]) -> str:
return bleach.clean(content, tags=[], strip=True)
def sanitize_text_for_html(text: Optional[str]) -> str:
    """
    Make plain text safe to embed in an HTML document.

    Thin wrapper kept for naming consistency: all work is delegated to
    ``sanitize_text``, so the result is exactly what that function
    returns for the same input.

    Args:
        text: Plain text to sanitize (may be None)

    Returns:
        The value returned by ``sanitize_text(text)``
    """
    cleaned = sanitize_text(text)
    return cleaned
def sanitize_filename(filename: str) -> str:
"""
Sanitize filename to prevent path traversal and other attacks.