This commit is contained in:
Iliyan Angelov
2025-12-01 06:50:10 +02:00
parent 91f51bc6fe
commit 62c1fe5951
4682 changed files with 544807 additions and 31208 deletions

View File

@@ -0,0 +1,28 @@
from pathlib import Path
import logging
logger = logging.getLogger(__name__)
def detect_encoding(file_path: Path) -> str:
    """Guess a text file's encoding from its leading byte-order mark (BOM).

    Only the first four bytes of the file are read; the content itself is
    never decoded, so a file without a BOM is always reported as UTF-8.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        ``"utf-32"``, ``"utf-16"`` or ``"utf-8-sig"`` when the file starts
        with the corresponding BOM, otherwise ``"utf-8"``. ``"utf-8"`` is
        also returned when the file cannot be opened or read; the failure
        is logged and not raised.
    """
    try:
        with open(file_path, "rb") as f:
            # Four bytes cover the longest BOM checked below (UTF-32).
            head = f.read(4)
    except Exception:
        # Best effort: detection must never break the caller, so fall back
        # to the default encoding and leave a traceback in the log.
        logger.exception("Error detecting encoding")
        return "utf-8"

    # UTF-32 has to be tested before UTF-16: the UTF-32 LE BOM
    # (FF FE 00 00) begins with the UTF-16 LE BOM (FF FE), so checking
    # UTF-16 first would misreport UTF-32 LE files.
    # NOTE: a UTF-16 LE file whose first character is U+0000 is
    # indistinguishable from UTF-32 LE here; UTF-32 is assumed.
    if head.startswith((b"\xff\xfe\x00\x00", b"\x00\x00\xfe\xff")):
        return "utf-32"
    if head.startswith((b"\xff\xfe", b"\xfe\xff")):
        return "utf-16"
    if head.startswith(b"\xef\xbb\xbf"):
        return "utf-8-sig"
    return "utf-8"