""" Caching middleware for `pip-audit`. """ from __future__ import annotations import logging import os import shutil import subprocess import sys from pathlib import Path from tempfile import NamedTemporaryFile from typing import Any import pip_api import requests from cachecontrol import CacheControl from cachecontrol.caches import FileCache from packaging.version import Version from platformdirs import user_cache_path from pip_audit._service.interface import ServiceError logger = logging.getLogger(__name__) # The `cache dir` command was added to `pip` as of 20.1 so we should check before trying to use it # to discover the `pip` HTTP cache _MINIMUM_PIP_VERSION = Version("20.1") _PIP_VERSION = Version(str(pip_api.PIP_VERSION)) _PIP_AUDIT_LEGACY_INTERNAL_CACHE = Path.home() / ".pip-audit-cache" def _get_pip_cache() -> Path: # Unless the cache directory is specifically set by the `--cache-dir` option, we try to share # the `pip` HTTP cache cmd = [sys.executable, "-m", "pip", "cache", "dir"] try: process = subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) except subprocess.CalledProcessError as cpe: # pragma: no cover # NOTE: This should only happen if pip's cache has been explicitly disabled, # which we check for in the caller (via `PIP_NO_CACHE_DIR`). raise ServiceError(f"Failed to query the `pip` HTTP cache directory: {cmd}") from cpe cache_dir = process.stdout.decode("utf-8").strip("\n") http_cache_dir = Path(cache_dir) / "http" return http_cache_dir def _get_cache_dir(custom_cache_dir: Path | None, *, use_pip: bool = True) -> Path: """ Returns a directory path suitable for HTTP caching. The directory is **not** guaranteed to exist. `use_pip` tells the function to prefer `pip`'s pre-existing cache, **unless** `PIP_NO_CACHE_DIR` is present in the environment. """ # If the user has explicitly requested a directory, pass it through unscathed. if custom_cache_dir is not None: return custom_cache_dir # Retrieve pip-audit's default internal cache using `platformdirs`. pip_audit_cache_dir = user_cache_path("pip-audit", appauthor=False, ensure_exists=True) # If the retrieved cache isn't the legacy one, try to delete the old cache if it exists. if ( _PIP_AUDIT_LEGACY_INTERNAL_CACHE.exists() and pip_audit_cache_dir != _PIP_AUDIT_LEGACY_INTERNAL_CACHE ): shutil.rmtree(_PIP_AUDIT_LEGACY_INTERNAL_CACHE) # Respect pip's PIP_NO_CACHE_DIR environment setting. if use_pip and not os.getenv("PIP_NO_CACHE_DIR"): pip_cache_dir = _get_pip_cache() if _PIP_VERSION >= _MINIMUM_PIP_VERSION else None if pip_cache_dir is not None: return pip_cache_dir else: logger.warning( f"pip {_PIP_VERSION} doesn't support the `cache dir` subcommand, " f"using {pip_audit_cache_dir} instead" ) return pip_audit_cache_dir else: return pip_audit_cache_dir class _SafeFileCache(FileCache): """ A rough mirror of `pip`'s `SafeFileCache` that *should* be runtime-compatible with `pip` (i.e., does not interfere with `pip` when it shares the same caching directory as a running `pip` process). """ def __init__(self, directory: Path): self._logged_warning = False super().__init__(str(directory)) def get(self, key: str) -> Any | None: try: return super().get(key) except Exception as e: # pragma: no cover if not self._logged_warning: logger.warning( f"Failed to read from cache directory, performance may be degraded: {e}" ) self._logged_warning = True return None def set(self, key: str, value: bytes, expires: Any | None = None) -> None: try: self._set_impl(key, value) except Exception as e: # pragma: no cover if not self._logged_warning: logger.warning( f"Failed to write to cache directory, performance may be degraded: {e}" ) self._logged_warning = True def _set_impl(self, key: str, value: bytes) -> None: name: str = super()._fn(key) # Make sure the directory exists try: os.makedirs(os.path.dirname(name), self.dirmode) except OSError: # pragma: no cover pass # We don't want to use lock files since `pip` isn't going to recognise those. We should # write to the cache in a similar way to how `pip` does it. We create a temporary file, # then atomically replace the actual cache key's filename with it. This ensures # that other concurrent `pip` or `pip-audit` instances don't read partial data. with NamedTemporaryFile(delete=False, dir=os.path.dirname(name)) as io: io.write(value) # NOTE(ww): Similar to what `pip` does in `adjacent_tmp_file`. io.flush() os.fsync(io.fileno()) # NOTE(ww): Windows won't let us rename the temporary file until it's closed, # which is why we call `os.replace()` here rather than in the `with` block above. os.replace(io.name, name) def delete(self, key: str) -> None: # pragma: no cover try: super().delete(key) except Exception as e: if not self._logged_warning: logger.warning( f"Failed to delete file from cache directory, performance may be degraded: {e}" ) self._logged_warning = True def caching_session(cache_dir: Path | None, *, use_pip: bool = False) -> requests.Session: """ Return a `requests` style session, with suitable caching middleware. Uses the given `cache_dir` for the HTTP cache. `use_pip` determines how the fallback cache directory is determined, if `cache_dir` is None. When `use_pip` is `False`, `caching_session` will use a `pip-audit` internal cache directory. When `use_pip` is `True`, `caching_session` will attempt to discover `pip`'s cache directory, falling back on the internal `pip-audit` cache directory if the user's version of `pip` is too old. """ # We limit the number of redirects to 5, since the services we connect to # should really never redirect more than once or twice. inner_session = requests.Session() inner_session.max_redirects = 5 return CacheControl( inner_session, cache=_SafeFileCache(_get_cache_dir(cache_dir, use_pip=use_pip)), )