This commit is contained in:
Iliyan Angelov
2025-12-01 06:50:10 +02:00
parent 91f51bc6fe
commit 62c1fe5951
4682 changed files with 544807 additions and 31208 deletions

View File

@@ -0,0 +1,27 @@
"""
Vulnerability service interfaces and implementations for `pip-audit`.
"""
from .interface import (
ConnectionError,
Dependency,
ResolvedDependency,
ServiceError,
SkippedDependency,
VulnerabilityResult,
VulnerabilityService,
)
from .osv import OsvService
from .pypi import PyPIService
# Names re-exported from the `.interface`, `.osv` and `.pypi` submodules as
# this package's public API.
__all__ = [
"ConnectionError",
"Dependency",
"ResolvedDependency",
"ServiceError",
"SkippedDependency",
"VulnerabilityResult",
"VulnerabilityService",
"OsvService",
"PyPIService",
]

View File

@@ -0,0 +1,190 @@
"""
Interfaces for interacting with vulnerability services, i.e. sources
of vulnerability information for fully resolved Python packages.
"""
from __future__ import annotations
from abc import ABC, abstractmethod
from collections.abc import Iterator
from dataclasses import dataclass, replace
from datetime import datetime
from typing import Any, NewType
from packaging.utils import canonicalize_name
from packaging.version import Version
# A distinct static type for vulnerability identifiers. `NewType` only affects
# type checking: at runtime a `VulnerabilityID` is an ordinary `str`.
VulnerabilityID = NewType("VulnerabilityID", str)
@dataclass(frozen=True)
class Dependency:
    """
    Represents an abstract Python package.

    This class cannot be constructed directly: its constructor always raises
    `NotImplementedError`. Construct one of its subclasses
    (`ResolvedDependency` or `SkippedDependency`) instead.
    """

    name: str
    """
    The package's **uncanonicalized** name.

    Use the `canonical_name` property when a canonicalized form is necessary.
    """

    def __init__(self, *_args: Any, **_kwargs: Any) -> None:
        """
        A stub constructor that always fails.

        Because `__init__` is defined explicitly here, `@dataclass` does not
        generate one for this class; subclasses that do not define their own
        still receive a generated constructor.
        """
        raise NotImplementedError

    # TODO(ww): Use functools.cached_property when supported Python is 3.8+.
    @property
    def canonical_name(self) -> str:
        """
        The `Dependency`'s PEP-503 canonicalized name.
        """
        return canonicalize_name(self.name)

    def is_skipped(self) -> bool:
        """
        Check whether the `Dependency` was skipped by the audit.

        This is an exact type check: only direct instances of
        `SkippedDependency` (not instances of its subclasses) count as skipped.
        """
        return self.__class__ is SkippedDependency
@dataclass(frozen=True)
class ResolvedDependency(Dependency):
    """
    A Python package that has been resolved to one concrete version.
    """

    # The exact version this package was resolved to.
    version: Version
@dataclass(frozen=True)
class SkippedDependency(Dependency):
    """
    A Python package that could not be audited and was therefore skipped.
    """

    # Human-readable explanation of why the package was skipped.
    skip_reason: str
@dataclass(frozen=True)
class VulnerabilityResult:
    """
    Represents a "result" from a vulnerability service, indicating a vulnerability
    in some Python package.
    """

    id: VulnerabilityID
    """
    A service-provided identifier for the vulnerability.
    """

    description: str
    """
    A human-readable description of the vulnerability.
    """

    fix_versions: list[Version]
    """
    A list of versions that can be upgraded to that resolve the vulnerability.
    """

    aliases: set[str]
    """
    A set of aliases (alternative identifiers) for this result.
    """

    published: datetime | None = None
    """
    When the vulnerability was first published.
    """

    def alias_of(self, other: VulnerabilityResult) -> bool:
        """
        Returns whether this result is an "alias" of another result.

        Two results are said to be aliases if their respective sets of
        `{id, *aliases}` intersect at all. A result is therefore its own alias.
        """
        return bool((self.aliases | {self.id}).intersection(other.aliases | {other.id}))

    def merge_aliases(self, other: VulnerabilityResult) -> VulnerabilityResult:
        """
        Merge `other`'s aliases into this result, returning a new result.

        Neither `self` nor `other` is modified. The returned result keeps every
        field of `self` except `aliases`, which becomes the union of both alias
        sets with this result's own `id` removed.
        """
        # Our own ID should never occur in the alias set.
        #
        # The parentheses matter: `-` binds more tightly than `|`, so without
        # them our ID would only be stripped from `other.aliases` and could
        # survive if it was already present in `self.aliases`.
        aliases = (self.aliases | other.aliases) - {self.id}
        return replace(self, aliases=aliases)

    def has_any_id(self, ids: set[str]) -> bool:
        """
        Returns whether ids intersects with {id} | aliases.
        """
        return bool(ids & (self.aliases | {self.id}))
class VulnerabilityService(ABC):
    """
    Abstract base for providers of Python package vulnerability information.

    Concrete services must implement `query`; `query_all` and the timestamp
    helper are provided here.
    """

    @abstractmethod
    def query(
        self, spec: Dependency
    ) -> tuple[Dependency, list[VulnerabilityResult]]:  # pragma: no cover
        """
        Look up vulnerability information for a single `Dependency`.

        Returns a pair of the dependency and the list of `VulnerabilityResult`
        found for it.
        """
        raise NotImplementedError

    def query_all(
        self, specs: Iterator[Dependency]
    ) -> Iterator[tuple[Dependency, list[VulnerabilityResult]]]:
        """
        Lazily query the service for each dependency in `specs`, in order.

        This default simply calls `query` once per dependency. Implementations
        that support batched or bulk requests can override it with something
        more efficient.
        """
        yield from (self.query(spec) for spec in specs)

    @staticmethod
    def _parse_rfc3339(dt: str | None) -> datetime | None:
        """
        Parse a `Z`-suffixed timestamp string into a naive `datetime`.

        `None` is passed through unchanged. A `ValueError` is raised if the
        string matches neither supported format.
        """
        if dt is None:
            return None

        # NOTE: OSV's schema says timestamps are RFC3339, but strptime cannot
        # express an optional field (such as `%f`), so the format with
        # fractional seconds is tried first and the one without is the fallback.
        # See: https://github.com/google/osv.dev/issues/857
        with_fraction = "%Y-%m-%dT%H:%M:%S.%fZ"
        without_fraction = "%Y-%m-%dT%H:%M:%SZ"
        try:
            parsed = datetime.strptime(dt, with_fraction)
        except ValueError:
            parsed = datetime.strptime(dt, without_fraction)
        return parsed
class ServiceError(Exception):
    """
    The base error for any failure of a `VulnerabilityService`.

    Concrete `VulnerabilityService` implementations are expected to subclass
    this exception in order to provide more context.
    """
class ConnectionError(ServiceError):
    """
    A `ServiceError` raised specifically when the vulnerability service is
    unreachable or offline.

    NOTE: within this module the name shadows the builtin `ConnectionError`.
    """

View File

@@ -0,0 +1,155 @@
"""
Functionality for using the [OSV](https://osv.dev/) API as a `VulnerabilityService`.
"""
from __future__ import annotations
import json
import logging
from pathlib import Path
from typing import Any, cast
import requests
from packaging.version import Version
from pip_audit._cache import caching_session
from pip_audit._service.interface import (
ConnectionError,
Dependency,
ResolvedDependency,
ServiceError,
VulnerabilityResult,
VulnerabilityService,
)
logger = logging.getLogger(__name__)
class OsvService(VulnerabilityService):
    """
    An implementation of `VulnerabilityService` that uses OSV to provide Python
    package vulnerability information.
    """

    def __init__(self, cache_dir: Path | None = None, timeout: int | None = None):
        """
        Create a new `OsvService`.

        `cache_dir` is an optional cache directory to use, for caching and reusing OSV API
        requests. If `None`, `pip-audit` will use its own internal caching directory.

        `timeout` is an optional argument to control how many seconds the component should wait for
        responses to network requests.
        """
        self.session = caching_session(cache_dir, use_pip=False)
        self.timeout = timeout

    def query(self, spec: Dependency) -> tuple[Dependency, list[VulnerabilityResult]]:
        """
        Queries OSV for the given `Dependency` specification.

        See `VulnerabilityService.query`.

        Skipped dependencies are returned untouched with an empty result list.

        Raises `ConnectionError` if connecting to OSV times out, and
        `ServiceError` if OSV answers with an HTTP error status or reports a
        fix version that cannot be parsed.
        """
        # Imported here because this module's top-level imports only bring in
        # `Version` from `packaging.version`.
        from packaging.version import InvalidVersion

        if spec.is_skipped():
            return spec, []
        spec = cast(ResolvedDependency, spec)

        url = "https://api.osv.dev/v1/query"
        query = {
            "package": {"name": spec.canonical_name, "ecosystem": "PyPI"},
            "version": str(spec.version),
        }
        try:
            response: requests.Response = self.session.post(
                url=url,
                data=json.dumps(query),
                timeout=self.timeout,
            )
            response.raise_for_status()
        except requests.ConnectTimeout as timeout_error:
            raise ConnectionError(
                "Could not connect to OSV's vulnerability feed"
            ) from timeout_error
        except requests.HTTPError as http_error:
            raise ServiceError from http_error

        # If the response is empty, that means that the package/version pair doesn't have any
        # associated vulnerabilities
        #
        # In that case, return an empty list
        results: list[VulnerabilityResult] = []
        response_json = response.json()
        if not response_json:
            return spec, results

        vuln: dict[str, Any]
        # A non-empty response without a "vulns" key is treated like an empty
        # one rather than failing with a `KeyError`.
        for vuln in response_json.get("vulns", []):
            # Sanity check: only the v1 schema is specified at the moment,
            # and the code below probably won't work with future incompatible
            # schemas without additional changes.
            # The absence of a schema is treated as 1.0.0, per the OSV spec.
            schema_version = Version(vuln.get("schema_version", "1.0.0"))
            if schema_version.major != 1:
                logger.warning(f"Unsupported OSV schema version: {schema_version}")
                continue

            vuln_id = vuln["id"]

            # If the vulnerability has been withdrawn, we skip it entirely.
            withdrawn_at = vuln.get("withdrawn")
            if withdrawn_at is not None:
                logger.debug(f"OSV vuln entry '{vuln_id}' marked as withdrawn at {withdrawn_at}")
                continue

            # The summary is intended to be shorter, so we prefer it over
            # details, if present. However, neither is required.
            description = vuln.get("summary")
            if description is None:
                description = vuln.get("details")
            if description is None:
                description = "N/A"

            # The "summary" field should be a single line, but "details" might
            # be multiple (Markdown-formatted) lines. So, we normalize our
            # description into a single line (and potentially break the Markdown
            # formatting in the process).
            description = description.replace("\n", " ")

            # OSV doesn't mandate this field either. There's very little we
            # can do without it, so we skip any results that are missing it.
            affecteds = vuln.get("affected")
            if affecteds is None:
                logger.warning(f"OSV vuln entry '{vuln_id}' is missing 'affected' list")
                continue

            fix_versions: list[Version] = []
            for affected in affecteds:
                # Entries lacking a "package" object simply never match.
                pkg = affected.get("package") or {}

                # We only care about PyPI versions
                if pkg.get("name") != spec.canonical_name or pkg.get("ecosystem") != "PyPI":
                    continue

                # NOTE(review): a later matching `affected` entry replaces,
                # rather than extends, the fix versions collected from an
                # earlier one -- confirm that this is intended.
                for ranges in affected.get("ranges", []):
                    if ranges["type"] != "ECOSYSTEM":
                        continue

                    # Filter out non-fix versions
                    fix_version_strs = [
                        event["fixed"] for event in ranges["events"] if "fixed" in event
                    ]

                    # Convert them to version objects, reporting malformed
                    # versions the same way `PyPIService` does.
                    try:
                        fix_versions = [
                            Version(version_str) for version_str in fix_version_strs
                        ]
                    except InvalidVersion as iv:
                        raise ServiceError(
                            f"Received malformed version from OSV: {fix_version_strs}"
                        ) from iv

                    # Only the first ECOSYSTEM range of an entry is used.
                    break

            # The ranges aren't guaranteed to come in chronological order
            fix_versions.sort()

            results.append(
                VulnerabilityResult(
                    id=vuln_id,
                    description=description,
                    fix_versions=fix_versions,
                    aliases=set(vuln.get("aliases", [])),
                    published=self._parse_rfc3339(vuln.get("published")),
                )
            )

        return spec, results

View File

@@ -0,0 +1,135 @@
"""
Functionality for using the [PyPI](https://warehouse.pypa.io/api-reference/json.html)
API as a `VulnerabilityService`.
"""
from __future__ import annotations
import logging
from pathlib import Path
from typing import cast
import requests
from packaging.version import InvalidVersion, Version
from pip_audit._cache import caching_session
from pip_audit._service.interface import (
ConnectionError,
Dependency,
ResolvedDependency,
ServiceError,
SkippedDependency,
VulnerabilityResult,
VulnerabilityService,
)
logger = logging.getLogger(__name__)
class PyPIService(VulnerabilityService):
    """
    An implementation of `VulnerabilityService` that uses PyPI to provide Python
    package vulnerability information.
    """

    def __init__(self, cache_dir: Path | None = None, timeout: int | None = None) -> None:
        """
        Create a new `PyPIService`.

        `cache_dir` is an optional cache directory to use, for caching and reusing PyPI API
        requests. If `None`, `pip-audit` will attempt to use `pip`'s cache directory before falling
        back on its own default cache directory.

        `timeout` is an optional argument to control how many seconds the component should wait for
        responses to network requests.
        """
        self.session = caching_session(cache_dir)
        self.timeout = timeout

    def query(self, spec: Dependency) -> tuple[Dependency, list[VulnerabilityResult]]:
        """
        Queries PyPI for the given `Dependency` specification.

        See `VulnerabilityService.query`.

        Skipped dependencies are returned untouched with an empty result list.
        A dependency that PyPI answers with HTTP 404 for is returned as a new
        `SkippedDependency`, also with an empty result list.

        Raises `ConnectionError` on too many redirects or a connect timeout, and
        `ServiceError` on any other HTTP error status or a malformed fix version.
        """
        if spec.is_skipped():
            return spec, []
        spec = cast(ResolvedDependency, spec)

        url = f"https://pypi.org/pypi/{spec.canonical_name}/{spec.version}/json"
        try:
            response: requests.Response = self.session.get(url=url, timeout=self.timeout)
            response.raise_for_status()
        except requests.TooManyRedirects as redirect_error:
            # This should never happen with a healthy PyPI instance, but might
            # happen during an outage or network event.
            # Ref 2022-06-10: https://status.python.org/incidents/lgpr13fy71bk
            raise ConnectionError("PyPI is not redirecting properly") from redirect_error
        except requests.ConnectTimeout as timeout_error:
            # Apart from a normal network outage, this can happen for two main
            # reasons:
            # 1. PyPI's APIs are offline
            # 2. The user is behind a firewall or corporate network that blocks
            #    PyPI (and they're probably using custom indices)
            raise ConnectionError(
                "Could not connect to PyPI's vulnerability feed"
            ) from timeout_error
        except requests.HTTPError as http_error:
            if response.status_code == 404:
                skip_reason = (
                    "Dependency not found on PyPI and could not be audited: "
                    f"{spec.canonical_name} ({spec.version})"
                )
                logger.debug(skip_reason)
                return SkippedDependency(name=spec.name, skip_reason=skip_reason), []
            raise ServiceError from http_error

        response_json = response.json()
        results: list[VulnerabilityResult] = []
        vulns = response_json.get("vulnerabilities")

        # No `vulnerabilities` key means that there are no vulnerabilities for any version
        if vulns is None:
            return spec, results

        for v in vulns:
            vuln_id = v["id"]

            # If the vulnerability has been withdrawn, we skip it entirely.
            withdrawn_at = v.get("withdrawn")
            if withdrawn_at is not None:
                logger.debug(f"PyPI vuln entry '{vuln_id}' marked as withdrawn at {withdrawn_at}")
                continue

            # Put together the fix versions list
            try:
                fix_versions = [Version(fixed_in) for fixed_in in v["fixed_in"]]
            except InvalidVersion as iv:
                raise ServiceError(f"Received malformed version from PyPI: {v['fixed_in']}") from iv

            # The ranges aren't guaranteed to come in chronological order
            fix_versions.sort()

            description = v.get("summary")
            if description is None:
                description = v.get("details")
            if description is None:
                description = "N/A"

            # The "summary" field should be a single line, but "details" might
            # be multiple (Markdown-formatted) lines. So, we normalize our
            # description into a single line (and potentially break the Markdown
            # formatting in the process).
            description = description.replace("\n", " ")

            results.append(
                VulnerabilityResult(
                    id=vuln_id,
                    description=description,
                    fix_versions=fix_versions,
                    # A missing "aliases" key is treated as "no aliases", the
                    # same way `OsvService` handles it.
                    aliases=set(v.get("aliases", [])),
                    published=self._parse_rfc3339(v.get("published")),
                )
            )

        return spec, results