updates
This commit is contained in:
@@ -0,0 +1,27 @@
|
||||
"""
|
||||
Vulnerability service interfaces and implementations for `pip-audit`.
|
||||
"""
|
||||
|
||||
from .interface import (
|
||||
ConnectionError,
|
||||
Dependency,
|
||||
ResolvedDependency,
|
||||
ServiceError,
|
||||
SkippedDependency,
|
||||
VulnerabilityResult,
|
||||
VulnerabilityService,
|
||||
)
|
||||
from .osv import OsvService
|
||||
from .pypi import PyPIService
|
||||
|
||||
# Names re-exported by this package: the shared interface types imported from
# `.interface`, followed by the concrete services from `.osv` and `.pypi`.
__all__ = [
    "ConnectionError",
    "Dependency",
    "ResolvedDependency",
    "ServiceError",
    "SkippedDependency",
    "VulnerabilityResult",
    "VulnerabilityService",
    "OsvService",
    "PyPIService",
]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,190 @@
|
||||
"""
|
||||
Interfaces for interacting with vulnerability services, i.e. sources
|
||||
of vulnerability information for fully resolved Python packages.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import Iterator
|
||||
from dataclasses import dataclass, replace
|
||||
from datetime import datetime
|
||||
from typing import Any, NewType
|
||||
|
||||
from packaging.utils import canonicalize_name
|
||||
from packaging.version import Version
|
||||
|
||||
# A distinct static type (a `str` at runtime) for vulnerability identifiers;
# it is the declared type of `VulnerabilityResult.id`.
VulnerabilityID = NewType("VulnerabilityID", str)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Dependency:
    """
    Represents an abstract Python package.

    This class cannot be constructed directly: its constructor always raises.
    Instantiate one of the dataclass subclasses (`ResolvedDependency` or
    `SkippedDependency`) instead.
    """

    name: str
    """
    The package's **uncanonicalized** name.

    Use the `canonical_name` property when a canonicalized form is necessary.
    """

    def __init__(self, *_args: Any, **_kwargs: Any) -> None:
        """
        A stub constructor that always fails.

        Accepts (and ignores) any arguments, then raises `NotImplementedError`
        so that a bare `Dependency` can never be instantiated.
        """
        raise NotImplementedError

    # TODO(ww): Use functools.cached_property when supported Python is 3.8+.
    @property
    def canonical_name(self) -> str:
        """
        The `Dependency`'s PEP-503 canonicalized name.

        Computed from `name` on every access via `canonicalize_name`.
        """
        return canonicalize_name(self.name)

    def is_skipped(self) -> bool:
        """
        Check whether the `Dependency` was skipped by the audit.

        Returns `True` for `SkippedDependency` instances (including instances
        of any subclass of it) and `False` otherwise.
        """
        # `isinstance` rather than an exact class comparison, so that a
        # specialization of `SkippedDependency` is still reported as skipped.
        return isinstance(self, SkippedDependency)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ResolvedDependency(Dependency):
    """
    A Python package that has been fully resolved, i.e. one that carries a
    concrete version in addition to its name.
    """

    # The concrete version this dependency resolved to.
    version: Version
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SkippedDependency(Dependency):
    """
    A Python package that could not be audited and was therefore skipped.
    """

    # Human-readable explanation of why the package was skipped.
    skip_reason: str
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class VulnerabilityResult:
    """
    Represents a "result" from a vulnerability service, indicating a vulnerability
    in some Python package.
    """

    id: VulnerabilityID
    """
    A service-provided identifier for the vulnerability.
    """

    description: str
    """
    A human-readable description of the vulnerability.
    """

    fix_versions: list[Version]
    """
    A list of versions that can be upgraded to that resolve the vulnerability.
    """

    aliases: set[str]
    """
    A set of aliases (alternative identifiers) for this result.
    """

    published: datetime | None = None
    """
    When the vulnerability was first published.
    """

    def alias_of(self, other: VulnerabilityResult) -> bool:
        """
        Returns whether this result is an "alias" of another result.

        Two results are said to be aliases if their respective sets of
        `{id, *aliases}` intersect at all. A result is therefore its own alias.
        """
        return bool((self.aliases | {self.id}).intersection(other.aliases | {other.id}))

    def merge_aliases(self, other: VulnerabilityResult) -> VulnerabilityResult:
        """
        Merge `other`'s aliases into this result, returning a new result.

        The returned result is a copy of `self` whose alias set is the union of
        both alias sets with this result's own `id` removed. Neither `self` nor
        `other` is modified.
        """

        # Our own ID should never occur in the alias set.
        # NOTE: The parentheses are required: `-` binds more tightly than `|`,
        # so without them our ID would only be removed from `other.aliases`
        # and could survive if it was already present in `self.aliases`.
        aliases = (self.aliases | other.aliases) - {self.id}
        return replace(self, aliases=aliases)

    def has_any_id(self, ids: set[str]) -> bool:
        """
        Returns whether ids intersects with {id} | aliases.
        """
        return bool(ids & (self.aliases | {self.id}))
|
||||
|
||||
|
||||
class VulnerabilityService(ABC):
    """
    Abstract base class for providers of Python package vulnerability
    information.
    """

    @abstractmethod
    def query(
        self, spec: Dependency
    ) -> tuple[Dependency, list[VulnerabilityResult]]:  # pragma: no cover
        """
        Look up the given `Dependency` in this service.

        Implementations return a pair of a `Dependency` and the list of
        `VulnerabilityResult`s found for it.
        """
        raise NotImplementedError

    def query_all(
        self, specs: Iterator[Dependency]
    ) -> Iterator[tuple[Dependency, list[VulnerabilityResult]]]:
        """
        Lazily query the service for each dependency in `specs`, in order.

        This default simply calls `query` once per dependency. Services that
        support batched or bulk requests can override it with something
        more efficient.
        """
        yield from (self.query(dependency) for dependency in specs)

    @staticmethod
    def _parse_rfc3339(dt: str | None) -> datetime | None:
        """
        Parse a `Z`-suffixed timestamp string into a (naive) `datetime`.

        Returns `None` when `dt` is `None`. Raises `ValueError` if `dt` matches
        neither of the two accepted layouts.
        """
        if dt is None:
            return None

        # NOTE: OSV documents its timestamps as RFC3339, but `strptime` cannot
        # express an optional component such as `%f`. We therefore attempt the
        # fractional-seconds layout first and fall back to whole seconds.
        # See: https://github.com/google/osv.dev/issues/857
        with_fraction = "%Y-%m-%dT%H:%M:%S.%fZ"
        whole_seconds = "%Y-%m-%dT%H:%M:%SZ"
        try:
            parsed = datetime.strptime(dt, with_fraction)
        except ValueError:
            parsed = datetime.strptime(dt, whole_seconds)
        return parsed
|
||||
|
||||
|
||||
class ServiceError(Exception):
    """
    The error raised whenever a `VulnerabilityService` fails, whatever the cause.

    Concrete `VulnerabilityService` implementations are expected to subclass
    this exception in order to supply additional context.
    """
|
||||
|
||||
|
||||
class ConnectionError(ServiceError):
    """
    A `ServiceError` specialization for the case where the vulnerability
    service cannot be reached or is offline.
    """
|
||||
@@ -0,0 +1,155 @@
|
||||
"""
|
||||
Functionality for using the [OSV](https://osv.dev/) API as a `VulnerabilityService`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, cast
|
||||
|
||||
import requests
|
||||
from packaging.version import Version
|
||||
|
||||
from pip_audit._cache import caching_session
|
||||
from pip_audit._service.interface import (
|
||||
ConnectionError,
|
||||
Dependency,
|
||||
ResolvedDependency,
|
||||
ServiceError,
|
||||
VulnerabilityResult,
|
||||
VulnerabilityService,
|
||||
)
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class OsvService(VulnerabilityService):
    """
    A `VulnerabilityService` implementation that obtains Python package
    vulnerability information from OSV.
    """

    def __init__(self, cache_dir: Path | None = None, timeout: int | None = None):
        """
        Create a new `OsvService`.

        `cache_dir` is an optional directory used to cache and reuse OSV API
        requests. If `None`, `pip-audit` will use its own internal caching directory.

        `timeout` is an optional number of seconds to wait for responses to
        network requests.
        """
        self.session = caching_session(cache_dir, use_pip=False)
        self.timeout = timeout

    def query(self, spec: Dependency) -> tuple[Dependency, list[VulnerabilityResult]]:
        """
        Queries OSV for the given `Dependency` specification.

        Skipped dependencies are returned untouched with no results. Raises
        `ConnectionError` on a connect timeout and `ServiceError` on an HTTP
        error status.

        See `VulnerabilityService.query`.
        """
        if spec.is_skipped():
            return spec, []
        spec = cast(ResolvedDependency, spec)

        payload = json.dumps(
            {
                "package": {"name": spec.canonical_name, "ecosystem": "PyPI"},
                "version": str(spec.version),
            }
        )
        try:
            response: requests.Response = self.session.post(
                url="https://api.osv.dev/v1/query",
                data=payload,
                timeout=self.timeout,
            )
            response.raise_for_status()
        except requests.ConnectTimeout:
            raise ConnectionError("Could not connect to OSV's vulnerability feed")
        except requests.HTTPError as http_error:
            raise ServiceError from http_error

        # An empty response body means that OSV knows of no vulnerabilities
        # for this package/version pair, so there is nothing to report.
        results: list[VulnerabilityResult] = []
        body = response.json()
        if not body:
            return spec, results

        entry: dict[str, Any]
        for entry in body["vulns"]:
            # Sanity check: only major version 1 of the schema is understood
            # here; anything else is skipped rather than misinterpreted.
            # A missing schema version is treated as 1.0.0, per the OSV spec.
            schema_version = Version(entry.get("schema_version", "1.0.0"))
            if schema_version.major != 1:
                logger.warning(f"Unsupported OSV schema version: {schema_version}")
                continue

            vuln_id = entry["id"]

            # Withdrawn vulnerabilities are dropped entirely.
            withdrawn_at = entry.get("withdrawn")
            if withdrawn_at is not None:
                logger.debug(f"OSV vuln entry '{vuln_id}' marked as withdrawn at {withdrawn_at}")
                continue

            # Prefer the (shorter) summary, then the details, then a
            # placeholder: neither field is required to be present.
            text = entry.get("summary")
            if text is None:
                text = entry.get("details")
                if text is None:
                    text = "N/A"

            # "details" may span several (Markdown-formatted) lines, so the
            # description is flattened onto one line, possibly at the expense
            # of the Markdown formatting.
            text = text.replace("\n", " ")

            # Without an "affected" list there is very little to work with,
            # so such entries are skipped.
            affecteds = entry.get("affected")
            if affecteds is None:
                logger.warning(f"OSV vuln entry '{vuln_id}' is missing 'affected' list")
                continue

            fixes: list[Version] = []
            for affected in affecteds:
                package = affected["package"]
                # Only entries for this package in the PyPI ecosystem matter.
                if package["name"] != spec.canonical_name or package["ecosystem"] != "PyPI":
                    continue
                for version_range in affected["ranges"]:
                    if version_range["type"] != "ECOSYSTEM":
                        continue
                    # Keep only the "fixed" events, then turn them into
                    # version objects. Only the first ECOSYSTEM range of
                    # each matching entry is consulted.
                    fixed_strs = [
                        event["fixed"] for event in version_range["events"] if "fixed" in event
                    ]
                    fixes = [Version(fixed) for fixed in fixed_strs]
                    break

            # The ranges aren't guaranteed to come in chronological order
            fixes.sort()

            result = VulnerabilityResult(
                id=vuln_id,
                description=text,
                fix_versions=fixes,
                aliases=set(entry.get("aliases", [])),
                published=self._parse_rfc3339(entry.get("published")),
            )
            results.append(result)

        return spec, results
|
||||
@@ -0,0 +1,135 @@
|
||||
"""
|
||||
Functionality for using the [PyPI](https://warehouse.pypa.io/api-reference/json.html)
|
||||
API as a `VulnerabilityService`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import cast
|
||||
|
||||
import requests
|
||||
from packaging.version import InvalidVersion, Version
|
||||
|
||||
from pip_audit._cache import caching_session
|
||||
from pip_audit._service.interface import (
|
||||
ConnectionError,
|
||||
Dependency,
|
||||
ResolvedDependency,
|
||||
ServiceError,
|
||||
SkippedDependency,
|
||||
VulnerabilityResult,
|
||||
VulnerabilityService,
|
||||
)
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PyPIService(VulnerabilityService):
    """
    A `VulnerabilityService` implementation that obtains Python package
    vulnerability information from PyPI.
    """

    def __init__(self, cache_dir: Path | None = None, timeout: int | None = None) -> None:
        """
        Create a new `PyPIService`.

        `cache_dir` is an optional directory used to cache and reuse PyPI API
        requests. If `None`, `pip-audit` will attempt to use `pip`'s cache directory before falling
        back on its own default cache directory.

        `timeout` is an optional number of seconds to wait for responses to
        network requests.
        """
        self.session = caching_session(cache_dir)
        self.timeout = timeout

    def query(self, spec: Dependency) -> tuple[Dependency, list[VulnerabilityResult]]:
        """
        Queries PyPI for the given `Dependency` specification.

        Skipped dependencies are returned untouched with no results. A 404
        from PyPI yields a `SkippedDependency`. Raises `ConnectionError` on
        redirect loops or connect timeouts, and `ServiceError` on other HTTP
        error statuses or malformed fix versions.

        See `VulnerabilityService.query`.
        """
        if spec.is_skipped():
            return spec, []
        spec = cast(ResolvedDependency, spec)

        url = f"https://pypi.org/pypi/{spec.canonical_name}/{str(spec.version)}/json"

        try:
            response: requests.Response = self.session.get(url=url, timeout=self.timeout)
            response.raise_for_status()
        except requests.TooManyRedirects:
            # A healthy PyPI instance should never do this, but it has been
            # observed during an outage or network event.
            # Ref 2022-06-10: https://status.python.org/incidents/lgpr13fy71bk
            raise ConnectionError("PyPI is not redirecting properly")
        except requests.ConnectTimeout:
            # Besides an ordinary network outage, the two usual causes are:
            # 1. PyPI's APIs are offline
            # 2. The user is behind a firewall or corporate network that blocks
            #    PyPI (and they're probably using custom indices)
            raise ConnectionError("Could not connect to PyPI's vulnerability feed")
        except requests.HTTPError as http_error:
            if response.status_code != 404:
                raise ServiceError from http_error

            # A 404 means PyPI does not know this package/version, so the
            # dependency is reported as skipped instead of failing the audit.
            skip_reason = (
                "Dependency not found on PyPI and could not be audited: "
                f"{spec.canonical_name} ({spec.version})"
            )
            logger.debug(skip_reason)
            return SkippedDependency(name=spec.name, skip_reason=skip_reason), []

        results: list[VulnerabilityResult] = []
        records = response.json().get("vulnerabilities")

        # No `vulnerabilities` key means that there are no vulnerabilities for any version
        if records is None:
            return spec, results

        for record in records:
            vuln_id = record["id"]

            # Withdrawn vulnerabilities are dropped entirely.
            withdrawn_at = record.get("withdrawn")
            if withdrawn_at is not None:
                logger.debug(f"PyPI vuln entry '{vuln_id}' marked as withdrawn at {withdrawn_at}")
                continue

            # Build the list of fix versions, sorted because the order in
            # which PyPI reports them isn't guaranteed to be chronological.
            try:
                fixes = sorted(map(Version, record["fixed_in"]))
            except InvalidVersion as iv:
                raise ServiceError(
                    f"Received malformed version from PyPI: {record['fixed_in']}"
                ) from iv

            # Prefer the summary, then the details, then a placeholder.
            text = record.get("summary")
            if text is None:
                text = record.get("details")
                if text is None:
                    text = "N/A"

            # "details" may span several (Markdown-formatted) lines, so the
            # description is flattened onto one line, possibly at the expense
            # of the Markdown formatting.
            text = text.replace("\n", " ")

            result = VulnerabilityResult(
                id=vuln_id,
                description=text,
                fix_versions=fixes,
                aliases=set(record["aliases"]),
                published=self._parse_rfc3339(record.get("published")),
            )
            results.append(result)

        return spec, results
|
||||
Reference in New Issue
Block a user