This commit is contained in:
Iliyan Angelov
2025-12-01 06:50:10 +02:00
parent 91f51bc6fe
commit 62c1fe5951
4682 changed files with 544807 additions and 31208 deletions

View File

@@ -0,0 +1,7 @@
"""Top-level package for Dependency Parser."""
__author__ = """Jannis Gebauer"""
__email__ = 'support@pyup.io'
__version__ = '0.6.3'
from .parser import parse # noqa

View File

@@ -0,0 +1,231 @@
import json
from json import JSONEncoder
from . import filetypes, errors
class Dependency:
    """
    A single dependency parsed out of a dependency file, together with the
    metadata needed to serialize, compare, or update it.
    """
    def __init__(self, name, specs, line, source="pypi", meta=None,
                 extras=None, line_numbers=None, index_server=None,
                 hashes=(), dependency_type=None, sections=None):
        """
        :param name: str, package name as written in the dependency file
        :param specs: version specifiers (e.g. a packaging SpecifierSet)
        :param line: str, the raw line the dependency was parsed from
        :param source: str, package source, defaults to "pypi"
        :param meta: dict of additional metadata; a fresh dict per instance
            when omitted
        :param extras: list of extras (e.g. ["security"]); a fresh list per
            instance when omitted
        :param line_numbers: line numbers the dependency spans in the file
        :param index_server: str, index server URL, if any
        :param hashes: --hash values attached to the requirement line
        :param dependency_type: filetypes constant of the source file
        :param sections: list of file sections the dependency belongs to
        """
        self.name = name
        # normalized lookup key: lowercase, underscores mapped to dashes
        self.key = name.lower().replace("_", "-")
        self.specs = specs
        self.line = line
        self.source = source
        # BUGFIX: the previous defaults (meta={}, extras=[]) were mutable
        # objects shared across every Dependency instance; use None
        # sentinels and create a fresh container per instance instead.
        self.meta = {} if meta is None else meta
        self.line_numbers = line_numbers
        self.index_server = index_server
        self.hashes = hashes
        self.dependency_type = dependency_type
        self.extras = [] if extras is None else extras
        self.sections = sections

    def __str__(self): # pragma: no cover
        """
        :return: str, short debug representation
        """
        return "Dependency({name}, {specs}, {line})".format(
            name=self.name,
            specs=self.specs,
            line=self.line
        )

    def serialize(self):
        """
        :return: dict of all fields, suitable for `deserialize`
        """
        return {
            "name": self.name,
            "specs": self.specs,
            "line": self.line,
            "source": self.source,
            "meta": self.meta,
            "line_numbers": self.line_numbers,
            "index_server": self.index_server,
            "hashes": self.hashes,
            "dependency_type": self.dependency_type,
            "extras": self.extras,
            "sections": self.sections
        }

    @classmethod
    def deserialize(cls, d):
        """
        :param d: dict, as produced by `serialize`
        :return: Dependency
        """
        return cls(**d)

    @property
    def full_name(self):
        """
        :return: str, name including extras, e.g. "requests[security]"
        """
        if self.extras:
            return "{}[{}]".format(self.name, ",".join(self.extras))
        return self.name
class DparseJSONEncoder(JSONEncoder):
    """JSON encoder aware of the non-JSON-native values dparse produces."""

    def default(self, o):
        # Imported lazily, matching the original: packaging is only needed
        # once a SpecifierSet actually has to be encoded.
        from packaging.specifiers import SpecifierSet

        if isinstance(o, SpecifierSet):
            # specifier sets serialize as their string form, e.g. "==1.0"
            return str(o)
        if isinstance(o, set):
            # JSON has no set type; emit a list instead
            return list(o)
        return JSONEncoder.default(self, o)
class DependencyFile:
    """
    A dependency file (requirements.txt, Pipfile, poetry.lock, ...) and the
    dependencies parsed out of it.
    """
    def __init__(self, content, path=None, sha=None, file_type=None,
                 marker=((), ()), parser=None, resolve=False):
        """
        :param content: str, file content
        :param path: str, path of the file (used for parser detection and
            for resolving referenced requirement files)
        :param sha: str, sha of the content
        :param marker: tuple of (file markers, line markers); marked files
            and lines are skipped by the parsers
        :param file_type: one of the `filetypes` constants, if known
        :param parser: parser class to use, bypassing detection
        :param resolve: bool, recursively resolve referenced files
        :raises errors.UnknownDependencyFileError: if no parser can be
            determined from file_type or path
        """
        self.content = content
        self.file_type = file_type
        self.path = path
        self.sha = sha
        self.marker = marker
        self.dependencies = []
        self.resolved_files = []
        self.is_valid = False
        self.file_marker, self.line_marker = marker

        parser_cls = parser if parser else self._detect_parser(file_type, path)
        if parser_cls is None:
            raise errors.UnknownDependencyFileError
        self.parser = parser_cls(self, resolve=resolve)

    @staticmethod
    def _detect_parser(file_type, path):
        """Return the parser class for file_type/path, or None if unknown.

        An explicit file_type takes precedence over path-based detection.
        """
        from . import parser as parser_class
        if file_type is not None:
            return {
                filetypes.requirements_txt: parser_class.RequirementsTXTParser,
                filetypes.tox_ini: parser_class.ToxINIParser,
                filetypes.conda_yml: parser_class.CondaYMLParser,
                filetypes.pipfile: parser_class.PipfileParser,
                filetypes.pipfile_lock: parser_class.PipfileLockParser,
                filetypes.setup_cfg: parser_class.SetupCfgParser,
                filetypes.poetry_lock: parser_class.PoetryLockParser,
                filetypes.pyproject_toml: parser_class.PyprojectTomlParser,
            }.get(file_type)
        if path is not None:
            # order matters: the first matching suffix wins
            by_suffix = (
                ((".txt", ".in"), parser_class.RequirementsTXTParser),
                ((".yml",), parser_class.CondaYMLParser),
                ((".ini",), parser_class.ToxINIParser),
                (("Pipfile",), parser_class.PipfileParser),
                (("Pipfile.lock",), parser_class.PipfileLockParser),
                (("setup.cfg",), parser_class.SetupCfgParser),
                ((filetypes.poetry_lock,), parser_class.PoetryLockParser),
                ((filetypes.pyproject_toml,),
                 parser_class.PyprojectTomlParser),
            )
            for suffixes, cls in by_suffix:
                if path.endswith(suffixes):
                    return cls
        return None

    @property
    def resolved_dependencies(self):
        """All dependencies, including those of recursively resolved files."""
        deps = self.dependencies.copy()
        for d in self.resolved_files:
            # resolved_files may also hold plain paths (when resolve=False)
            if isinstance(d, DependencyFile):
                deps.extend(d.resolved_dependencies)
        return deps

    def serialize(self):
        """
        :return: dict representation, suitable for `deserialize`
        """
        return {
            "file_type": self.file_type,
            "content": self.content,
            "path": self.path,
            "sha": self.sha,
            "dependencies": [dep.serialize() for dep in self.dependencies],
            "resolved_dependencies": [dep.serialize() for dep in
                                      self.resolved_dependencies]
        }

    @classmethod
    def deserialize(cls, d):
        """
        Restore a DependencyFile from `serialize()` output.

        :param d: dict, as produced by `serialize`
        :return: DependencyFile
        """
        d = dict(d)  # don't mutate the caller's dict
        dependencies = [Dependency.deserialize(dep) for dep in
                        d.pop("dependencies", [])]
        # BUGFIX: serialize() emits this derived key, but __init__ does not
        # accept it; without the pop, deserialize(serialize(x)) raises
        # TypeError.
        d.pop("resolved_dependencies", None)
        instance = cls(**d)
        instance.dependencies = dependencies
        return instance

    def json(self): # pragma: no cover
        """
        :return: str, pretty-printed JSON of `serialize()`
        """
        return json.dumps(self.serialize(), indent=2, cls=DparseJSONEncoder)

    def parse(self):
        """
        Run the parser over the content; marked files are skipped and
        flagged invalid.

        :return: self
        """
        if self.parser.is_marked_file:
            self.is_valid = False
            return self
        self.parser.parse()
        self.is_valid = len(self.dependencies) > 0 or len(
            self.resolved_files) > 0
        return self

View File

@@ -0,0 +1,15 @@
class UnknownDependencyFileError(Exception):
    """Raised when no parser can be determined for a dependency file."""

    def __init__(self, message="Unknown File type to parse"):
        """
        :param message: str, human-readable error message
        """
        self.message = message
        super().__init__(message)
class MalformedDependencyFileError(Exception):
    """Raised when a dependency file cannot be parsed at all."""

    def __init__(self, message="The dependency file is malformed. {info}",
                 info=""):
        """
        :param message: str, format string with an ``{info}`` placeholder
        :param info: str, detail text interpolated into the message
        """
        formatted = message.format(info=info)
        self.message = formatted
        super().__init__(formatted)

View File

@@ -0,0 +1,8 @@
requirements_txt = "requirements.txt"
conda_yml = "conda.yml"
setup_cfg = "setup.cfg"
tox_ini = "tox.ini"
pipfile = "Pipfile"
pipfile_lock = "Pipfile.lock"
poetry_lock = "poetry.lock"
pyproject_toml = "pyproject.toml"

View File

@@ -0,0 +1,534 @@
import os
from collections import OrderedDict
import re
import sys
from configparser import ConfigParser, NoOptionError
from pathlib import PurePath
from .errors import MalformedDependencyFileError
from .regex import HASH_REGEX
from .dependencies import DependencyFile, Dependency
from packaging.requirements import Requirement as PackagingRequirement,\
InvalidRequirement
from . import filetypes
from packaging.specifiers import SpecifierSet
from packaging.version import Version, InvalidVersion
import json
if sys.version_info >= (3, 11):
import tomllib
else:
import tomli as tomllib
# this is a backport from setuptools 26.1
# Kept verbatim (including its quirks) so behavior matches the vendored
# upstream; excluded from coverage for the same reason.
def setuptools_parse_requirements_backport(strs): # pragma: no cover
    # Copyright (C) 2016 Jason R Coombs <jaraco@jaraco.com>
    #
    # Permission is hereby granted, free of charge, to any person obtaining a
    # copy of this software and associated documentation files
    # (the "Software"), to deal in the Software without restriction, including
    # without limitation the rights to use, copy, modify, merge, publish,
    # distribute, sublicense, and/or sell copies of the Software, and to permit
    # persons to whom the Software is furnished to do so, subject to the
    # following conditions:
    #
    # The above copyright notice and this permission notice shall be included
    # in all copies or substantial portions of the Software.
    #
    # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    """Yield ``Requirement`` objects for each specification in `strs`
    `strs` must be a string, or a (possibly-nested) iterable thereof.
    """
    # create a steppable iterator, so we can handle \-continuations
    def yield_lines(strs):
        """Yield non-empty/non-comment lines of a string or sequence"""
        if isinstance(strs, str):
            for s in strs.splitlines():
                s = s.strip()
                # skip blank lines/comments
                if s and not s.startswith('#'):
                    yield s
        else:
            # nested iterable: recurse into each element
            for ss in strs:
                for s in yield_lines(ss):
                    yield s
    lines = iter(yield_lines(strs))
    for line in lines:
        # Drop comments -- a hash without a space may be in a URL.
        if ' #' in line:
            line = line[:line.find(' #')]
        # If there is a line continuation, drop it, and append the next line.
        if line.endswith('\\'):
            # NOTE(review): [:-2] drops the backslash AND the character
            # before it; correct only when a space precedes the backslash
            # (the upstream setuptools assumption). For inputs like
            # "pkg==1.0\" with no space this eats a character — confirm
            # before changing, since dparse mirrors upstream verbatim.
            line = line[:-2].strip()
            line += next(lines)
        yield PackagingRequirement(line)
class RequirementsTXTLineParser:
    """Parses a single requirements.txt-style line into a Dependency."""

    @classmethod
    def parse(cls, line):
        """
        Parse one requirement line.

        :param line: str, a single (possibly commented) requirement line
        :return: Dependency, or None if the line is not a valid requirement
        """
        # setuptools requires a space before the comment; if a tab directly
        # precedes the '#', insert one.
        candidate = line.replace("\t#", "\t #") if "\t#" in line else line
        try:
            parsed, = setuptools_parse_requirements_backport(candidate)
        except InvalidRequirement:
            return None
        return Dependency(
            name=parsed.name,
            specs=parsed.specifier,
            line=line,
            extras=parsed.extras,
            dependency_type=filetypes.requirements_txt
        )
class Parser:
    """Base class for all dependency file parsers."""

    def __init__(self, obj, resolve=False):
        """
        :param obj: DependencyFile this parser operates on
        :param resolve: bool, resolve referenced requirement files
        """
        self.obj = obj
        self._lines = None
        self.resolve = resolve

    def iter_lines(self, lineno=0):
        """
        Yield content lines starting at ``lineno``.

        :param lineno: int, index of the first line to yield
        :return: generator of str
        """
        for content_line in self.lines[lineno:]:
            yield content_line

    @property
    def lines(self):
        """
        :return: list of str, the content split into lines (cached)
        """
        if self._lines is None:
            self._lines = self.obj.content.splitlines()
        return self._lines

    @property
    def is_marked_file(self):
        """
        :return: True if a file marker appears within the first 3 lines
        """
        for index, content_line in enumerate(self.iter_lines()):
            if any(marker in content_line
                   for marker in self.obj.file_marker):
                return True
            if index >= 2:
                break
        return False

    def is_marked_line(self, line):
        """
        :param line: str
        :return: True if the line contains any line marker
        """
        return any(marker in line for marker in self.obj.line_marker)

    @classmethod
    def parse_hashes(cls, line):
        """
        Split ``--hash=...`` fragments off a line.

        :param line: str
        :return: tuple of (line without hashes, list of hash fragments)
        """
        found = [match.group(0) for match in re.finditer(HASH_REGEX, line)]
        return re.sub(HASH_REGEX, "", line).strip(), found

    @classmethod
    def parse_index_server(cls, line):
        """
        Extract the index server URL from an ``-i``/``--index-url`` line.

        :param line: str
        :return: str URL with a trailing slash, or None
        """
        parts = re.split(pattern=r"[=\s]+", string=line.strip(), maxsplit=100)
        if len(parts) < 2:
            return None
        url = parts[1]
        return url if url.endswith("/") else url + "/"

    @classmethod
    def resolve_file(cls, file_path, line):
        """
        Resolve an ``-r``/``--requirement`` include relative to file_path.

        :param file_path: str, path of the file containing the include
        :param line: str, the include line
        :return: str, path of the referenced file
        """
        target = line.replace("-r ", "").replace("--requirement ", "")
        # strip an inline comment from the include line
        if " #" in target:
            target = target.split("#")[0].strip()
        return str(PurePath(file_path).parent.joinpath(target))
class RequirementsTXTParser(Parser):
    """
    Parser for requirements.txt-like files (requirements.txt / *.in).
    """
    def parse(self):
        """
        Parses a requirements.txt-like file

        Populates ``self.obj.dependencies`` with parsed requirements and
        ``self.obj.resolved_files`` with ``-r``/``--requirement`` includes
        (DependencyFile objects when ``self.resolve`` is set and the file
        exists, otherwise plain paths).
        """
        index_server = None
        for num, line in enumerate(self.iter_lines()):
            line = line.rstrip()
            if not line:
                continue
            if line.startswith('#'):
                # comments are lines that start with # only
                continue
            if line.startswith('-i') or \
                line.startswith('--index-url') or \
                line.startswith('--extra-index-url'):
                # this file is using a private index server, try to parse it
                index_server = self.parse_index_server(line)
                continue
            elif self.obj.path and \
                (line.startswith('-r') or
                 line.startswith('--requirement')):
                # a referenced requirements file, resolved relative to the
                # path of the file currently being parsed
                req_file_path = self.resolve_file(self.obj.path, line)
                if self.resolve and os.path.exists(req_file_path):
                    with open(req_file_path) as f:
                        content = f.read()
                    # recursively parse the referenced file
                    dep_file = DependencyFile(
                        content=content,
                        path=req_file_path,
                        resolve=True
                    )
                    dep_file.parse()
                    self.obj.resolved_files.append(dep_file)
                else:
                    # not resolving (or file missing): record the path only
                    self.obj.resolved_files.append(req_file_path)
            elif line.startswith('-f') or line.startswith('--find-links') or \
                line.startswith('--no-index') or \
                line.startswith('--allow-external') or \
                line.startswith('--allow-unverified') or \
                line.startswith('-Z') or \
                line.startswith('--always-unzip'):
                # pip options that carry no dependency information
                continue
            elif self.is_marked_line(line):
                continue
            else:
                try:
                    parseable_line = line
                    # multiline requirements are not parseable
                    if "\\" in line:
                        # condense the continuation into one parseable line
                        # while keeping `line` as the original multiline text
                        parseable_line = line.replace("\\", "")
                        for next_line in self.iter_lines(num + 1):
                            parseable_line += next_line.strip().replace("\\",
                                                                        "")
                            line += "\n" + next_line
                            if "\\" in next_line:
                                continue
                            break
                    # ignore multiline requirements if they are marked
                    if self.is_marked_line(parseable_line):
                        continue
                    hashes = []
                    if "--hash" in parseable_line:
                        # strip --hash fragments before parsing; keep them
                        # to attach to the Dependency afterwards
                        parseable_line, hashes = Parser.parse_hashes(
                            parseable_line)
                    req = RequirementsTXTLineParser.parse(parseable_line)
                    if req:
                        req.hashes = hashes
                        req.index_server = index_server
                        # replace the requirements line with the 'real' line
                        req.line = line
                        self.obj.dependencies.append(req)
                except ValueError:
                    continue
class ToxINIParser(Parser):
    """Parses ``deps`` option values from every section of a tox.ini file."""

    def parse(self):
        """
        Populate ``self.obj.dependencies`` from all ``deps`` options.
        """
        config = ConfigParser()
        config.read_string(self.obj.content)
        for section in config.sections():
            try:
                deps = config.get(section=section, option="deps")
            except NoOptionError:
                # section has no deps option; nothing to collect
                continue
            for dep_line in deps.splitlines():
                if not dep_line or self.is_marked_line(dep_line):
                    continue
                req = RequirementsTXTLineParser.parse(dep_line)
                if req:
                    req.dependency_type = self.obj.file_type
                    self.obj.dependencies.append(req)
class CondaYMLParser(Parser):
    """Parses pip requirements out of a conda environment YAML file."""

    def parse(self):
        """
        Populate ``self.obj.dependencies`` from ``pip:`` entries under the
        ``dependencies`` key; invalid YAML is silently ignored.
        """
        import yaml
        try:
            data = yaml.safe_load(self.obj.content)
            if data and 'dependencies' in data and \
                    isinstance(data['dependencies'], list):
                for entry in data['dependencies']:
                    # only dict entries with a 'pip' key hold pip packages
                    if not isinstance(entry, dict) or 'pip' not in entry:
                        continue
                    for dep_line in entry['pip']:
                        if self.is_marked_line(dep_line):
                            continue
                        req = RequirementsTXTLineParser.parse(dep_line)
                        if req:
                            req.dependency_type = self.obj.file_type
                            self.obj.dependencies.append(req)
        except yaml.YAMLError:
            pass
class PipfileParser(Parser):
    """Parses a Pipfile (as used by pipenv)."""

    def parse(self):
        """
        Parse a Pipfile (as seen in pipenv)
        :return:
        """
        try:
            data = tomllib.loads(self.obj.content)
            if not data:
                return
            for section in ['packages', 'dev-packages']:
                if section not in data:
                    continue
                for name, version_spec in data[section].items():
                    # skip on VCS dependencies
                    if not isinstance(version_spec, str):
                        continue
                    # '*' means "any version": an empty specifier set
                    spec = '' if version_spec == '*' else version_spec
                    self.obj.dependencies.append(
                        Dependency(
                            name=name, specs=SpecifierSet(spec),
                            dependency_type=filetypes.pipfile,
                            line=''.join([name, spec]),
                            sections=[section]
                        )
                    )
        except (tomllib.TOMLDecodeError, IndexError):
            pass
class PipfileLockParser(Parser):
    """Parses a Pipfile.lock (as used by pipenv)."""

    def parse(self):
        """
        Parse a Pipfile.lock (as seen in pipenv), collecting the ``default``
        and ``develop`` sections.

        :raises MalformedDependencyFileError: if the content is not valid
            JSON (or another ValueError occurs while reading it)
        """
        try:
            data = json.loads(self.obj.content, object_pairs_hook=OrderedDict)
            if data:
                for package_type in ['default', 'develop']:
                    if package_type in data:
                        for name, meta in data[package_type].items():
                            # skip VCS dependencies
                            if 'version' not in meta:
                                continue
                            specs = meta['version']
                            # BUGFIX: 'hashes' may be absent for some
                            # entries; meta['hashes'] raised an uncaught
                            # KeyError — treat missing hashes as empty.
                            hashes = meta.get('hashes', [])
                            self.obj.dependencies.append(
                                Dependency(
                                    name=name, specs=SpecifierSet(specs),
                                    dependency_type=filetypes.pipfile_lock,
                                    hashes=hashes,
                                    line=''.join([name, specs]),
                                    sections=[package_type]
                                )
                            )
        except ValueError as e:
            raise MalformedDependencyFileError(info=str(e))
class SetupCfgParser(Parser):
    """Parses requirement options out of a setup.cfg file."""

    def parse(self):
        """
        Populate ``self.obj.dependencies`` from the ``options`` section
        (install_requires / setup_requires / test_require) and from every
        option under ``options.extras_require``.
        """
        parser = ConfigParser()
        parser.read_string(self.obj.content)
        for section in parser.sections():
            # BUGFIX: ConfigParser.sections() yields plain strings, so the
            # previous `section.name` raised AttributeError and
            # `section.get(...)` was not a valid call; compare the string
            # and read values through the parser instead.
            if section == 'options':
                options = 'install_requires', 'setup_requires', 'test_require'
                for name in options:
                    if parser.has_option('options', name):
                        content = parser.get('options', name)
                        self._parse_content(content)
            elif section == 'options.extras_require':
                for _, content in parser.items('options.extras_require'):
                    self._parse_content(content)

    def _parse_content(self, content):
        """Parse requirement lines out of a multi-line option value."""
        for line in content.splitlines():
            if self.is_marked_line(line):
                continue
            if line:
                req = RequirementsTXTLineParser.parse(line)
                if req:
                    req.dependency_type = self.obj.file_type
                    self.obj.dependencies.append(req)
class PoetryLockParser(Parser):
    """
    Parser for poetry.lock files.

    Prefers the installed poetry library; if poetry is unavailable or fails
    for any reason, falls back to reading the lock file as plain TOML.
    """
    def parse(self):
        """
        Parse a poetry.lock
        """
        try:
            # preferred path: let poetry itself interpret the lock file
            from poetry.packages.locker import Locker
            from pathlib import Path
            lock_path = Path(self.obj.path)
            repository = Locker(lock_path, {}).locked_repository()
            for pkg in repository.packages:
                self.obj.dependencies.append(
                    Dependency(
                        name=pkg.name, specs=SpecifierSet(f"=={pkg.version.text}"),
                        dependency_type=filetypes.poetry_lock,
                        line=pkg.to_dependency().to_pep_508(),
                        sections=list(pkg.dependency_group_names())
                    )
                )
        except Exception:
            # fallback: parse the lock file content as plain TOML; any
            # failure here is reported as a malformed dependency file
            try:
                data = tomllib.loads(self.obj.content)
                pkg_key = 'package'
                if data:
                    dependencies = data[pkg_key]
                    for dep in dependencies:
                        name = dep['name']
                        spec = "=={version}".format(
                            version=Version(dep['version']))
                        # older lock files record a per-package "category"
                        sections = [dep['category']] if "category" in dep else []
                        self.obj.dependencies.append(
                            Dependency(
                                name=name, specs=SpecifierSet(spec),
                                dependency_type=filetypes.poetry_lock,
                                line=''.join([name, spec]),
                                sections=sections
                            )
                        )
            except Exception as e:
                raise MalformedDependencyFileError(info=str(e))
class PyprojectTomlParser(Parser):
    def parse(self) -> None:
        """Parse a pyproject.toml file.

        Collects ``[project].dependencies`` and every table under
        ``[project.optional-dependencies]``.
        Refer to https://setuptools.pypa.io/en/latest/userguide/pyproject_config.html
        for configuration specification.

        :raises MalformedDependencyFileError: if the content is not valid
            TOML
        """
        try:
            cfg = tomllib.loads(self.obj.content)
        except (tomllib.TOMLDecodeError, IndexError) as e:
            raise MalformedDependencyFileError(info=str(e))
        if not cfg or "project" not in cfg:
            return
        sections = {
            "dependencies": cfg["project"].get("dependencies", []),
            **cfg["project"].get("optional-dependencies", {}),
        }
        for section, lines in sections.items():
            for line in lines:
                req = RequirementsTXTLineParser.parse(line)
                if req:
                    req.dependency_type = self.obj.file_type
                    # BUGFIX: previously set `req.section` (singular), an
                    # attribute nothing else reads; record it on `sections`
                    # as a list, consistent with Dependency.serialize()
                    # and every other parser.
                    req.sections = [section]
                    self.obj.dependencies.append(req)
def parse(content, file_type=None, path=None, sha=None, marker=((), ()),
          parser=None, resolve=False):
    """
    Parse ``content`` and return a DependencyFile with its dependencies.

    :param content: str, the dependency file content
    :param file_type: one of the `filetypes` constants, if known
    :param path: str, path of the file (used for parser detection)
    :param sha: str, sha of the content
    :param marker: tuple of (file markers, line markers)
    :param parser: parser class to force, bypassing detection
    :param resolve: bool, resolve referenced requirement files
    :return: parsed DependencyFile
    """
    return DependencyFile(
        content=content,
        path=path,
        sha=sha,
        marker=marker,
        file_type=file_type,
        parser=parser,
        resolve=resolve
    ).parse()

View File

@@ -0,0 +1 @@
# Matches pip hash fragments such as "--hash=sha256:abcd...".
# NOTE(review): inside the character class, "|" is a literal pipe, not an
# alternation — the class matches "=", "|" or a space after "--hash";
# presumably only "=" and " " were intended. Confirm before tightening.
HASH_REGEX = r"--hash[=| ]\w+:\w+"

View File

@@ -0,0 +1,128 @@
import re
import json
import tempfile
import os
import sys
if sys.version_info >= (3, 11):
import tomllib
else:
import tomli as tomllib
class RequirementsTXTUpdater:
    """Rewrites a pinned requirement line inside requirements.txt content."""

    # Anchors the old line at the start of a line, followed only by optional
    # whitespace/EOL, so substrings of other lines don't match.
    SUB_REGEX = r"^{}(?=\s*\r?\n?$)"

    @classmethod
    def update(cls, content, dependency, version, spec="==", hashes=()):
        """
        Updates the requirement to the latest version for the given content
        and adds hashes if necessary.
        :param content: str, content
        :return: str, updated content
        """
        replacement = "{name}{spec}{version}".format(
            name=dependency.full_name, spec=spec, version=version)
        tail = ''
        # leave environment markers intact
        if ";" in dependency.line:
            # condense multiline, split out the env marker, strip comments
            # and --hashes
            env_marker = dependency.line.splitlines()[0].split(";", 1)[1]
            replacement += ";" + \
                env_marker.split("#")[0].split("--hash")[0].rstrip()
        # carry over an inline comment, including its original spacing
        if "#" in dependency.line:
            requirement, _, comment = dependency.line.partition("#")
            # collect the whitespace run between requirement and comment
            gap = ''
            for ch in reversed(requirement):
                if ch in (' ', '\t'):
                    gap += ch
                else:
                    break
            tail += gap + "#" + comment
        # a hashed requirement needs a multiline break before the hashes
        if dependency.hashes and not replacement.endswith("\\"):
            replacement += " \\"
        # append the new hashes, one per continuation line
        if hashes:
            last = len(hashes) - 1
            for idx, entry in enumerate(hashes):
                replacement += "\n --hash={method}:{hash}".format(
                    method=entry['method'],
                    hash=entry['hash']
                )
                # append a new multiline break if this is not the last line
                if idx < last:
                    replacement += " \\"
        replacement += tail
        pattern = cls.SUB_REGEX.format(re.escape(dependency.line))
        return re.sub(pattern, replacement, content, flags=re.MULTILINE)
class CondaYMLUpdater(RequirementsTXTUpdater):
    # Unanchored variant of the parent pattern: conda YAML requirement
    # entries are indented list items, so the old line is matched anywhere
    # on a line, not only at column 0.
    SUB_REGEX = r"{}(?=\s*\r?\n?$)"
class ToxINIUpdater(CondaYMLUpdater):
    # deps in tox.ini are indented option values; reuse the unanchored
    # matching behavior from CondaYMLUpdater unchanged.
    pass
class SetupCFGUpdater(CondaYMLUpdater):
    # setup.cfg option values are indented as well; reuse the unanchored
    # matching behavior from CondaYMLUpdater unchanged.
    pass
class PipfileUpdater:
@classmethod
def update(cls, content, dependency, version, spec="==", hashes=()):
data = tomllib.loads(content)
if data:
for package_type in ['packages', 'dev-packages']:
if package_type in data:
if dependency.full_name in data[package_type]:
data[package_type][
dependency.full_name] = "{spec}{version}".format(
spec=spec, version=version
)
try:
from pipenv.project import Project
except ImportError:
raise ImportError(
"Updating a Pipfile requires the pipenv extra to be installed."
" Install it with pip install dparse[pipenv]")
pipfile = tempfile.NamedTemporaryFile(delete=False)
pipfile.close()
p = Project(chdir=False)
p.write_toml(data=data, path=pipfile.name)
data = open(pipfile.name).read()
os.remove(pipfile.name)
return data
class PipfileLockUpdater:
    """Rewrites a dependency pin inside Pipfile.lock (JSON) content."""

    @classmethod
    def update(cls, content, dependency, version, spec="==", hashes=()):
        """
        Update ``dependency`` to ``version`` in Pipfile.lock ``content``.

        :param content: str, Pipfile.lock JSON content
        :param dependency: Dependency to update
        :param version: str, the new version
        :param spec: str, specifier operator, defaults to "=="
        :param hashes: iterable of {"method": ..., "hash": ...} dicts
        :return: str, updated JSON (4-space indent, trailing newline)
        """
        data = json.loads(content)
        if data:
            name = dependency.full_name
            for section in ('default', 'develop'):
                if section in data and name in data[section]:
                    # replace the whole entry: new hashes plus new pin
                    data[section][name] = {
                        'hashes': [
                            "{method}:{hash}".format(
                                method=h['method'],
                                hash=h['hash']
                            ) for h in hashes
                        ],
                        'version': "{spec}{version}".format(
                            spec=spec, version=version
                        )
                    }
        return json.dumps(data, indent=4, separators=(',', ': ')) + "\n"