Skip to content

Instantly share code, notes, and snippets.

@EpicWink
Last active August 26, 2025 09:50
Show Gist options
  • Save EpicWink/a6239a5a5ed8052f1f72f22f745a0a6d to your computer and use it in GitHub Desktop.
Save EpicWink/a6239a5a5ed8052f1f72f22f745a0a6d to your computer and use it in GitHub Desktop.
Convert pip install report to pip requirements file
"""Construct pinned pip requirements file from pip install report."""
# See end of file for license
import typing as t
if t.TYPE_CHECKING:
import packaging.requirements
def _parse_requirements(
    packages: t.Iterable[t.Dict[str, t.Any]],
) -> t.List[t.Tuple[str, t.List["packaging.requirements.Requirement"], t.List[str]]]:
    """Parse the declared requirements of every package.

    Args:
        packages: package entries, each carrying a ``metadata`` mapping
            with a ``name`` and optional ``requires_dist`` and
            ``provides_extra`` entries

    Returns:
        one triplet per package, in input order: the package's
        normalised name, its parsed requirements, and its declared
        extras (an empty list when none are declared)
    """

    import packaging.requirements
    import packaging.utils

    parse_requirement = packaging.requirements.Requirement

    triplets = []
    for entry in packages:
        metadata = entry["metadata"]
        # A missing or null 'requires_dist' means "no requirements"
        requirements = [
            parse_requirement(text) for text in metadata.get("requires_dist") or []
        ]
        normalised_name = packaging.utils.canonicalize_name(
            metadata["name"], validate=True,
        )  # fmt: skip
        declared_extras = metadata.get("provides_extra") or []
        triplets.append((normalised_name, requirements, declared_extras))
    return triplets
def _discover_selected_extras(
    packages: t.Iterable[
        t.Tuple[str, t.Iterable["packaging.requirements.Requirement"], t.Any],
    ],
    pip_install_packages: t.Iterable[t.Dict[str, t.Any]],
    pip_install_environment: t.Dict[str, str],
) -> t.Dict[str, t.Set[str]]:
    """Work out which extras were selected for each package.

    Starts from the extras requested directly in the pip install report,
    then repeatedly adds the extras that packages ask of their
    requirements until two consecutive passes agree.

    Args:
        packages: triplets with package's normalised name and parsed
            requirements (the third element is ignored)
        pip_install_packages: original packages from pip install report
        pip_install_environment: environment which 'pip install' was
            invoked in (ie marker values)

    Returns:
        mapping of package names to their selected extras. Packages with
        no selected extras are not included
    """

    import packaging.utils

    canonicalize = packaging.utils.canonicalize_name
    known_names = {name for name, _, _ in packages}

    # Seed: extras the user asked for explicitly
    directly_requested = {}  # type: dict[str, set[str]]
    for entry in pip_install_packages:
        if not (entry.get("requested") and entry.get("requested_extras")):
            continue
        name = canonicalize(entry["metadata"]["name"], validate=True)
        directly_requested[name] = set(entry.get("requested_extras"))

    previous = None  # type: dict[str, set[str]] | None
    while True:
        # Every pass starts again from the seed, with fresh sets
        current = {name: set(extras) for name, extras in directly_requested.items()}

        for dependent, requirements, _ in packages:
            for requirement in requirements:
                target = canonicalize(requirement.name)
                if target not in known_names or not requirement.extras:
                    continue  # not installed, or asks for no extras

                marker = requirement.marker
                # Extra-gated requirements are accepted unchecked on the
                # first pass (there is no previous result to consult yet)
                if marker and "extra" in str(marker) and previous is not None:
                    dependent_extras = previous.get(dependent) or set()
                    gate_open = any(
                        marker.evaluate(
                            environment={**pip_install_environment, "extra": extra},
                            context="lock_file",
                        )
                        for extra in dependent_extras
                    )
                    if not gate_open:
                        continue

                current.setdefault(target, set()).update(requirement.extras)

        if current == previous:
            return current
        # 'current' is rebuilt from scratch next pass, so it can be kept as-is
        previous = current
def _build_reverse_dependencies(
    packages: t.Iterable[
        t.Tuple[str, t.Iterable["packaging.requirements.Requirement"], t.Iterable[str]],
    ],
    selected_extras: t.Dict[str, t.Set[str]],
    pip_install_environment: t.Dict[str, str],
) -> t.Dict[str, t.Set[str]]:
    """Construct reverse dependency graph.

    Args:
        packages: triplets with package's normalised name, parsed
            requirements, and declared extras
        selected_extras: mapping of packages to their selected extras
        pip_install_environment: environment which 'pip install' was
            invoked in (ie marker values)

    Returns:
        mapping of package names to the package's dependents. A
        dependent which requires the package through one of its extras
        is recorded as ``name[extra]``
    """

    import packaging.utils

    package_names = {name for name, _, _ in packages}

    reverse_requirements = {}  # type: dict[str, set[str]]
    for package_name, requirements, extras in packages:
        selected_extras_for_package = selected_extras.get(package_name) or set()
        for requirement in requirements:
            requirement_name = packaging.utils.canonicalize_name(requirement.name)
            if requirement_name not in package_names:
                continue  # marker evaluates false (eg extra not requested, wrong OS)

            relevant_extra = None  # type: str | None
            if requirement.marker and "extra" in str(requirement.marker):
                # Find the first declared extra which activates this requirement.
                # NOTE(review): if no declared extra satisfies the marker, the
                # requirement falls through as if it were unconditional - confirm
                # this is intended.
                for extra in extras:
                    if requirement.marker.evaluate(
                        environment={**pip_install_environment, "extra": extra},
                        context="lock_file",
                    ):
                        relevant_extra = extra
                        break
            elif requirement.marker and not requirement.marker.evaluate(
                environment=pip_install_environment,
                context="lock_file",
            ):
                continue

            if relevant_extra and relevant_extra not in selected_extras_for_package:
                continue  # extra not directly requested

            requester_name = package_name
            if relevant_extra:
                requester_name += f"[{relevant_extra}]"

            reverse_requirements.setdefault(requirement_name, set()).add(requester_name)

    return reverse_requirements
def _build_pinned_requirements_lines(
    pip_install_report: t.Dict[str, t.Any],
    pip_install_command: t.Union[str, None] = None,
    include_hashes: bool = True,
) -> t.Generator[str, None, None]:
    """Build pinned dependencies pip requirements file contents.

    The report passed in is not modified.

    Args:
        pip_install_report: pip install dry-run report
        pip_install_command: pip install command
        include_hashes: include hashes in output

    Returns:
        a generator of pip requirements file lines

    Raises:
        ValueError: the report's version is missing or its major version
            is not 1
    """

    import os.path

    import packaging.utils

    report_version = pip_install_report.get("version")
    if not isinstance(report_version, str) or report_version.split(".")[0] != "1":
        raise ValueError(f"Unsupported version: {report_version}")

    environment = pip_install_report.get("environment") or {}

    # Header
    yield f"# This file is autogenerated by {os.path.basename(__file__)}"
    yield "#"
    for name in sorted(environment):
        yield f"# {name}: {environment[name]}"
    yield "#"
    yield f"# pip_version: {pip_install_report.get('pip_version') or '<unknown>'}"
    if pip_install_command:
        yield "#"
        yield f"# command: {pip_install_command}"
    yield ""

    # Packages
    packages = pip_install_report.get("install") or []
    packages_requirements_parsed = _parse_requirements(packages)
    selected_extras = _discover_selected_extras(
        packages=packages_requirements_parsed,
        pip_install_packages=packages,
        pip_install_environment=environment,
    )
    dependents_by_package = _build_reverse_dependencies(
        packages=packages_requirements_parsed,
        selected_extras=selected_extras,
        pip_install_environment=environment,
    )

    # Pair each package with its normalised name instead of writing the name
    # into the caller's report dictionaries
    named_packages = sorted(
        (
            (packaging.utils.canonicalize_name(package["metadata"]["name"]), package)
            for package in packages
        ),
        key=lambda pair: pair[0],
    )

    for normalised_name, package in named_packages:
        download_info = package.get("download_info") or {}
        if (
            include_hashes
            and package.get("requested")
            and package.get("is_direct")
            and "dir_info" in download_info
            and "archive_info" not in download_info
        ):
            continue  # skip unhashable current directory

        # Requirement line
        line = normalised_name
        if normalised_name in selected_extras:
            line += f"[{','.join(sorted(selected_extras[normalised_name]))}]"
        line += f"=={package['metadata']['version']}"

        # Hash line
        hashes = {}  # type: dict[str, str]
        if include_hashes:
            archive_info = download_info.get("archive_info") or {}
            hashes = archive_info.get("hashes") or {}
            if not hashes:
                # Fall back to the legacy single '<algorithm>=<digest>' entry
                legacy_hash = archive_info.get("hash")
                if isinstance(legacy_hash, str) and "=" in legacy_hash:
                    legacy_algorithm, _, legacy_digest = legacy_hash.partition("=")
                    hashes = {legacy_algorithm: legacy_digest}
        if hashes:
            # Only one hash is emitted: SHA-256 when available
            algorithm = "sha256" if "sha256" in hashes else next(iter(hashes))
            yield line + " \\"
            yield f" --hash={algorithm}:{hashes[algorithm]}"
        else:
            yield line

        # Dependents lines
        dependents = sorted(dependents_by_package.get(normalised_name) or ())
        if len(dependents) > 1:
            yield " # via"
            for dependent in dependents:
                yield f" # {dependent}"
        elif dependents:
            yield f" # via {dependents[0]}"
def convert_pip_install_report_to_pip_requirements(
    pip_install_report_json_stream: t.TextIO,
    pinned_requirements_text_stream: t.TextIO,
    pip_install_command: t.Union[str, None] = None,
    include_hashes: bool = True,
) -> None:
    """Convert a pip install report into a pinned pip requirements file.

    Args:
        pip_install_report_json_stream: text stream to read the pip
            install report JSON from (read in full)
        pinned_requirements_text_stream: text stream the requirements
            lines are written to, each terminated by a newline
        pip_install_command: pip install command
        include_hashes: include hashes in output
    """

    import json

    report = json.loads(pip_install_report_json_stream.read())
    requirements_lines = _build_pinned_requirements_lines(
        pip_install_report=report,
        pip_install_command=pip_install_command,
        include_hashes=include_hashes,
    )
    for requirements_line in requirements_lines:
        pinned_requirements_text_stream.write(f"{requirements_line}\n")
def main() -> None:
    """Run app from command line.

    Reads the pip install report from the given path (or standard input
    when the path is ``-``) and writes the requirements file to the given
    path (or standard output when the path is ``-``).
    """

    import argparse
    import contextlib
    import sys

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("report", help="pip install report JSON file path")
    parser.add_argument("requirements", help="output pip requirements file path")
    parser.add_argument("-c", "--pip-install-command", help=(
        "source pip install command (ie the command which generated the pip install "
        "report)"
    ))  # fmt: skip
    parser.add_argument(
        "-i", "--include-hashes",
        action="store_true",
        help="include file hashes in output",
    )  # fmt: skip
    args = parser.parse_args()

    # Files opened here are closed (and so flushed) on exit, even on error; the
    # standard streams are left open for the rest of the process
    with contextlib.ExitStack() as stack:
        if args.report == "-":
            report_json_stream = sys.stdin
        else:
            # pip writes its report file as UTF-8
            report_json_stream = stack.enter_context(
                open(args.report, mode="r", encoding="utf-8"),
            )

        if args.requirements == "-":
            requirements_text_stream = sys.stdout
        else:
            requirements_text_stream = stack.enter_context(
                open(args.requirements, mode="w"),
            )

        convert_pip_install_report_to_pip_requirements(
            pip_install_report_json_stream=report_json_stream,
            pinned_requirements_text_stream=requirements_text_stream,
            pip_install_command=args.pip_install_command,
            include_hashes=args.include_hashes,
        )
# Run the command-line interface only when executed as a script, so the
# module can be imported without side effects
if __name__ == "__main__":
    main()
# MIT License
#
# Copyright (c) 2025 Laurie O
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
@EpicWink
Copy link
Author

Example usage:

pip install --dry-run --ignore-installed --report report.json '.[full]'
python pip-install-report-to-requirements.py report.json requirements.txt -c "pip install '.[full]'"

Add -i (--include-hashes) to include file hashes in the generated requirements file.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment