diff --git a/examples/pip_parse/pip_parse_test.py b/examples/pip_parse/pip_parse_test.py index c532dff564..89e5eca254 100644 --- a/examples/pip_parse/pip_parse_test.py +++ b/examples/pip_parse/pip_parse_test.py @@ -50,18 +50,22 @@ def test_entry_point(self): def test_data(self): actual = os.environ.get("WHEEL_DATA_CONTENTS") self.assertIsNotNone(actual) - actual = self._remove_leading_dirs(actual.split(" ")) + actual = set(self._remove_leading_dirs(actual.split(" "))) + + s3cmd_bin = "bin/s3cmd" + if os.name == "nt": + s3cmd_bin += ".bat" - expected = [ - "bin/s3cmd", + expected = { + s3cmd_bin, "data/share/doc/packages/s3cmd/INSTALL.md", "data/share/doc/packages/s3cmd/LICENSE", "data/share/doc/packages/s3cmd/NEWS", "data/share/doc/packages/s3cmd/README.md", "data/share/man/man1/s3cmd.1", - ] + } - self.assertListEqual(actual, expected) + self.assertEqual(actual, expected) def test_dist_info(self): actual = os.environ.get("WHEEL_DIST_INFO_CONTENTS") diff --git a/python/private/py_executable.bzl b/python/private/py_executable.bzl index 6197c0c789..9c21e5d274 100644 --- a/python/private/py_executable.bzl +++ b/python/private/py_executable.bzl @@ -749,6 +749,14 @@ def _create_venv_windows(ctx, *, venv_ctx_rel_root, runtime, interpreter_actual_ link_to_path = interpreter_actual_path, files = depset([runtime.interpreter]), )) + + # This isn't strictly correct, but should work ok. + interpreter_symlinks.add(ExplicitSymlink( + runfiles_path = paths.join(paths.dirname(rf_path), "pythonw.exe"), + venv_path = paths.join(paths.dirname(venv_rel_path), "pythonw.exe"), + link_to_path = paths.join(paths.dirname(interpreter_actual_path), "pythonw.exe"), + files = depset(), + )) else: # It's OK to use declare_symlink here because an absolute path # will be written to it, so Bazel won't mangle it. 
diff --git a/python/private/pypi/BUILD.bazel b/python/private/pypi/BUILD.bazel index 02c06a8096..c46ea83874 100644 --- a/python/private/pypi/BUILD.bazel +++ b/python/private/pypi/BUILD.bazel @@ -24,6 +24,24 @@ exports_files( visibility = ["//visibility:public"], ) +alias( + name = "venv_entry_point_template", + actual = select({ + "@platforms//os:windows": "venv_entry_point_template.bat", + "//conditions:default": "venv_entry_point_template.sh", + }), + visibility = ["//visibility:public"], +) + +alias( + name = "venv_shebang_rewriter", + actual = select({ + "@platforms//os:windows": "venv_shebang_rewriter.ps1", + "//conditions:default": "venv_shebang_rewriter.sh", + }), + visibility = ["//visibility:public"], +) + exports_files( srcs = ["deps.bzl"], visibility = ["//tools/private/update_deps:__pkg__"], @@ -520,3 +538,15 @@ bzl_library( name = "whl_target_platforms_bzl", srcs = ["whl_target_platforms.bzl"], ) + +bzl_library( + name = "venv_entry_point_bzl", + srcs = ["venv_entry_point.bzl"], + visibility = ["//visibility:public"], +) + +bzl_library( + name = "venv_rewrite_shebang_bzl", + srcs = ["venv_rewrite_shebang.bzl"], + visibility = ["//visibility:public"], +) diff --git a/python/private/pypi/generate_whl_library_build_bazel.bzl b/python/private/pypi/generate_whl_library_build_bazel.bzl index 768b064a5d..a9a29081f7 100644 --- a/python/private/pypi/generate_whl_library_build_bazel.bzl +++ b/python/private/pypi/generate_whl_library_build_bazel.bzl @@ -23,6 +23,7 @@ _RENDER = { "data_exclude": render.list, "dependencies": render.list, "dependencies_by_platform": lambda x: render.dict(x, value_repr = render.list), + "entry_points": render.dict_dict, "extras": render.list, "group_deps": render.list, "include": str, diff --git a/python/private/pypi/venv_entry_point.bzl b/python/private/pypi/venv_entry_point.bzl new file mode 100644 index 0000000000..32cb6f55a5 --- /dev/null +++ b/python/private/pypi/venv_entry_point.bzl @@ -0,0 +1,50 @@ +"""Rule for generating venv 
entry point scripts.""" + +load("//python/private:attributes.bzl", "WINDOWS_CONSTRAINTS_ATTRS") +load("//python/private:common.bzl", "is_windows_platform") +load("//python/private:rule_builders.bzl", "ruleb") + +def _venv_entry_point_impl(ctx): + is_windows = is_windows_platform(ctx) + + out_name = ctx.label.name + python_exe = "" + if is_windows: + out_name += ".bat" + python_exe = "pythonw.exe" if ctx.attr.group == "gui_scripts" else "python.exe" + + out = ctx.actions.declare_file(out_name) + + ctx.actions.expand_template( + template = ctx.file._template, + output = out, + substitutions = { + "{ATTRIBUTE}": ctx.attr.attribute, + "{MODULE}": ctx.attr.module, + "{PYTHON_EXE}": python_exe, + }, + is_executable = True, + ) + + return [DefaultInfo( + files = depset([out]), + executable = out, + )] + +_builder = ruleb.Rule( + implementation = _venv_entry_point_impl, + executable = True, +) +_builder.attrs.update({ + "attribute": attr.string(mandatory = False, doc = "The attribute to call"), + "extras": attr.string(mandatory = False, doc = "The extras for the entry point"), + "group": attr.string(mandatory = False, doc = "The entry point group (e.g. console_scripts)"), + "module": attr.string(mandatory = True, doc = "The module to import"), + "_template": attr.label( + default = Label("//python/private/pypi:venv_entry_point_template"), + allow_single_file = True, + ), +}) +_builder.attrs.update(WINDOWS_CONSTRAINTS_ATTRS) + +venv_entry_point = _builder.build() diff --git a/python/private/pypi/venv_entry_point_template.bat b/python/private/pypi/venv_entry_point_template.bat new file mode 100644 index 0000000000..36a7dd3b41 --- /dev/null +++ b/python/private/pypi/venv_entry_point_template.bat @@ -0,0 +1,8 @@ +@setlocal enabledelayedexpansion & "%~dp0{PYTHON_EXE}" -x "%~f0" %* & exit /b !ERRORLEVEL! 
+# -*- coding: utf-8 -*- +import re +import sys +from {MODULE} import {ATTRIBUTE} +if __name__ == "__main__": + sys.argv[0] = re.sub(r"(-script\.pyw|\.exe)?$", "", sys.argv[0]) + sys.exit({ATTRIBUTE}()) diff --git a/python/private/pypi/venv_entry_point_template.sh b/python/private/pypi/venv_entry_point_template.sh new file mode 100644 index 0000000000..d40c31adc4 --- /dev/null +++ b/python/private/pypi/venv_entry_point_template.sh @@ -0,0 +1,10 @@ +#!/bin/sh +'''exec' "$(dirname "$0")/python3" "$0" "$@" +' ''' +# -*- coding: utf-8 -*- +import re +import sys +from {MODULE} import {ATTRIBUTE} +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit({ATTRIBUTE}()) diff --git a/python/private/pypi/venv_rewrite_shebang.bzl b/python/private/pypi/venv_rewrite_shebang.bzl new file mode 100644 index 0000000000..c653211850 --- /dev/null +++ b/python/private/pypi/venv_rewrite_shebang.bzl @@ -0,0 +1,82 @@ +"""Rule for rewriting portable shebangs.""" + +load("//python/private:attributes.bzl", "WINDOWS_CONSTRAINTS_ATTRS") +load("//python/private:common.bzl", "is_windows_platform", "runfiles_root_path") +load("//python/private:py_info.bzl", "PyInfoBuilder", "VenvSymlinkEntry", "VenvSymlinkKind") +load("//python/private:rule_builders.bzl", "ruleb") + +def _venv_rewrite_shebang_impl(ctx): + is_windows = is_windows_platform(ctx) + + out_name = ctx.label.name + if is_windows: + out_name += ".bat" + + out_file = ctx.actions.declare_file(out_name) + in_file = ctx.file.src + + action_args = ctx.actions.args() + rewriter_file = ctx.files._venv_shebang_rewriter[0] + inputs = depset([in_file, rewriter_file]) + + if rewriter_file.path.endswith(".ps1"): + action_exe = "powershell.exe" + action_args.add_all([ + "-ExecutionPolicy", + "Bypass", + "-NoProfile", + "-File", + rewriter_file, + ]) + else: + action_exe = ctx.attr._venv_shebang_rewriter[DefaultInfo].files_to_run + + action_args.add(in_file) + action_args.add(out_file) + 
action_args.add("windows" if is_windows else "unix") + + ctx.actions.run( + inputs = inputs, + outputs = [out_file], + executable = action_exe, + arguments = [action_args], + mnemonic = "PyVenvRewriteBin", + progress_message = "Rewriting venv bin script %{input}", + toolchain = None, + ) + + symlink = VenvSymlinkEntry( + kind = VenvSymlinkKind.BIN, + link_to_path = runfiles_root_path(ctx, out_file.short_path), + link_to_file = out_file, + venv_path = out_name, + package = ctx.attr.package, + version = ctx.attr.version, + files = depset([out_file]), + ) + builder = PyInfoBuilder.new() + builder.venv_symlinks.add([symlink]) + py_info = builder.build() + + return [ + DefaultInfo(files = depset([out_file]), executable = out_file), + py_info, + ] + +_builder = ruleb.Rule( + implementation = _venv_rewrite_shebang_impl, + executable = True, +) +_builder.attrs.update({ + "package": attr.string(), + "src": attr.label(mandatory = True, allow_single_file = True), + "version": attr.string(), + "_venv_shebang_rewriter": attr.label( + default = "//python/private/pypi:venv_shebang_rewriter", + allow_files = True, + cfg = "exec", + ), +}) +_builder.attrs.update(WINDOWS_CONSTRAINTS_ATTRS) + +venv_rewrite_shebang = _builder.build() diff --git a/python/private/pypi/venv_shebang_rewriter.ps1 b/python/private/pypi/venv_shebang_rewriter.ps1 new file mode 100644 index 0000000000..fb6077b407 --- /dev/null +++ b/python/private/pypi/venv_shebang_rewriter.ps1 @@ -0,0 +1,44 @@ +[CmdletBinding()] +param( + [Parameter(Position=0, Mandatory=$true)] + [string]$InFile, + + [Parameter(Position=1, Mandatory=$true)] + [string]$OutFile, + + [Parameter(Position=2, Mandatory=$true)] + [string]$TargetOs +) + +$ErrorActionPreference = "Stop" + +$firstLine = Get-Content -Path $InFile -TotalCount 1 -ErrorAction SilentlyContinue +$content = Get-Content -Path $InFile | Select-Object -Skip 1 + +$Utf8NoBom = New-Object System.Text.UTF8Encoding $False + +if ($TargetOs -eq "windows") { + if ($firstLine -match 
"^#!pythonw") { + $pythonExe = "pythonw.exe" + } else { + $pythonExe = "python.exe" + } + # A Batch-Python polyglot. Batch executes the first line and exits, + # while Python (via -x) ignores the first line and executes the rest. + $wrapper = "@setlocal enabledelayedexpansion & `"%~dp0$pythonExe`" -x `"%~f0`" %* & exit /b !ERRORLEVEL!" + [System.IO.File]::WriteAllText($OutFile, $wrapper + "`r`n", $Utf8NoBom) +} else { + # A Shell-Python polyglot. The shell executes the triple-quoted 'exec' + # command, re-running the script with python3 from the scripts directory. + # Python ignores the triple-quoted string and continues. + $wrapper = @" +#!/bin/sh +'''exec' "`$(dirname "`$0")/python3" "`$0" "`$@" +' ''' +"@ + [System.IO.File]::WriteAllText($OutFile, $wrapper + "`n", $Utf8NoBom) +} + +if ($null -ne $content) { + [System.IO.File]::AppendAllLines($OutFile, [string[]]$content, $Utf8NoBom) +} diff --git a/python/private/pypi/venv_shebang_rewriter.sh b/python/private/pypi/venv_shebang_rewriter.sh new file mode 100755 index 0000000000..d4391d3352 --- /dev/null +++ b/python/private/pypi/venv_shebang_rewriter.sh @@ -0,0 +1,27 @@ +#!/bin/sh +set -eu + +IN="$1" +OUT="$2" +TARGET_OS="$3" + +FIRST_LINE=$(head -n 1 "$IN") + +if [ "$TARGET_OS" = "windows" ]; then + case "$FIRST_LINE" in + "#!pythonw"*) PYTHON_EXE="pythonw.exe" ;; + *) PYTHON_EXE="python.exe" ;; + esac + # A Batch-Python polyglot. Batch executes the first line and exits, + # while Python (via -x) ignores the first line and executes the rest. + printf "@setlocal enabledelayedexpansion & \"%%~dp0$PYTHON_EXE\" -x \"%%~f0\" %%* & exit /b !ERRORLEVEL!\r\n" > "$OUT" +else + printf "#!/bin/sh\n" > "$OUT" + # A Shell-Python polyglot. The shell executes the triple-quoted 'exec' + # command, re-running the script with python3 from the scripts directory. + # Python ignores the triple-quoted string and continues. 
+ printf "'''exec' \"\$(dirname \"\$0\")/python3\" \"\$0\" \"\$@\"\n' '''\n" >> "$OUT" +fi + +tail -n +2 "$IN" >> "$OUT" +chmod +x "$OUT" diff --git a/python/private/pypi/whl_extract.bzl b/python/private/pypi/whl_extract.bzl index 506be05481..2ebb61a83a 100644 --- a/python/private/pypi/whl_extract.bzl +++ b/python/private/pypi/whl_extract.bzl @@ -20,22 +20,8 @@ def whl_extract(rctx, *, whl_path, logger): supports_whl_extraction = rp_config.supports_whl_extraction, ) - # Fix permissions on extracted files. Some wheels have files without read permissions set, - # which causes errors when trying to read them later. - os_name = repo_utils.get_platforms_os_name(rctx) - if os_name != "windows": - # On Unix-like systems, recursively add read permissions to all files - # and ensure directories are traversable (need execute permission) - result = repo_utils.execute_unchecked( - rctx, - op = "Fixing wheel permissions {}".format(whl_path), - arguments = ["chmod", "-R", "a+rX", str(install_dir_path)], - logger = logger, - ) - if result.return_code != 0: - # It's possible chmod is not available or the filesystem doesn't support it. - # This is fine, we just want to try to fix permissions if possible. 
- logger.warn(lambda: "Failed to fix file permissions: {}".format(result.stderr)) + _maybe_fix_permissions(rctx, whl_path = whl_path, logger = logger) + metadata_file = find_whl_metadata( install_dir = install_dir_path, logger = logger, @@ -70,17 +56,36 @@ def whl_extract(rctx, *, whl_path, logger): # The prefix does not exist in the wheel, we can continue continue - for (src, dest) in merge_trees(src, rctx.path(dest_prefix)): + dest_dir = rctx.path(dest_prefix) + repo_utils.mkdir(rctx, dest_dir) + for (src, dest) in merge_trees(src, dest_dir): logger.debug(lambda: "Renaming: {} -> {}".format(src, dest)) - rctx.rename(src, dest) - - # TODO @aignas 2025-12-16: when moving scripts to `bin`, rewrite the #!python - # shebang to be something else, for inspiration look at the hermetic - # toolchain wrappers + repo_utils.rename(rctx, src, dest) # Ensure that there is no data dir left rctx.delete(data_dir) +# TODO: This can be removed when Bazel 8.6+ is the minimum supported version. +def _maybe_fix_permissions(rctx, *, whl_path, logger): + # Fix permissions on extracted files. Some wheels have files without read permissions set, + # which causes errors when trying to read them later. + # We apply this to the root directory to ensure that everything in bin/, site-packages/, + # etc. is readable and executable where appropriate. + os_name = repo_utils.get_platforms_os_name(rctx) + if os_name != "windows": + # On Unix-like systems, recursively add read permissions to all files + # and ensure directories are traversable (need execute permission) + result = repo_utils.execute_unchecked( + rctx, + op = "Fixing wheel permissions {}".format(whl_path), + arguments = ["chmod", "-R", "a+rX", "."], + logger = logger, + ) + if result.return_code != 0: + # It's possible chmod is not available or the filesystem doesn't support it. + # This is fine, we just want to try to fix permissions if possible. 
+ logger.warn(lambda: "Failed to fix file permissions: {}".format(result.stderr)) + def merge_trees(src, dest): """Merge src into the destination path. diff --git a/python/private/pypi/whl_installer/wheel.py b/python/private/pypi/whl_installer/wheel.py index 4987c915cc..801fd0f3b9 100644 --- a/python/private/pypi/whl_installer/wheel.py +++ b/python/private/pypi/whl_installer/wheel.py @@ -20,6 +20,41 @@ import installer +class DoNothingCm: + """A context manager that does nothing when written to.""" + + def __enter__(self): + return self + + def __exit__(self, *args): + pass + + def write(self, data): + pass + + +class NoEntryPointsSchemeDictionaryDestination( + installer.destinations.SchemeDictionaryDestination +): + """ + A custom destination that prevents the `installer` package from automatically + generating scripts for `console_scripts` entry points. + + rules_python handles entry points via its own `venv_entry_point` targets. + If `installer` also generates these scripts in the `bin/` directory, it + causes a target naming collision because `whl_library_targets.bzl` will + try to create a `venv_rewrite_shebang` target with the same name. + + By overriding `for_script` to return a no-op dummy writer, we silently + discard the generated entry point scripts while still allowing `installer` + to process the rest of the wheel normally (including `.data/scripts` which + we do want to keep). + """ + + def for_script(self, name, module, attribute): + return DoNothingCm() + + class Wheel: """Representation of the compressed .whl file""" @@ -50,10 +85,11 @@ def unzip(self, directory: str) -> None: "scripts": "/bin", "data": "/data", } - destination = installer.destinations.SchemeDictionaryDestination( + + destination = NoEntryPointsSchemeDictionaryDestination( installation_schemes, # TODO Should entry_point scripts also be handled by installer rather than custom code? 
- interpreter="/dev/null", + interpreter="python", script_kind="posix", destdir=directory, bytecode_optimization_levels=[], diff --git a/python/private/pypi/whl_library.bzl b/python/private/pypi/whl_library.bzl index 13a8e6ff8e..529514578d 100644 --- a/python/private/pypi/whl_library.bzl +++ b/python/private/pypi/whl_library.bzl @@ -28,7 +28,7 @@ load(":pep508_requirement.bzl", "requirement") load(":pypi_repo_utils.bzl", "pypi_repo_utils") load(":urllib.bzl", "urllib") load(":whl_extract.bzl", "whl_extract") -load(":whl_metadata.bzl", "whl_metadata") +load(":whl_metadata.bzl", "parse_entry_points", "whl_metadata") _CPPFLAGS = "CPPFLAGS" _COMMAND_LINE_TOOLS_PATH_SLUG = "commandlinetools" @@ -277,6 +277,36 @@ def _extract_whl_py(rctx, *, python_interpreter, args, whl_path, environment, lo logger = logger, ) +def _get_entry_points(rctx, install_dir_path, metadata): + dist_info_dir = "{}-{}.dist-info".format( + metadata.name.replace("-", "_"), + metadata.version.replace("-", "_"), + ) + entry_points_txt = install_dir_path.get_child(dist_info_dir).get_child("entry_points.txt") + if entry_points_txt.exists: + return parse_entry_points(rctx.read(entry_points_txt)) + return {} + +def _move_scripts_needing_shebang_rewrite(rctx, entry_points): + bin_dir = rctx.path("bin") + if not bin_dir.exists: + return + + ep_names = {name.lower(): True for name in entry_points} + for script in bin_dir.readdir(): + if script.is_dir: + continue + if script.basename.lower() in ep_names: + rctx.delete(script) + continue + if script.basename.endswith(".exe") or script.basename.endswith(".dll"): + continue + content = rctx.read(script) + if content.startswith("#!python"): + rewrite_bin_dir = rctx.path("rewrite-bin") + repo_utils.mkdir(rctx, rewrite_bin_dir) + repo_utils.rename(rctx, script, rctx.path("rewrite-bin/" + script.basename)) + def _to_purl(*, index, metadata, filename): """ Produce a PyPI PURL from the metadata. 
@@ -436,6 +466,9 @@ def _whl_library_impl(rctx): ) namespace_package_files = pypi_repo_utils.find_namespace_package_files(rctx, install_dir_path) + entry_points = _get_entry_points(rctx, install_dir_path, metadata) + _move_scripts_needing_shebang_rewrite(rctx, entry_points) + build_file_contents = generate_whl_library_build_bazel( name = whl_path.basename, sdist_filename = sdist_filename, @@ -455,6 +488,7 @@ def _whl_library_impl(rctx): group_name = rctx.attr.group_name, namespace_package_files = namespace_package_files, extras = requirement(rctx.attr.requirement).extras, + entry_points = entry_points, purl = _to_purl( index = rctx.attr.index_url, metadata = metadata, diff --git a/python/private/pypi/whl_library_targets.bzl b/python/private/pypi/whl_library_targets.bzl index 4ed66cdddc..b3a52cd18c 100644 --- a/python/private/pypi/whl_library_targets.bzl +++ b/python/private/pypi/whl_library_targets.bzl @@ -31,6 +31,8 @@ load( ) load(":namespace_pkgs.bzl", _create_inits = "create_inits") load(":pep508_deps.bzl", "deps") +load(":venv_entry_point.bzl", "venv_entry_point") +load(":venv_rewrite_shebang.bzl", "venv_rewrite_shebang") # Files that are special to the Bazel processing of things. _BAZEL_REPO_FILE_GLOBS = [ @@ -43,6 +45,7 @@ _BAZEL_REPO_FILE_GLOBS = [ ] _IS_VENV_SITE_PACKAGES_YES = Label("//python/config_settings:_is_venvs_site_packages_yes") +_VENV_SITE_PACKAGES_FLAG = Label("//python/config_settings:venvs_site_packages") def whl_library_targets_from_requires( *, @@ -51,6 +54,7 @@ def whl_library_targets_from_requires( metadata_version = "", requires_dist = [], extras = [], + entry_points = {}, include = [], group_deps = [], **kwargs): @@ -67,6 +71,7 @@ def whl_library_targets_from_requires( requires_dist: {type}`list[str]` The list of `Requires-Dist` values from the whl `METADATA`. extras: {type}`list[str]` The list of requested extras. This essentially includes extra transitive dependencies in the final targets depending on the wheel `METADATA`. 
+ entry_points: {type}`list[dict]` A list of parsed entry point definitions. include: {type}`list[str]` The list of packages to include. **kwargs: Extra args passed to the {obj}`whl_library_targets` """ @@ -82,6 +87,7 @@ def whl_library_targets_from_requires( name = name, dependencies = package_deps.deps, dependencies_with_markers = package_deps.deps_select, + entry_points = entry_points, tags = [ "pypi_name={}".format(metadata_name), "pypi_version={}".format(metadata_version), @@ -116,6 +122,7 @@ def whl_library_targets( filegroups = None, dependencies_by_platform = {}, dependencies_with_markers = {}, + entry_points = {}, group_deps = [], group_name = "", data = [], @@ -128,6 +135,8 @@ def whl_library_targets( copy_file = copy_file, py_binary = py_binary, py_library = py_library, + venv_entry_point = venv_entry_point, + venv_rewrite_shebang = venv_rewrite_shebang, env_marker_setting = env_marker_setting, create_inits = _create_inits, )): @@ -146,6 +155,7 @@ def whl_library_targets( dependencies by platform key. dependencies_with_markers: {type}`dict[str, str]` A marker to evaluate in order for the dep to be included. + entry_points: {type}`list[dict]` A list of parsed entry point definitions. filegroups: {type}`dict[str, list[str]] | None` A dictionary of the target names and the glob matches. If `None`, defaults will be used. 
group_name: {type}`str` name of the dependency group (if any) which @@ -180,6 +190,36 @@ def whl_library_targets( tags = sorted(tags) data = [] + data + bins_for_data_label = [] + + for ep_dict in entry_points.values(): + kwargs = dict(ep_dict) + ep_name = kwargs.pop("name") + ep_target_name = "bin/{}".format(ep_name) + rules.venv_entry_point( + name = ep_target_name, + **kwargs + ) + bins_for_data_label.append(ep_target_name) + data.append(ep_target_name) + + existing_bin_names = {ep["name"].lower(): None for ep in entry_points.values()} + for p in native.glob(["bin/*"], allow_empty = True): + existing_bin_names[p[len("bin/"):].lower()] = None + + for src_path in native.glob(["rewrite-bin/*"], allow_empty = True): + script_name = src_path[len("rewrite-bin/"):] + if script_name.lower() in existing_bin_names: + continue + rewrite_target_name = "bin/{}".format(script_name) + rules.venv_rewrite_shebang( + name = rewrite_target_name, + src = src_path, + package = name, + ) + bins_for_data_label.append(rewrite_target_name) + data.append(rewrite_target_name) + if filegroups == None: filegroups = { EXTRACTED_WHEEL_FILES: dict( @@ -199,9 +239,12 @@ def whl_library_targets( for filegroup_name, glob_kwargs in filegroups.items(): glob_kwargs = {"allow_empty": True} | glob_kwargs + srcs = native.glob(**glob_kwargs) + if filegroup_name == DATA_LABEL: + srcs = srcs + bins_for_data_label native.filegroup( name = filegroup_name, - srcs = native.glob(**glob_kwargs), + srcs = srcs, visibility = ["//visibility:public"], ) @@ -383,7 +426,7 @@ def whl_library_targets( ), tags = tags, visibility = impl_vis, - experimental_venvs_site_packages = Label("@rules_python//python/config_settings:venvs_site_packages"), + experimental_venvs_site_packages = _VENV_SITE_PACKAGES_FLAG, namespace_package_files = namespace_package_files, ) diff --git a/python/private/pypi/whl_metadata.bzl b/python/private/pypi/whl_metadata.bzl index 002e5773cc..2981a5d92f 100644 --- 
a/python/private/pypi/whl_metadata.bzl +++ b/python/private/pypi/whl_metadata.bzl @@ -111,3 +111,58 @@ def find_whl_metadata(*, install_dir, logger): else: logger.fail("The '*.dist-info' directory could not be found in '{}'".format(install_dir.basename)) return None + +def parse_entry_points(contents): + """Parses entry_points.txt contents and returns console_scripts and gui_scripts entries. + + Args: + contents: {type}`str` The contents of the entry_points.txt file. + + Returns: + {type}`dict[str, dict]` A dict keyed by the original entry point name. + """ + entries = {} + seen_lower_names = {} + current_group = None + current_group_lower = None + for line in contents.splitlines(): + line = line.strip() + if not line or line.startswith("#"): + continue + if line.startswith("[") and line.endswith("]"): + current_group = line[1:-1].strip() + current_group_lower = current_group.lower() + continue + + if current_group_lower in ("console_scripts", "gui_scripts"): + name, _, ref = line.partition("=") + name = name.strip() + + # Names are case-insensitive. + # See https://packaging.python.org/en/latest/specifications/entry-points/#data-model + # Entry points must be unique for a given name because they turn + # into files and may be on a case-insensitive file system. 
+ lower_name = name.lower() + if lower_name in seen_lower_names: + continue + seen_lower_names[lower_name] = True + + # remove inline comments + ref, _, _ = ref.partition("#") + ref = ref.strip() + + extras = "" + if "[" in ref and ref.endswith("]"): + ref, _, extras_part = ref.partition("[") + extras = extras_part[:-1].strip() + ref = ref.strip() + + module, _, attribute = ref.partition(":") + entries[name] = { + "attribute": attribute.strip(), + "extras": extras, + "group": current_group, + "module": module.strip(), + "name": name, + } + return entries diff --git a/python/private/repo_utils.bzl b/python/private/repo_utils.bzl index 7ec45eda5b..ae2fc2e5d0 100644 --- a/python/private/repo_utils.bzl +++ b/python/private/repo_utils.bzl @@ -522,6 +522,38 @@ def _extract(mrctx, *, archive, supports_whl_extraction = False, **kwargs): if not mrctx.delete(archive): fail("Failed to remove the symlink after extracting") +def _rename(mrctx, src, dest): + """Rename a file or directory. + + TODO: remove when the earliest supported bazel version is at least 8.0. + + Args: + mrctx: module_ctx or repository_ctx object + src: {type}`path` the source path + dest: {type}`path` the destination path + """ + if hasattr(mrctx, "rename"): + mrctx.rename(src, dest) + return + + # Fallback for Bazel < 8.0 + os_name = _get_platforms_os_name(mrctx) + if os_name == "windows": + # On Windows, we use `cmd.exe /c move` to rename files/directories. + # We need to use backslashes for the paths. 
+ res = mrctx.execute([ + "cmd.exe", + "/c", + "move", + str(src).replace("/", "\\"), + str(dest).replace("/", "\\"), + ]) + else: + res = mrctx.execute(["mv", str(src), str(dest)]) + + if res.return_code != 0: + fail("Failed to rename {} to {}: {}".format(src, dest, res.stderr)) + repo_utils = struct( # keep sorted execute_checked = _execute_checked, @@ -536,6 +568,7 @@ repo_utils = struct( norm_path = _norm_path, relative_to = _relative_to, is_relative_to = _is_relative_to, + rename = _rename, repo_root_relative_path = _repo_root_relative_path, which_checked = _which_checked, which_unchecked = _which_unchecked, diff --git a/python/private/text_util.bzl b/python/private/text_util.bzl index 28979d8981..eaccadf970 100644 --- a/python/private/text_util.bzl +++ b/python/private/text_util.bzl @@ -157,9 +157,33 @@ def _left_pad_zero(index, length): fail("index must be non-negative") return ("0" * length + str(index))[-length:] +def _render_dict_dict(d): + """Render a dict[str, dict] value without recursive function calls.""" + if not d: + return "{}" + + lines = ["{"] + for k, v in d.items(): + if not v: + v_str = "{}" + else: + inner_lines = ["{"] + for ik, iv in v.items(): + inner_lines.append(_indent("{}: {},".format(repr(ik), repr(iv)))) + inner_lines.append("}") + v_str = "\n".join(inner_lines) + + # We need to correctly indent the multi-line string v_str + # but _indent acts on every line except the first if not carefully handled. 
+ # It's easier to just do: + lines.append(_indent("{}: {},".format(repr(k), v_str))) + lines.append("}") + return "\n".join(lines) + render = struct( alias = _render_alias, dict = _render_dict, + dict_dict = _render_dict_dict, call = _render_call, hanging_indent = _hanging_indent, indent = _indent, diff --git a/python/private/venv_runfiles.bzl b/python/private/venv_runfiles.bzl index a94f29f71c..45d4d24848 100644 --- a/python/private/venv_runfiles.bzl +++ b/python/private/venv_runfiles.bzl @@ -521,7 +521,7 @@ def get_venv_symlinks( venv_symlinks[venv_path] = VenvSymlinkEntry( kind = kind, link_to_path = link_to_path, - link_to_file = None, + link_to_file = files[0] if kind == VenvSymlinkKind.BIN and len(files) == 1 else None, package = package, version = version_str, venv_path = out_venv_path, diff --git a/tests/pypi/whl_library_targets/whl_library_targets_tests.bzl b/tests/pypi/whl_library_targets/whl_library_targets_tests.bzl index 60e1f3f3dd..ec28bfbb39 100644 --- a/tests/pypi/whl_library_targets/whl_library_targets_tests.bzl +++ b/tests/pypi/whl_library_targets/whl_library_targets_tests.bzl @@ -30,6 +30,8 @@ def _test_filegroups(env): def glob(include, *, exclude = [], allow_empty): _ = exclude # @unused env.expect.that_bool(allow_empty).equals(True) + if include == ["rewrite-bin/*"] or include == ["bin/*"]: + return [] return include whl_library_targets( @@ -39,7 +41,9 @@ def _test_filegroups(env): filegroup = lambda **kwargs: calls.append(kwargs), glob = glob, ), - rules = struct(), + rules = struct( + venv_rewrite_shebang = lambda **kwargs: None, + ), ) env.expect.that_collection(calls, expr = "filegroup calls").contains_exactly([ @@ -85,8 +89,11 @@ def _test_platforms(env): filegroups = {}, native = struct( config_setting = lambda **kwargs: calls.append(kwargs), + glob = lambda *args, **kwargs: [], + ), + rules = struct( + venv_rewrite_shebang = lambda **kwargs: None, ), - rules = struct(), ) env.expect.that_collection(calls).contains_exactly([ @@ -134,9 
+141,12 @@ def _test_copy(env): filegroups = {}, copy_files = {"file_src": "file_dest"}, copy_executables = {"exec_src": "exec_dest"}, - native = struct(), + native = struct( + glob = lambda *args, **kwargs: [], + ), rules = struct( copy_file = lambda **kwargs: calls.append(kwargs), + venv_rewrite_shebang = lambda **kwargs: None, ), ) @@ -165,9 +175,11 @@ def _test_whl_and_library_deps_from_requires(env): m_glob = mocks.glob() - m_glob.results.append(["site-packages/foo/SRCS.py"]) - m_glob.results.append(["site-packages/foo/DATA.txt"]) - m_glob.results.append(["site-packages/foo/PYI.pyi"]) + m_glob.results.append([]) # bin + m_glob.results.append([]) # rewrite-bin + m_glob.results.append(["site-packages/foo/SRCS.py"]) # srcs + m_glob.results.append(["site-packages/foo/DATA.txt"]) # data + m_glob.results.append(["site-packages/foo/PYI.pyi"]) # pyi whl_library_targets_from_requires( name = "foo-0-py3-none-any.whl", @@ -193,6 +205,7 @@ def _test_whl_and_library_deps_from_requires(env): py_library = lambda **kwargs: py_library_calls.append(kwargs), env_marker_setting = lambda **kwargs: env_marker_setting_calls.append(kwargs), create_inits = lambda *args, **kwargs: ["_create_inits_target"], + venv_rewrite_shebang = lambda **kwargs: None, ), ) @@ -236,6 +249,16 @@ def _test_whl_and_library_deps_from_requires(env): }) # buildifier: @unsorted-dict-items env.expect.that_collection(m_glob.calls).contains_exactly([ + # bin call + mocks.glob_call( + ["bin/*"], + allow_empty = True, + ), + # rewrite-bin call + mocks.glob_call( + ["rewrite-bin/*"], + allow_empty = True, + ), # srcs call mocks.glob_call( ["site-packages/**/*.py"], @@ -271,6 +294,8 @@ def _test_whl_and_library_deps(env): filegroup_calls = [] py_library_calls = [] m_glob = mocks.glob() + m_glob.results.append([]) # bin + m_glob.results.append([]) # rewrite-bin m_glob.results.append(["site-packages/foo/SRCS.py"]) m_glob.results.append(["site-packages/foo/DATA.txt"]) 
m_glob.results.append(["site-packages/foo/PYI.pyi"]) @@ -300,6 +325,7 @@ def _test_whl_and_library_deps(env): rules = struct( py_library = lambda **kwargs: py_library_calls.append(kwargs), create_inits = lambda **kwargs: ["_create_inits_target"], + venv_rewrite_shebang = lambda **kwargs: None, ), ) @@ -369,6 +395,8 @@ def _test_group(env): py_library_calls = [] m_glob = mocks.glob() + m_glob.results.append([]) # bin + m_glob.results.append([]) # rewrite-bin m_glob.results.append(["site-packages/foo/srcs.py"]) m_glob.results.append(["site-packages/foo/data.txt"]) m_glob.results.append(["site-packages/foo/pyi.pyi"]) @@ -396,6 +424,7 @@ def _test_group(env): rules = struct( py_library = lambda **kwargs: py_library_calls.append(kwargs), create_inits = lambda **kwargs: ["_create_inits_target"], + venv_rewrite_shebang = lambda **kwargs: None, ), ) @@ -435,6 +464,8 @@ def _test_group(env): }) # buildifier: @unsorted-dict-items env.expect.that_collection(m_glob.calls, expr = "glob calls").contains_exactly([ + mocks.glob_call(["bin/*"], allow_empty = True), + mocks.glob_call(["rewrite-bin/*"], allow_empty = True), mocks.glob_call(["site-packages/**/*.py"], exclude = [], allow_empty = True), mocks.glob_call(["site-packages/**/*"], exclude = [ "**/*.py", diff --git a/tests/pypi/whl_metadata/whl_metadata_tests.bzl b/tests/pypi/whl_metadata/whl_metadata_tests.bzl index 329423a26c..8131b0f452 100644 --- a/tests/pypi/whl_metadata/whl_metadata_tests.bzl +++ b/tests/pypi/whl_metadata/whl_metadata_tests.bzl @@ -5,6 +5,7 @@ load("@rules_testing//lib:truth.bzl", "subjects") load( "//python/private/pypi:whl_metadata.bzl", "find_whl_metadata", + "parse_entry_points", "parse_whl_metadata", ) # buildifier: disable=bzl-visibility @@ -171,6 +172,80 @@ Requires-Dist: this will be ignored _tests.append(_test_parse_metadata_multiline_license) +def _test_parse_entry_points(env): + got = parse_entry_points("""\ +[something] +interesting # with comments + +[console_scripts] +foo = foomod:main +# 
One which depends on extras: +foobar = importable.foomod:main_bar [bar, baz] + + # With a comment at the end +foobarbaz = foomod:main.attr # comment + +# With extra and comment +foo_extra_comment = foomod:main [extra] # comment + +[something else] +not very much interesting +""") + env.expect.that_dict(got).contains_exactly({ + "foo": { + "attribute": "main", + "extras": "", + "group": "console_scripts", + "module": "foomod", + "name": "foo", + }, + "foo_extra_comment": { + "attribute": "main", + "extras": "extra", + "group": "console_scripts", + "module": "foomod", + "name": "foo_extra_comment", + }, + "foobar": { + "attribute": "main_bar", + "extras": "bar, baz", + "group": "console_scripts", + "module": "importable.foomod", + "name": "foobar", + }, + "foobarbaz": { + "attribute": "main.attr", + "extras": "", + "group": "console_scripts", + "module": "foomod", + "name": "foobarbaz", + }, + }) + +_tests.append(_test_parse_entry_points) + +def _test_parse_entry_points_deduplicate(env): + got = parse_entry_points("""\ +[console_scripts] +FooBar = foomod:main +foobar = othermod:main +fooBAR = another:main + +[gui_scripts] +FOOBAR = guimod:main +""") + env.expect.that_dict(got).contains_exactly({ + "FooBar": { + "attribute": "main", + "extras": "", + "group": "console_scripts", + "module": "foomod", + "name": "FooBar", + }, + }) + +_tests.append(_test_parse_entry_points_deduplicate) + def whl_metadata_test_suite(name): # buildifier: disable=function-docstring test_suite( name = name, diff --git a/tests/repos/whl_with_data1/BUILD.bazel b/tests/repos/whl_with_data1/BUILD.bazel index af49d1ebbf..7ef8ba4cd9 100644 --- a/tests/repos/whl_with_data1/BUILD.bazel +++ b/tests/repos/whl_with_data1/BUILD.bazel @@ -1 +1 @@ -exports_files(glob(["*"])) +exports_files(glob(["**"])) diff --git a/tests/repos/whl_with_data1/whl_with_data1-1.0.data/scripts/whl_with_data1_pythonw b/tests/repos/whl_with_data1/whl_with_data1-1.0.data/scripts/whl_with_data1_pythonw new file mode 100755 index 
0000000000..6c7b3434c5 --- /dev/null +++ b/tests/repos/whl_with_data1/whl_with_data1-1.0.data/scripts/whl_with_data1_pythonw @@ -0,0 +1,9 @@ +#!pythonw +import sys + +# On Windows, pythonw doesn't have stdout/stderr streams, +# so output has to be written to a file. +with open(sys.argv[1], "w") as fp: + fp.write("hello from whl_with_data1_pythonw\n") + fp.write(sys.executable) + fp.write("\n") diff --git a/tests/repos/whl_with_data1/whl_with_data1-1.0.data/scripts/whl_with_data1_script b/tests/repos/whl_with_data1/whl_with_data1-1.0.data/scripts/whl_with_data1_script new file mode 100755 index 0000000000..8af40a9a55 --- /dev/null +++ b/tests/repos/whl_with_data1/whl_with_data1-1.0.data/scripts/whl_with_data1_script @@ -0,0 +1,5 @@ +#!python +import sys + +print("hello from whl_with_data1_script") +print(sys.executable) diff --git a/tests/repos/whl_with_data1/whl_with_data1-1.0.dist-info/RECORD b/tests/repos/whl_with_data1/whl_with_data1-1.0.dist-info/RECORD index a39e9ed7ad..10307c76a0 100644 --- a/tests/repos/whl_with_data1/whl_with_data1-1.0.dist-info/RECORD +++ b/tests/repos/whl_with_data1/whl_with_data1-1.0.dist-info/RECORD @@ -1,4 +1,5 @@ whl_with_data1-1.0.data/platlib/whl_with_data1/platlib_file.txt,sha256=123,123 +whl_with_data1-1.0.data/scripts/whl_with_data1_script,sha256=123,123 whl_with_data1-1.0.data/scripts/whl_script.sh,sha256=123,123 whl_with_data1-1.0.data/headers/whl_with_data1/header_file.h,sha256=123,123 whl_with_data1-1.0.data/purelib/whl_with_data1/data_file.txt,sha256=123,123 diff --git a/tests/repos/whl_with_data2/whl_with_data2-1.0.data/purelib/whl_with_data2/__init__.py b/tests/repos/whl_with_data2/whl_with_data2-1.0.data/purelib/whl_with_data2/__init__.py index e69de29bb2..45132c14d7 100644 --- a/tests/repos/whl_with_data2/whl_with_data2-1.0.data/purelib/whl_with_data2/__init__.py +++ b/tests/repos/whl_with_data2/whl_with_data2-1.0.data/purelib/whl_with_data2/__init__.py @@ -0,0 +1,6 @@ +import sys + + +def main(): + print("hello from 
whl_with_data2_bin") + print(sys.executable) diff --git a/tests/repos/whl_with_data2/whl_with_data2-1.0.dist-info/RECORD b/tests/repos/whl_with_data2/whl_with_data2-1.0.dist-info/RECORD index 5eeb915ba7..55c70740c8 100644 --- a/tests/repos/whl_with_data2/whl_with_data2-1.0.dist-info/RECORD +++ b/tests/repos/whl_with_data2/whl_with_data2-1.0.dist-info/RECORD @@ -10,3 +10,5 @@ whl_with_data2-1.0.data/scripts/overlap/both.sh,sha256=123,123 whl_with_data2-1.0.data/scripts/overlap/script2.sh,sha256=123,123 whl_with_data2-1.0.data/headers/overlap/both.h,sha256=123,123 whl_with_data2-1.0.data/headers/overlap/header2.h,sha256=123,123 +whl_with_data2-1.0.data/purelib/whl_with_data2/__init__.py,sha256=123,123 +whl_with_data2-1.0.dist-info/entry_points.txt,sha256=123,123 diff --git a/tests/repos/whl_with_data2/whl_with_data2-1.0.dist-info/entry_points.txt b/tests/repos/whl_with_data2/whl_with_data2-1.0.dist-info/entry_points.txt new file mode 100644 index 0000000000..8389a8a826 --- /dev/null +++ b/tests/repos/whl_with_data2/whl_with_data2-1.0.dist-info/entry_points.txt @@ -0,0 +1,2 @@ +[console_scripts] +whl_with_data2_bin = whl_with_data2:main diff --git a/tests/venv_site_packages_libs/BUILD.bazel b/tests/venv_site_packages_libs/BUILD.bazel index c99426b375..256c4f24b5 100644 --- a/tests/venv_site_packages_libs/BUILD.bazel +++ b/tests/venv_site_packages_libs/BUILD.bazel @@ -1,3 +1,4 @@ +load("@rules_python_internal//:rules_python_config.bzl", rp_config = "config") load("@rules_shell//shell:sh_test.bzl", "sh_test") load("//python:py_library.bzl", "py_library") load("//tests/support:py_reconfig.bzl", "py_reconfig_test") @@ -70,3 +71,21 @@ py_reconfig_test( ], }), ) + +py_reconfig_test( + name = "whl_scripts_runnable_test", + srcs = ["whl_scripts_runnable_test.py"], + bootstrap_impl = select({ + "@platforms//os:windows": "system_python", + "//conditions:default": "script", + }), + env = { + "BAZEL_8_OR_LATER": "1" if rp_config.bazel_8_or_later else "0", + }, + main = 
"whl_scripts_runnable_test.py", + venvs_site_packages = "yes", + deps = [ + "@whl_with_data1//:pkg", + "@whl_with_data2//:pkg", + ], +) diff --git a/tests/venv_site_packages_libs/whl_scripts_runnable_test.py b/tests/venv_site_packages_libs/whl_scripts_runnable_test.py new file mode 100644 index 0000000000..61b477f493 --- /dev/null +++ b/tests/venv_site_packages_libs/whl_scripts_runnable_test.py @@ -0,0 +1,122 @@ +import os +import subprocess +import sys +import tempfile +import unittest +from pathlib import Path + +BAZEL_8_OR_LATER = bool(int(os.environ.get("BAZEL_8_OR_LATER", "0"))) + + +class WhlScriptsRunnableTest(unittest.TestCase): + maxDiff = None + + def _get_script_path(self, name): + is_windows = sys.platform == "win32" + if is_windows: + bin_dir = Path(sys.prefix) / "Scripts" + pathexts = os.environ.get("PATHEXT", ".COM;.EXE;.BAT;.CMD").split(";") + for ext in [""] + [e.lower() for e in pathexts]: + script_path = bin_dir / f"{name}{ext}" + if script_path.exists(): + return script_path + return bin_dir / name + else: + bin_dir = Path(sys.prefix) / "bin" + script_path = bin_dir / name + return script_path + + def test_script_is_runnable(self): + script_path = self._get_script_path("whl_with_data1_script") + self.assertTrue(script_path.exists(), f"Script not found at {script_path}") + + result = subprocess.run( + [str(script_path)], + capture_output=True, + text=True, + check=True, + ) + + output = result.stdout.splitlines() + self.assertIn("hello from whl_with_data1_script", output) + + # The script prints the hello message and then sys.executable. + # Depending on how it's invoked, there might be more output before + # them, so only the last line is compared against sys.executable.
+ script_executable = output[-1].strip() + self.assertEqual(script_executable, sys.executable) + + def test_entry_point_is_runnable(self): + script_path = self._get_script_path("whl_with_data2_bin") + self.assertTrue(script_path.exists(), f"Entry point not found at {script_path}") + + result = subprocess.run( + [str(script_path)], + capture_output=True, + text=True, + check=True, + ) + + output = result.stdout.splitlines() + self.assertIn("hello from whl_with_data2_bin", output) + + script_executable = output[-1].strip() + self.assertEqual(script_executable, sys.executable) + + # This should really check for 8.5 instead of 8+, but we test with 8.6 + # so it's close enough for our purposes. + @unittest.skipUnless( + BAZEL_8_OR_LATER, + "bazel 8.5 and lower uses wheel.py, which rewrites #!pythonw to #!python", + ) + def test_pythonw_script(self): + script_path = self._get_script_path("whl_with_data1_pythonw") + self.assertTrue(script_path.exists(), f"Script not found at {script_path}") + + with open(script_path, "r", encoding="utf-8") as f: + first_line = f.readline() + + is_windows = sys.platform == "win32" + if is_windows: + # On Windows, the shebang is replaced with a batch wrapper that + # invokes the interpreter. + self.assertIn("pythonw.exe", first_line) + self.assertTrue( + first_line.startswith("@setlocal") + or first_line.startswith("@echo off"), + f"Expected Windows batch wrapper, got {first_line}", + ) + else: + self.assertTrue( + first_line.startswith("#!/bin/sh"), + f"Expected #!/bin/sh, got {first_line}", + ) + + # For some reason, on Windows, the subprocess can't write + # to the temporary files unless mkstemp is used. 
+ temp_fd, temp_str = tempfile.mkstemp() + try: + os.close(temp_fd) + out_path = Path(temp_str) + result = subprocess.run( + [str(script_path), str(out_path)], + capture_output=True, + text=True, + check=True, + ) + output = out_path.read_text().splitlines() + finally: + os.unlink(temp_str) + self.assertIn("hello from whl_with_data1_pythonw", output) + + script_executable = output[-1].strip() + + if is_windows: + self.assertTrue( + script_executable.endswith("pythonw.exe"), + f"Expected pythonw.exe, got {script_executable}", + ) + + +if __name__ == "__main__": + unittest.main()