Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: detect architecture/libc from wheel #548

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,6 @@ wheelhoust-*
tests/integration/testpackage/testpackage/testprogram
tests/integration/testpackage/testpackage/testprogram_nodeps
tests/integration/sample_extension/src/sample_extension.c

# Downloaded by test script
tests/integration/patchelf-0.17.2.1-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.musllinux_1_1_x86_64.whl
14 changes: 14 additions & 0 deletions noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,21 @@ def tests(session: nox.Session) -> None:
"""
posargs = session.posargs
extras = "coverage" if RUNNING_CI else "test"
session.install("-U", "pip")
session.install("-e", f".[{extras}]")
# for tests/integration/test_bundled_wheels.py::test_analyze_wheel_abi_static_exe
session.run(
"pip",
"download",
"--only-binary",
":all:",
"--no-deps",
"--platform",
"manylinux1_x86_64",
"-d",
"./tests/integration/",
"patchelf==0.17.2.1",
)
if RUNNING_CI:
posargs.extend(["--cov", "auditwheel", "--cov-branch"])
# pull manylinux images that will be used.
Expand Down
2 changes: 1 addition & 1 deletion src/auditwheel/architecture.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def is_superset(self, other: Architecture) -> bool:
return other.is_subset(self)

@staticmethod
def get_native_architecture(*, bits: int | None = None) -> Architecture:
def detect(*, bits: int | None = None) -> Architecture:
machine = platform.machine()
if sys.platform.startswith("win"):
machine = {"AMD64": "x86_64", "ARM64": "aarch64", "x86": "i686"}.get(
Expand Down
33 changes: 32 additions & 1 deletion src/auditwheel/error.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,39 @@


class AuditwheelException(Exception):
    """Common base class of the auditwheel error types defined in this module.

    The exception is always constructed from a single string message, which
    is stored as the first (and only) positional argument of ``Exception``.
    """

    def __init__(self, msg: str):
        super().__init__(msg)

    @property
    def message(self) -> str:
        """Return the message string this exception was constructed with."""
        text = self.args[0]
        # Guards the ``str`` return type: the constructor only accepts a str.
        assert isinstance(text, str)
        return text


class InvalidLibc(AuditwheelException):
    """Raised when a libc cannot be detected, parsed or is inconsistent."""


class WheelToolsError(AuditwheelException):
    """Auditwheel error type for wheel-tooling failures.

    NOTE(review): no raise site is visible in this excerpt; the meaning is
    inferred from the class name only -- confirm against the callers.
    """


class NonPlatformWheel(AuditwheelException):
    """No ELF binaries in the wheel"""

    def __init__(self, architecture: str | None, libraries: list[str] | None) -> None:
        """Build the error message from what was found in the wheel.

        Args:
            architecture: architecture name quoted in the message; ``None``
                selects the generic "not a platform wheel" message.
            libraries: ELF files that were found; ``None`` or an empty list
                selects the generic "not a platform wheel" message.
        """
        if architecture is None or not libraries:
            # Nothing ELF-like at all: this is not a platform wheel.
            msg = (
                "This does not look like a platform wheel, no ELF executable "
                "or shared library file (including compiled Python C extension) "
                "found in the wheel archive"
            )
        else:
            # ELF files exist, but none for the requested architecture:
            # list them (one per line, tab-indented) to help diagnosis.
            libraries_str = "\n\t".join(libraries)
            msg = (
                "Invalid binary wheel: no ELF executable or shared library file "
                "(including compiled Python C extension) with a "
                f"{architecture!r} architecture found. The following "
                f"ELF files were found:\n\t{libraries_str}\n"
            )
        super().__init__(msg)
62 changes: 41 additions & 21 deletions src/auditwheel/lddtree.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
from elftools.elf.sections import NoteSection

from .architecture import Architecture
from .libc import Libc, get_libc
from .error import InvalidLibc
from .libc import Libc

log = logging.getLogger(__name__)
__all__ = ["DynamicExecutable", "DynamicLibrary", "ldd"]
Expand Down Expand Up @@ -80,6 +81,7 @@
@dataclass(frozen=True)
class DynamicExecutable:
interpreter: str | None
libc: Libc | None
path: str
realpath: Path
platform: Platform
Expand Down Expand Up @@ -295,7 +297,9 @@


@functools.lru_cache
def load_ld_paths(root: str = "/", prefix: str = "") -> dict[str, list[str]]:
def load_ld_paths(
libc: Libc | None, root: str = "/", prefix: str = ""
) -> dict[str, list[str]]:
"""Load linker paths from common locations

This parses the ld.so.conf and LD_LIBRARY_PATH env var.
Expand Down Expand Up @@ -323,7 +327,6 @@
# on a per-ELF basis so it can get turned into the right thing.
ldpaths["env"] = parse_ld_paths(env_ldpath, path="")

libc = get_libc()
if libc == Libc.MUSL:
# from https://git.musl-libc.org/cgit/musl/tree/ldso
# /dynlink.c?id=3f701faace7addc75d16dea8a6cd769fa5b3f260#n1063
Expand Down Expand Up @@ -436,31 +439,43 @@
},
}
"""
if not ldpaths:
ldpaths = load_ld_paths().copy()

_first = _all_libs is None
if _all_libs is None:
_all_libs = {}

log.debug("ldd(%s)", path)

interpreter: str | None = None
libc: Libc | None = None
needed: set[str] = set()
rpaths: list[str] = []
runpaths: list[str] = []

with open(path, "rb") as f:
elf = ELFFile(f)

# get the platform
platform = _get_platform(elf)

# If this is the first ELF, extract the interpreter.
if _first:
for segment in elf.iter_segments():
if segment.header.p_type != "PT_INTERP":
continue

interp = segment.get_interp_name()
log.debug(" interp = %s", interp)
interpreter = normpath(root + interp)
soname = os.path.basename(interpreter)
_all_libs[soname] = DynamicLibrary(
soname,
interpreter,
Path(readlink(interpreter, root, prefixed=True)),
platform,
)
# if we have an interpreter and it's not MUSL, assume GLIBC
libc = Libc.MUSL if soname.startswith("ld-musl-") else Libc.GLIBC
if ldpaths is None:
ldpaths = load_ld_paths(libc).copy()
# XXX: Should read it and scan for /lib paths.
ldpaths["interp"] = [
normpath(root + os.path.dirname(interp)),
Expand All @@ -471,14 +486,10 @@
log.debug(" ldpaths[interp] = %s", ldpaths["interp"])
break

# get the platform
platform = _get_platform(elf)

# Parse the ELF's dynamic tags.
for segment in elf.iter_segments():
if segment.header.p_type != "PT_DYNAMIC":
continue

for t in segment.iter_tags():
if t.entry.d_tag == "DT_RPATH":
rpaths = parse_ld_paths(t.rpath, path=str(path), root=root)
Expand All @@ -497,14 +508,31 @@
del elf

if _first:
# get the libc based on dependencies
for soname in needed:
if soname.startswith(("libc.musl-", "ld-musl-")):
if libc is None:
libc = Libc.MUSL
if libc != Libc.MUSL:
msg = f"found a dependency on MUSL but the libc is already set to {libc}"
raise InvalidLibc(msg)

Check warning on line 518 in src/auditwheel/lddtree.py

View check run for this annotation

Codecov / codecov/patch

src/auditwheel/lddtree.py#L517-L518

Added lines #L517 - L518 were not covered by tests
elif soname == "libc.so.6" or soname.startswith(("ld-linux-", "ld64.so.")):
if libc is None:
libc = Libc.GLIBC
if libc != Libc.GLIBC:
msg = f"found a dependency on GLIBC but the libc is already set to {libc}"
raise InvalidLibc(msg)

Check warning on line 524 in src/auditwheel/lddtree.py

View check run for this annotation

Codecov / codecov/patch

src/auditwheel/lddtree.py#L523-L524

Added lines #L523 - L524 were not covered by tests
if ldpaths is None:
ldpaths = load_ld_paths(libc).copy()
# Propagate the rpaths used by the main ELF since those will be
# used at runtime to locate things.
ldpaths["rpath"] = rpaths
ldpaths["runpath"] = runpaths
log.debug(" ldpaths[rpath] = %s", rpaths)
log.debug(" ldpaths[runpath] = %s", runpaths)

# Search for the libs this ELF uses.
assert ldpaths is not None

all_ldpaths = (
ldpaths["rpath"]
+ rpaths
Expand Down Expand Up @@ -541,17 +569,9 @@
dependency.needed,
)

if interpreter is not None:
soname = os.path.basename(interpreter)
_all_libs[soname] = DynamicLibrary(
soname,
interpreter,
Path(readlink(interpreter, root, prefixed=True)),
platform,
)

return DynamicExecutable(
interpreter,
libc,
str(path) if display is None else display,
path,
platform,
Expand Down
80 changes: 72 additions & 8 deletions src/auditwheel/libc.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,88 @@
from __future__ import annotations

import logging
import os
import re
import subprocess
from dataclasses import dataclass
from enum import IntEnum
from pathlib import Path

from .error import InvalidLibc
from .musllinux import find_musl_libc

logger = logging.getLogger(__name__)


@dataclass(frozen=True, order=True)
class LibcVersion:
    """Immutable ``major.minor`` libc version.

    ``order=True`` makes instances comparable field by field, i.e. first by
    ``major`` and then by ``minor``.
    """

    # Field order matters: it defines the comparison order.
    major: int
    minor: int


class Libc(IntEnum):
    """The C library flavours handled here: GNU libc and musl."""

    GLIBC = 1
    MUSL = 2

    def get_current_version(self) -> LibcVersion:
        """Return the version of this libc as found on the current system.

        For ``MUSL`` the musl library is located and asked for its version;
        any other member is resolved through the glibc version lookup.
        """
        if self != Libc.MUSL:
            return _get_glibc_version()
        return _get_musl_version(_find_musl_libc())

    @staticmethod
    def detect() -> Libc:
        """Detect the libc in use: musl if it can be found, else GNU libc."""
        # check musl first, default to GLIBC
        try:
            _find_musl_libc()
        except InvalidLibc:
            logger.debug("Falling back to GNU libc")
            return Libc.GLIBC
        logger.debug("Detected musl libc")
        return Libc.MUSL


def _find_musl_libc() -> Path:
    """Return the path of the musl libc found under ``/lib``.

    Exactly one ``/lib/libc.musl-*.so.1`` entry must exist.

    Raises:
        InvalidLibc: if zero or several matching files are found.
    """
    matches = list(Path("/lib").glob("libc.musl-*.so.1"))
    try:
        # Single-element unpacking fails unless there is exactly one match.
        [musl_path] = matches
    except ValueError:
        reason = "musl libc not detected"
        logger.debug("%s", reason)
        raise InvalidLibc(reason) from None
    else:
        return musl_path


def get_libc() -> Libc:
    """Return ``Libc.MUSL`` when ``find_musl_libc()`` succeeds, else ``Libc.GLIBC``."""
    try:
        find_musl_libc()
        logger.debug("Detected musl libc")
        return Libc.MUSL
    except InvalidLibc:
        logger.debug("Falling back to GNU libc")
        return Libc.GLIBC


def _get_musl_version(ld_path: Path) -> LibcVersion:
    """Return the ``major.minor`` version reported by the musl binary.

    ``ld_path`` is executed without arguments and its stderr is searched for
    a ``Version <major>.<minor>.<patch>`` string; the patch number must be
    present but is not part of the result.

    Args:
        ld_path: path of the musl libc/loader to execute.

    Raises:
        InvalidLibc: if ``ld_path`` does not exist or no version string can
            be found in its stderr output.
    """
    try:
        # check=False: only the stderr text is used, the exit status is not.
        ld = subprocess.run(
            [ld_path], check=False, errors="strict", stderr=subprocess.PIPE
        ).stderr
    except FileNotFoundError as err:
        msg = "failed to determine musl version"
        logger.exception("%s", msg)
        raise InvalidLibc(msg) from err

    # The dots are escaped so that only a literal "X.Y.Z" is accepted; a bare
    # "." would match any character between the numbers.
    match = re.search(r"Version (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)", ld)
    if not match:
        msg = f"failed to parse musl version from string {ld!r}"
        raise InvalidLibc(msg) from None

    return LibcVersion(int(match.group("major")), int(match.group("minor")))


def _get_glibc_version() -> LibcVersion:
    """Return the ``major.minor`` glibc version reported by ``os.confstr``.

    Raises:
        InvalidLibc: if ``CS_GNU_LIBC_VERSION`` is unavailable, does not have
            the two-word "<name> <version>" shape, or its version part does
            not start with ``<major>.<minor>``.
    """
    # CS_GNU_LIBC_VERSION is only for glibc and shall return e.g. "glibc 2.3.4"
    try:
        raw: str | None = os.confstr("CS_GNU_LIBC_VERSION")
        assert raw is not None
        # Exactly two whitespace-separated words are expected.
        _, version = raw.rsplit()
    except (AssertionError, AttributeError, OSError, ValueError) as err:
        # os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)...
        msg = "failed to determine glibc version"
        raise InvalidLibc(msg) from err

    parsed = re.match(r"(?P<major>[0-9]+)\.(?P<minor>[0-9]+)", version)
    if parsed is None:
        msg = f"failed to parse glibc version from string {version!r}"
        raise InvalidLibc(msg)

    major = int(parsed.group("major"))
    minor = int(parsed.group("minor"))
    return LibcVersion(major, minor)
Loading