From a6be239fd849538b3f5bf0d516d13abf2a4b1d9c Mon Sep 17 00:00:00 2001 From: Enrique Saurez Date: Thu, 4 Jun 2026 09:12:12 -0700 Subject: [PATCH] [build] E: Build libxslt.so and libexslt.so MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Produce position-independent libxslt.so and libexslt.so alongside the existing static .a archives, wired as a real DT_NEEDED chain on top of esaurez/libxml2's libxml2.so: libxslt.so -> NEEDED libxml2.so libexslt.so -> NEEDED libxslt.so, NEEDED libxml2.so Only each .so's own .a is embedded via --whole-archive; the lower layers (libxml2, libz) are NOT bundled, so the Nanvix dynamic loader pulls them in transitively at dlopen time. This eliminates the multi-megabyte per-module duplication a self-contained build would cause and exercises the DT_NEEDED chain support shipped in esaurez/nanvix#27 in a real-world setting. Concretely: * `--with-pic`, `-fPIC` in CFLAGS — same .o files reusable for .a and .so. * Keep `--disable-shared` (libtool has no rules for i686-nanvix); the .so files are linked manually with `-shared -fPIC -nostdlib`. * The new SHAREDLIB targets use `-Wl,--whole-archive lib{xslt,exslt}.a -Wl,--no-whole-archive -lxml2 [-lxslt]`, setting DT_SONAME=libxslt.so / DT_SONAME=libexslt.so. * `make test` is extended to verify that each .so has the expected SONAME and exports its public API entry point. * `.nanvix/z.py` `_BUILD_OUTPUTS` and `release()` ship both static and shared variants. Sizes (stripped, DT_NEEDED chain vs the discarded self-contained prototype): libxslt.so 296 KB (was 1.8 MB) libexslt.so 92 KB (was 1.9 MB) Runtime dependencies: * esaurez/nanvix#27 — `.init_array` invocation + DT_NEEDED chain walking in the user-space loader. * esaurez/libxml2#1 — the libxml2.so this PR's binaries reference must be present in the buildroot. This implies a sequenced rollout: merge esaurez/libxml2#1 first, cut a new nanvix/libxml2 release, then this PR's CI build can resolve `-lxml2` to libxml2.so. 
Until then, CI continues to satisfy `-lxml2` against the existing libxml2.a in the release tarball, which produces a libxslt.so without a DT_NEEDED libxml2.so entry (the linker instead pulls the libxml2 objects it references out of the archive and embeds them statically). The end-state expects libxml2.so to be present. End-to-end validation (DT_NEEDED chain successfully resolved by the Nanvix loader at dlopen time) is performed downstream in esaurez/lxml#1 and the CPython lxml integration. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .nanvix/Makefile.nanvix | 66 ++++++++++++++++++++++++++++++++++++++--- .nanvix/z.py | 13 ++++++-- 2 files changed, 73 insertions(+), 6 deletions(-) diff --git a/.nanvix/Makefile.nanvix b/.nanvix/Makefile.nanvix index 03ab2a57..301f1ff9 100644 --- a/.nanvix/Makefile.nanvix +++ b/.nanvix/Makefile.nanvix @@ -6,10 +6,20 @@ # NANVIX_TOOLCHAIN= [target] # # Targets: -# all Build libxslt.a, libexslt.a, and the test ELF +# all Build libxslt.{a,so}, libexslt.{a,so}, and the test ELF # test Verify functional-test prerequisites (build artifacts). # The runtime execution is driven by z.py on the host. # clean Remove build artifacts +# +# Dependencies: +# The shared-library build requires: +# * The Nanvix dynamic loader to honour `.init_array` constructors and +# DT_NEEDED resolution chains (esaurez/nanvix#27). +# * `libxml2.so` to be present in the buildroot, provided by the +# companion esaurez/libxml2 release (esaurez/libxml2#1). +# libxslt.so is linked with DT_NEEDED libxml2.so, and libexslt.so is +# linked with DT_NEEDED libxslt.so and libxml2.so, so the loader pulls +# the lower-level libraries automatically at dlopen time. 
# =========================================================================== # Global Variables @@ -28,10 +38,12 @@ EXE = .elf STATICLIB_XSLT := libxslt/.libs/libxslt.a STATICLIB_EXSLT := libexslt/.libs/libexslt.a +SHAREDLIB_XSLT := libxslt/.libs/libxslt.so +SHAREDLIB_EXSLT := libexslt/.libs/libexslt.so TEST_SRC := .nanvix/test/test_libxslt.c TEST_ELF := test_libxslt$(EXE) -_NANVIX_DOCKER_BUILD_GOALS := all $(STATICLIB_XSLT) $(STATICLIB_EXSLT) $(TEST_ELF) +_NANVIX_DOCKER_BUILD_GOALS := all $(STATICLIB_XSLT) $(STATICLIB_EXSLT) $(SHAREDLIB_XSLT) $(SHAREDLIB_EXSLT) $(TEST_ELF) _NANVIX_GOALS := $(or $(MAKECMDGOALS),$(.DEFAULT_GOAL)) # Ensure required variables are defined. @@ -66,8 +78,13 @@ endif # Build Targets # =========================================================================== -all: $(STATICLIB_XSLT) $(STATICLIB_EXSLT) $(TEST_ELF) +all: $(STATICLIB_XSLT) $(STATICLIB_EXSLT) $(SHAREDLIB_XSLT) $(SHAREDLIB_EXSLT) $(TEST_ELF) +# Build the static archives with -fPIC so the same objects can be +# linked into position-independent .so files below. autotools' +# libtool shared-library detection does not know about i686-nanvix, +# so we keep --disable-shared and link the .so files ourselves from +# the .a archives. 
$(STATICLIB_XSLT) $(STATICLIB_EXSLT): sh -c '\ export PATH="$(NANVIX_TOOLCHAIN)/bin:$$PATH" && \ @@ -77,15 +94,46 @@ $(STATICLIB_XSLT) $(STATICLIB_EXSLT): --enable-static --disable-shared --disable-maintainer-mode \ --without-python --without-crypto \ --without-debugger --without-plugins \ + --with-pic \ --with-libxml-prefix="$(BUILDROOT_PATH)" \ --with-libxml-include-prefix="$(BUILDROOT_PATH)/include/libxml2" \ --with-libxml-libs-prefix="$(BUILDROOT_PATH)/lib" \ - CFLAGS="-I$(BUILDROOT_PATH)/include -I$(BUILDROOT_PATH)/include/libxml2" \ + CFLAGS="-I$(BUILDROOT_PATH)/include -I$(BUILDROOT_PATH)/include/libxml2 -fPIC" \ LDFLAGS="-L$(BUILDROOT_PATH)/lib" \ LIBS="-lxml2 -lz" && \ make -C libxslt -j$(NPROC) && \ make -C libexslt -j$(NPROC)' +# libxslt.so: links against libxml2.so (DT_NEEDED libxml2.so), with +# libxslt's own .o files embedded via --whole-archive. libxml2 itself +# is NOT embedded — the loader will dlopen libxml2.so transitively +# when libxslt.so is loaded, so the libxml2 code is shared across +# every consumer. libposix/libc/libm symbols stay unresolved and +# bind at dlopen time against the host executable's `.dynsym`. +$(SHAREDLIB_XSLT): $(STATICLIB_XSLT) + sh -c '\ + export PATH="$(NANVIX_TOOLCHAIN)/bin:$$PATH" && \ + i686-nanvix-gcc -shared -fPIC -nostdlib \ + -Wl,-soname,libxslt.so -Wl,-z,noexecstack \ + -L"$(BUILDROOT_PATH)/lib" \ + -Wl,--whole-archive $< -Wl,--no-whole-archive \ + -lxml2 \ + -o $@' + +# libexslt.so: links against libxslt.so (DT_NEEDED libxslt.so), which +# transitively brings in libxml2.so. Only libexslt's own .o files are +# embedded. libposix/libc/libm symbols stay unresolved and bind at +# dlopen time against the host executable. 
+$(SHAREDLIB_EXSLT): $(STATICLIB_EXSLT) $(SHAREDLIB_XSLT) + sh -c '\ + export PATH="$(NANVIX_TOOLCHAIN)/bin:$$PATH" && \ + i686-nanvix-gcc -shared -fPIC -nostdlib \ + -Wl,-soname,libexslt.so -Wl,-z,noexecstack \ + -Llibxslt/.libs -L"$(BUILDROOT_PATH)/lib" \ + -Wl,--whole-archive $< -Wl,--no-whole-archive \ + -lxslt -lxml2 \ + -o $@' + $(TEST_ELF): $(TEST_SRC) @test -f $(STATICLIB_XSLT) || { echo " FAIL: $(STATICLIB_XSLT) not found; run 'build' first"; exit 1; } @test -f $(STATICLIB_EXSLT) || { echo " FAIL: $(STATICLIB_EXSLT) not found; run 'build' first"; exit 1; } @@ -113,7 +161,17 @@ test: @echo "=== libxslt functional tests ===" @test -f $(STATICLIB_XSLT) || { echo " FAIL: $(STATICLIB_XSLT) not found"; exit 1; } @test -f $(STATICLIB_EXSLT) || { echo " FAIL: $(STATICLIB_EXSLT) not found"; exit 1; } + @test -f $(SHAREDLIB_XSLT) || { echo " FAIL: $(SHAREDLIB_XSLT) not found"; exit 1; } + @test -f $(SHAREDLIB_EXSLT) || { echo " FAIL: $(SHAREDLIB_EXSLT) not found"; exit 1; } @test -s $(TEST_ELF) || { echo " FAIL: $(TEST_ELF) missing or empty"; exit 1; } + @$(NANVIX_TOOLCHAIN)/bin/i686-nanvix-readelf -d $(SHAREDLIB_XSLT) | grep -q 'SONAME.*\[libxslt\.so\]' \ + || { echo " FAIL: SONAME=libxslt.so not set on $(SHAREDLIB_XSLT)"; exit 1; } + @$(NANVIX_TOOLCHAIN)/bin/i686-nanvix-readelf -d $(SHAREDLIB_EXSLT) | grep -q 'SONAME.*\[libexslt\.so\]' \ + || { echo " FAIL: SONAME=libexslt.so not set on $(SHAREDLIB_EXSLT)"; exit 1; } + @$(NANVIX_TOOLCHAIN)/bin/i686-nanvix-nm -D $(SHAREDLIB_XSLT) | grep -q 'T xsltParseStylesheetDoc' \ + || { echo " FAIL: xsltParseStylesheetDoc missing from $(SHAREDLIB_XSLT) .dynsym"; exit 1; } + @$(NANVIX_TOOLCHAIN)/bin/i686-nanvix-nm -D $(SHAREDLIB_EXSLT) | grep -q 'T exsltRegisterAll' \ + || { echo " FAIL: exsltRegisterAll missing from $(SHAREDLIB_EXSLT) .dynsym"; exit 1; } @echo " OK: build artifacts present (runtime execution is driven by z.py on the host)" @echo "=== libxslt functional test prerequisites PASSED ===" diff 
--git a/.nanvix/z.py b/.nanvix/z.py index e6a28120..ca224a38 100644 --- a/.nanvix/z.py +++ b/.nanvix/z.py @@ -34,7 +34,9 @@ _BUILD_OUTPUTS = [ "test_libxslt.elf", "libxslt/.libs/libxslt.a", + "libxslt/.libs/libxslt.so", "libexslt/.libs/libexslt.a", + "libexslt/.libs/libexslt.so", ] IS_WINDOWS = sys.platform == "win32" @@ -306,10 +308,12 @@ def release(self) -> None: xslt_inc.mkdir(parents=True) exslt_inc.mkdir(parents=True) - # Copy static libraries. + # Copy static and shared libraries. for name, src_dir in [ ("libxslt.a", repo / "libxslt" / ".libs"), + ("libxslt.so", repo / "libxslt" / ".libs"), ("libexslt.a", repo / "libexslt" / ".libs"), + ("libexslt.so", repo / "libexslt" / ".libs"), ]: src = src_dir / name if not src.is_file(): @@ -341,7 +345,12 @@ def release(self) -> None: with tarfile.open(str(tarball), "r:gz") as tf: members = tf.getnames() - for expected in ("sysroot/lib/libxslt.a", "sysroot/lib/libexslt.a"): + for expected in ( + "sysroot/lib/libxslt.a", + "sysroot/lib/libxslt.so", + "sysroot/lib/libexslt.a", + "sysroot/lib/libexslt.so", + ): if expected not in members: raise ValueError(f"Package missing {expected}")