From 694577b7f97b7d0ffa67a23c03877f32fe0cc546 Mon Sep 17 00:00:00 2001 From: Igor Pecovnik Date: Sun, 17 May 2026 16:48:37 +0200 Subject: [PATCH 01/12] extensions/nvidia: auto-detect highest nvidia-dkms available per distro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drops the hardcoded NVIDIA_DRIVER_VERSION=580 default (with its honest @TODO comment about per-release / Debian-vs-Ubuntu drift). The post_install hook now resolves the package set against the chroot's actual apt index at install time: 1. If NVIDIA_DRIVER_VERSION is set explicitly (env or config), pin to it — operator override always wins. 2. Otherwise, ask apt for the highest `nvidia-dkms-<N>` available in the target distribution/release. Common Ubuntu shape across noble / resolute / etc. — versions vary (535, 550, 560, 580, ...). 3. Fall through to the unversioned Debian metapackage `nvidia-dkms` if no numeric variants exist (bookworm, trixie). 4. None of the above — skip with a warning instead of crashing the build on an opaque 'package not found'. Closes the long-standing @TODO and removes the silent build failures on releases that don't ship nvidia-dkms-580 specifically. --- extensions/nvidia.sh | 51 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/extensions/nvidia.sh b/extensions/nvidia.sh index 7636f72a89af..14d745a84300 100644 --- a/extensions/nvidia.sh +++ b/extensions/nvidia.sh @@ -12,16 +12,59 @@ function extension_finish_config__build_nvidia_kernel_module() { fi declare -g MODULES_BLACKLIST="nouveau" declare -g INSTALL_HEADERS="yes" - declare -g NVIDIA_DRIVER_VERSION="${NVIDIA_DRIVER_VERSION:-"580"}" # @TODO: this might vary per-release and Debian/Ubuntu - display_alert "Forcing INSTALL_HEADERS=yes; using nVidia driver version ${NVIDIA_DRIVER_VERSION}" "${EXTENSION}" "debug" + # NVIDIA_DRIVER_VERSION is intentionally NOT defaulted here. 
The + # post_install hook below asks apt (inside the chroot, after apt + # sources are wired up) which nvidia-dkms- is actually + # available for the target distribution/release and picks the + # highest one. Debian-style unversioned `nvidia-dkms` is the + # fall-back when no numbered variants exist (Debian bookworm, + # trixie). Set NVIDIA_DRIVER_VERSION via env or config to pin. + display_alert "Forcing INSTALL_HEADERS=yes" "${EXTENSION}" "debug" } function post_install_kernel_debs__build_nvidia_kernel_module() { [[ "${INSTALL_HEADERS}" != "yes" ]] || [[ "${KERNEL_HAS_WORKING_HEADERS}" != "yes" ]] && return 0 - display_alert "Install nVidia packages, build kernel module in chroot" "${EXTENSION}" "info" + + # Resolve which nvidia-dkms / nvidia-driver package(s) to install. + # Three cases: + # 1. Operator pinned NVIDIA_DRIVER_VERSION (env/config) → trust it. + # 2. Auto-detect: highest `nvidia-dkms-` in the chroot's apt + # index. This is the common Ubuntu shape — 535, 550, 560, + # 580, … depending on release and snapshot. + # 3. Fall through to the unversioned Debian metapackage + # `nvidia-dkms` when no numeric variants exist. + # If none of the three resolve, skip with a warning rather than + # blowing the build up with an opaque "package not found". 
+ local nvidia_dkms_pkg nvidia_driver_pkg + if [[ -n "${NVIDIA_DRIVER_VERSION:-}" ]]; then + nvidia_dkms_pkg="nvidia-dkms-${NVIDIA_DRIVER_VERSION}" + nvidia_driver_pkg="nvidia-driver-${NVIDIA_DRIVER_VERSION}" + display_alert "Using pinned NVIDIA_DRIVER_VERSION" "${NVIDIA_DRIVER_VERSION}" "info" + else + local latest + latest=$(chroot_sdcard "apt-cache pkgnames 'nvidia-dkms-' 2>/dev/null \ + | grep -E '^nvidia-dkms-[0-9]+\$' \ + | sed 's/nvidia-dkms-//' \ + | sort -nr | head -1") + if [[ -n "$latest" ]]; then + NVIDIA_DRIVER_VERSION="$latest" + nvidia_dkms_pkg="nvidia-dkms-${NVIDIA_DRIVER_VERSION}" + nvidia_driver_pkg="nvidia-driver-${NVIDIA_DRIVER_VERSION}" + display_alert "Auto-detected nvidia-dkms for ${DISTRIBUTION}/${RELEASE}" "${NVIDIA_DRIVER_VERSION}" "info" + elif chroot_sdcard "apt-cache pkgnames nvidia-dkms 2>/dev/null | grep -qx nvidia-dkms"; then + nvidia_dkms_pkg="nvidia-dkms" + nvidia_driver_pkg="nvidia-driver" + display_alert "Using unversioned nvidia-dkms metapackage" "${DISTRIBUTION}/${RELEASE}" "info" + else + display_alert "No nvidia-dkms package in ${DISTRIBUTION}/${RELEASE} apt sources" "skipping nVidia install" "warn" + return 0 + fi + fi + + display_alert "Install nVidia packages, build kernel module in chroot" "${EXTENSION} (${nvidia_dkms_pkg})" "info" # chroot_sdcard_apt_get_install() is in lib/logging/runners.sh which handles "running" of stuff nicely. # chroot_sdcard_apt_get_install() -> chroot_sdcard_apt_get() -> chroot_sdcard() -> run_host_command_logged_raw() # it handles bash-specific quoting issues, apt proxies, logging, and errors. 
declare -ag if_error_find_files_sdcard=("/var/lib/dkms/nvidia/*/build/make.log") - chroot_sdcard_apt_get_install "nvidia-dkms-${NVIDIA_DRIVER_VERSION}" "nvidia-driver-${NVIDIA_DRIVER_VERSION}" + chroot_sdcard_apt_get_install "${nvidia_dkms_pkg}" "${nvidia_driver_pkg}" } From 7a7ee6a2d09bc226ed11055c9d8199c0d4b36fe5 Mon Sep 17 00:00:00 2001 From: Igor Pecovnik Date: Sun, 17 May 2026 17:16:27 +0200 Subject: [PATCH 02/12] nvidia: runtime auto-disable on hosts without NVIDIA hardware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous solution lived in packages/bsp/common/usr/lib/armbian/armbian-firstrun as a single line: [[ -n "$(dmesg | grep "No NVIDIA GPU found")" ]] && \ sudo apt-get -y -qq purge nvidia-dkms-510 nvidia-driver-510 \ nvidia-settings nvidia-common \ >> /dev/null Two reasons it was unreliable: 1. dmesg-grep for "No NVIDIA GPU found" only sees the line if the driver actually bound far enough to print it. On many boots the line never appears (driver couldn't load at all) or has already rotated out of the kernel ring buffer by the time firstrun runs. 2. Hardcoded nvidia-dkms-510 / nvidia-driver-510 — wrong on every distro/release that ships a different driver branch, and especially wrong now that the install path auto-picks the highest available version. Replace it with a build-time-installed detector + systemd one-shot under extensions/nvidia.sh: - /usr/lib/armbian/armbian-nvidia-autodetect probes the PCI bus directly (lspci -nn, vendor 10de). Works regardless of whether any driver module loaded. If no NVIDIA hardware found: a. drops /etc/modprobe.d/armbian-nvidia-disabled.conf (blacklist nvidia / nvidia_drm / nvidia_modeset / nvidia_uvm) so the driver doesn't try to load on the next boot. b. dpkg-query's the actually-installed nvidia-dkms-* / nvidia-driver-* / nvidia-settings / nvidia-common packages (no hardcoded version!) and apt-purges them. DKMS stops rebuilding the module on every kernel update. 
- /etc/systemd/system/armbian-nvidia-autodetect.service Type=oneshot, runs Before=display-manager.service / graphical.target. WantedBy=multi-user.target — fires every boot. Cheap (early exit when NVIDIA present), idempotent (no-op on a system where the packages are already purged), and handles hot-pluggable scenarios (eGPU added later → reverse direction handled by removing the modprobe.d file manually). Removes the dmesg-grep line from armbian-firstrun and leaves a breadcrumb pointing at the new location. --- extensions/nvidia.sh | 120 ++++++++++++++++++ .../common/usr/lib/armbian/armbian-firstrun | 13 +- 2 files changed, 132 insertions(+), 1 deletion(-) diff --git a/extensions/nvidia.sh b/extensions/nvidia.sh index 14d745a84300..b9a6843182f7 100644 --- a/extensions/nvidia.sh +++ b/extensions/nvidia.sh @@ -67,4 +67,124 @@ function post_install_kernel_debs__build_nvidia_kernel_module() { # it handles bash-specific quoting issues, apt proxies, logging, and errors. declare -ag if_error_find_files_sdcard=("/var/lib/dkms/nvidia/*/build/make.log") chroot_sdcard_apt_get_install "${nvidia_dkms_pkg}" "${nvidia_driver_pkg}" + + # Install the runtime hardware-detection helper. On hosts that + # happen to have NVIDIA hardware this is a no-op; on hosts that + # don't, it blacklists the modules and purges the packages so + # DKMS doesn't rebuild them on every kernel update. + install_armbian_nvidia_autodetect_helper +} + +# ----------------------------------------------------------------------------- +# Runtime auto-disable of the driver on hosts without NVIDIA hardware. +# +# Replaces a dmesg-grep one-liner that used to live in +# packages/bsp/common/usr/lib/armbian/armbian-firstrun. The old approach was +# unreliable for two reasons: +# 1. It looked for "No NVIDIA GPU found" in dmesg — that line is only +# printed if the driver actually attempted to bind and failed, and is +# already rotated out of the ring buffer on many boots. +# 2. 
It purged a hardcoded version (nvidia-dkms-510) — wrong on every +# distro/release that ships a different driver branch, and especially +# wrong now that the install path auto-picks the highest available. +# +# This installs a small detector + systemd one-shot that: +# - probes the PCI bus directly (lspci, vendor 0x10de) — works regardless +# of whether the driver loaded, +# - blacklists nvidia / nvidia_drm / nvidia_modeset / nvidia_uvm via +# /etc/modprobe.d so they don't load on the next boot, +# - dpkg-query's the actually-installed nvidia-dkms-* / nvidia-driver-* / +# nvidia-settings / nvidia-common packages (no hardcoded version) and +# apt-purges them. +# ----------------------------------------------------------------------------- +function install_armbian_nvidia_autodetect_helper() { + display_alert "Installing runtime NVIDIA hardware detector" "${EXTENSION}" "info" + + mkdir -p "${SDCARD}/usr/lib/armbian" "${SDCARD}/etc/systemd/system" + + cat <<- 'AUTODETECT_SH' > "${SDCARD}/usr/lib/armbian/armbian-nvidia-autodetect" + #!/bin/sh + # armbian-nvidia-autodetect — installed by build/extensions/nvidia.sh. + # + # On hosts WITH an NVIDIA GPU (PCI vendor 10de): no-op. + # On hosts WITHOUT one: blacklist the modules and purge the nvidia + # packages so DKMS doesn't keep rebuilding the kernel module on + # every kernel update. + # + # Detection is via lspci (queries the PCI bus directly). Earlier + # attempts used `dmesg | grep "No NVIDIA GPU found"` which only + # fires if the driver bound far enough to print that line, and + # falls off the ring buffer. + + set -e + + # Need lspci. It's part of pciutils — present on every desktop + # image, but be defensive on hand-built minimal flavours. + if ! command -v lspci > /dev/null 2>&1; then + exit 0 + fi + + # NVIDIA PCI vendor ID is 0x10de. Match the literal "[10de:" in + # `lspci -nn` output so non-VGA NVIDIA devices (Tegra USB-C, + # audio over HDMI, etc.) also count. 
+ if lspci -nn 2>/dev/null | grep -qiE '\[10de:'; then + exit 0 + fi + + # No NVIDIA hardware. Belt and suspenders: + # 1. modprobe.d blacklist — takes effect on the next boot and + # is idempotent if we get killed mid-purge. + # 2. apt purge — removes the package set so DKMS doesn't burn + # cycles rebuilding modules that will never load. + cat > /etc/modprobe.d/armbian-nvidia-disabled.conf <<-EOF + # Installed by armbian-nvidia-autodetect: no NVIDIA GPU on this host. + # Delete this file to re-enable the driver. + blacklist nvidia + blacklist nvidia_drm + blacklist nvidia_modeset + blacklist nvidia_uvm + EOF + + # dpkg-query the package set actually installed (no hardcoded + # version — varies per distro / extension config). Returns + # empty on a second run, which makes the purge a no-op. + NVIDIA_PKGS=$(dpkg-query -W -f='${binary:Package}\n' \ + 'nvidia-dkms-*' 'nvidia-driver-*' \ + 'nvidia-settings' 'nvidia-common' 2>/dev/null | tr '\n' ' ') + if [ -n "$NVIDIA_PKGS" ]; then + DEBIAN_FRONTEND=noninteractive apt-get -y -qq purge $NVIDIA_PKGS >/dev/null 2>&1 || true + DEBIAN_FRONTEND=noninteractive apt-get -y -qq autoremove --purge >/dev/null 2>&1 || true + fi + AUTODETECT_SH + chmod 0755 "${SDCARD}/usr/lib/armbian/armbian-nvidia-autodetect" + + cat <<- 'AUTODETECT_SERVICE' > "${SDCARD}/etc/systemd/system/armbian-nvidia-autodetect.service" + [Unit] + Description=Detect NVIDIA hardware; disable driver if absent + Documentation=https://github.com/armbian/build/blob/main/extensions/nvidia.sh + # Run BEFORE anything that might try to use the GPU (display + # manager, console framebuffer init). After local fs so the + # script's writes and dpkg state are available. 
+ After=local-fs.target + Before=display-manager.service graphical.target + + [Service] + Type=oneshot + ExecStart=/usr/lib/armbian/armbian-nvidia-autodetect + # Stay activated so the unit shows green in `systemctl status` + # after a successful run — without this the unit would always + # read as inactive (dead). + RemainAfterExit=yes + + [Install] + WantedBy=multi-user.target + AUTODETECT_SERVICE + + # Enable the unit so it fires at every boot. Cheap when NVIDIA is + # present (early exit on the lspci check) and idempotent when not + # (apt-purge is a no-op on a system where the packages are already + # gone). Running every boot means hot-pluggable scenarios (eGPU, + # Thunderbolt) get re-evaluated. + chroot_sdcard "systemctl enable armbian-nvidia-autodetect.service" || \ + display_alert "Could not enable armbian-nvidia-autodetect.service in chroot" "${EXTENSION}" "warn" } diff --git a/packages/bsp/common/usr/lib/armbian/armbian-firstrun b/packages/bsp/common/usr/lib/armbian/armbian-firstrun index e50131dfad46..62eb4587eb76 100755 --- a/packages/bsp/common/usr/lib/armbian/armbian-firstrun +++ b/packages/bsp/common/usr/lib/armbian/armbian-firstrun @@ -97,7 +97,18 @@ case "$1" in ;; x86|arm64) - [[ -n "$(dmesg | grep "No NVIDIA GPU found")" ]] && sudo apt-get -y -qq purge nvidia-dkms-510 nvidia-driver-510 nvidia-settings nvidia-common >> /dev/null + # NVIDIA hardware detection moved out of firstrun. The + # previous one-liner here greped dmesg for "No NVIDIA GPU + # found" and purged a hardcoded nvidia-dkms-510, both of + # which were unreliable: the dmesg line is only emitted + # if the driver bound far enough to print it (and the + # line falls off the ring buffer on busy boots), and the + # version pin is wrong for every release that doesn't + # happen to ship 510. 
The autodetect now lives in + # /usr/lib/armbian/armbian-nvidia-autodetect (installed + # by build/extensions/nvidia.sh), runs via systemd, and + # uses lspci + dpkg-query for proper hardware/package + # detection. ;; *) From f81623088f4d86533fe9d1e7d9d57760861ca29a Mon Sep 17 00:00:00 2001 From: Igor Pecovnik Date: Sun, 17 May 2026 17:20:07 +0200 Subject: [PATCH 03/12] extensions/nvidia: pull pciutils so lspci is guaranteed at runtime MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The runtime armbian-nvidia-autodetect helper in this extension calls lspci to probe the PCI bus for an NVIDIA card. lspci lives in pciutils, which isn't in the Debian/Ubuntu base install and isn't guaranteed to be pulled by every desktop metapackage transitively. The helper defensively no-ops when lspci is missing — which would leave images without auto-disable on no-GPU hosts (the exact thing this PR is meant to fix). Append pciutils to PACKAGE_LIST_ADDITIONAL in extension_finish_config so it lands in the rootfs alongside the other build-time prerequisites. --- extensions/nvidia.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/extensions/nvidia.sh b/extensions/nvidia.sh index b9a6843182f7..6a83b81159e4 100644 --- a/extensions/nvidia.sh +++ b/extensions/nvidia.sh @@ -12,6 +12,12 @@ function extension_finish_config__build_nvidia_kernel_module() { fi declare -g MODULES_BLACKLIST="nouveau" declare -g INSTALL_HEADERS="yes" + # pciutils ships /usr/bin/lspci. Required by the runtime + # armbian-nvidia-autodetect helper that this extension installs in + # post_install_kernel_debs — without it the helper short-circuits + # (defensively) and the auto-disable on no-GPU hosts never fires. + # Not always pulled transitively on minimal-base + GNOME flavours. + declare -g PACKAGE_LIST_ADDITIONAL+=" pciutils" # NVIDIA_DRIVER_VERSION is intentionally NOT defaulted here. 
The # post_install hook below asks apt (inside the chroot, after apt # sources are wired up) which nvidia-dkms- is actually From 2f6bdf1677c644082c28c98283bdce71fdde327d Mon Sep 17 00:00:00 2001 From: Igor Pecovnik Date: Sun, 17 May 2026 17:52:06 +0200 Subject: [PATCH 04/12] extensions/nvidia: also purge unversioned nvidia-dkms / nvidia-driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The runtime autodetect's dpkg-query argument list used only the globs 'nvidia-dkms-*' and 'nvidia-driver-*' for the numbered Ubuntu shape. The trailing dash makes those globs miss the bare 'nvidia-dkms' / 'nvidia-driver' metapackages — which the install branch deliberately falls through to on Debian (case 3 of the resolver added in this PR). Add the exact names alongside the globs so the purge covers both shapes. Without this fix a Debian image installed with the extension on a host that turns out to have no NVIDIA GPU would correctly drop the modprobe blacklist but leave the package set behind, defeating the DKMS-rebuild-avoidance half of the autodisable design. --- extensions/nvidia.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/extensions/nvidia.sh b/extensions/nvidia.sh index 6a83b81159e4..6f4fe94d0fc3 100644 --- a/extensions/nvidia.sh +++ b/extensions/nvidia.sh @@ -154,8 +154,14 @@ function install_armbian_nvidia_autodetect_helper() { # dpkg-query the package set actually installed (no hardcoded # version — varies per distro / extension config). Returns # empty on a second run, which makes the purge a no-op. + # Glob 'nvidia-dkms-*' requires the trailing dash so it won't + # match the bare 'nvidia-dkms' / 'nvidia-driver' metapackages + # Debian ships (and which the install branch above can pick + # under case-3). List those exact names alongside the globs + # so the purge covers both shapes. 
NVIDIA_PKGS=$(dpkg-query -W -f='${binary:Package}\n' \ 'nvidia-dkms-*' 'nvidia-driver-*' \ + 'nvidia-dkms' 'nvidia-driver' \ 'nvidia-settings' 'nvidia-common' 2>/dev/null | tr '\n' ' ') if [ -n "$NVIDIA_PKGS" ]; then DEBIAN_FRONTEND=noninteractive apt-get -y -qq purge $NVIDIA_PKGS >/dev/null 2>&1 || true From cb85d16171dfe842b23b928be4ebded6c3c7c860 Mon Sep 17 00:00:00 2001 From: Igor Pecovnik Date: Sun, 17 May 2026 18:03:27 +0200 Subject: [PATCH 05/12] extensions/nvidia: || true on the apt-cache pipeline so case-3 is reachable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit chroot_sdcard wraps its argument with `bash -e -o pipefail -c …`, so the pipeline `apt-cache pkgnames … | grep … | sed … | sort | head -1` returns 1 when grep finds no numbered nvidia-dkms-<N> entries — the exact Debian shape that the install resolver's case-3 fall-through was designed to handle. Under the build framework's outer `set -e` (compile.sh) the substitution `latest=$(chroot_sdcard …)` then aborts the build at that assignment, which means case-3 (unversioned `nvidia-dkms` metapackage) was unreachable in practice. Append `|| true` to the inner pipeline so the substitution always succeeds with `$latest` empty on no-match, and the `if/elif` chain below can pick case-2 (number found), case-3 (Debian fallback) or case-4 (skip with warn) on real data. Reproduced and verified locally — without `|| true` the assignment aborts; with it, latest='' and the fallback executes. 
--- extensions/nvidia.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/extensions/nvidia.sh b/extensions/nvidia.sh index 6f4fe94d0fc3..6f18ab3a7205 100644 --- a/extensions/nvidia.sh +++ b/extensions/nvidia.sh @@ -48,10 +48,17 @@ function post_install_kernel_debs__build_nvidia_kernel_module() { display_alert "Using pinned NVIDIA_DRIVER_VERSION" "${NVIDIA_DRIVER_VERSION}" "info" else local latest + # chroot_sdcard wraps the inner command with `bash -e -o + # pipefail -c …`, so this pipeline returns 1 when grep finds + # no numbered nvidia-dkms-N packages (Debian / fall-through + # case). Under the framework's outer set -e the substitution + # would abort the build before we get to test $latest, making + # case-3 below unreachable. `|| true` keeps the substitution + # successful with $latest empty so the fall-through fires. latest=$(chroot_sdcard "apt-cache pkgnames 'nvidia-dkms-' 2>/dev/null \ | grep -E '^nvidia-dkms-[0-9]+\$' \ | sed 's/nvidia-dkms-//' \ - | sort -nr | head -1") + | sort -nr | head -1 || true") if [[ -n "$latest" ]]; then NVIDIA_DRIVER_VERSION="$latest" nvidia_dkms_pkg="nvidia-dkms-${NVIDIA_DRIVER_VERSION}" From 8291c0d70431b8daf56c83cceaa47dbead30df1e Mon Sep 17 00:00:00 2001 From: Igor Pecovnik Date: Sun, 17 May 2026 18:15:33 +0200 Subject: [PATCH 06/12] extensions/nvidia: drop PACKAGE_LIST_ADDITIONAL+=pciutils (readonly + redundant) Two reasons: 1. PACKAGE_LIST_ADDITIONAL is sealed `readonly` by the time extension_finish_config__* hooks run, so the `declare -g PACKAGE_LIST_ADDITIONAL+=" pciutils"` line aborted the build with: /armbian/extensions/nvidia.sh: line 20: declare: PACKAGE_LIST_ADDITIONAL: readonly variable 2. It was redundant anyway. `pciutils` is already listed in config/cli/common/main/packages.additional, which ships in every non-minimal CLI image. This extension early-returns on BUILD_MINIMAL=yes, so we never reach a context that wouldn't have pciutils already present. 
Replace the now-broken line with a comment pointing at the canonical source so a future maintainer doesn't try to add it again. --- extensions/nvidia.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/extensions/nvidia.sh b/extensions/nvidia.sh index 6f18ab3a7205..44e375406198 100644 --- a/extensions/nvidia.sh +++ b/extensions/nvidia.sh @@ -12,12 +12,12 @@ function extension_finish_config__build_nvidia_kernel_module() { fi declare -g MODULES_BLACKLIST="nouveau" declare -g INSTALL_HEADERS="yes" - # pciutils ships /usr/bin/lspci. Required by the runtime - # armbian-nvidia-autodetect helper that this extension installs in - # post_install_kernel_debs — without it the helper short-circuits - # (defensively) and the auto-disable on no-GPU hosts never fires. - # Not always pulled transitively on minimal-base + GNOME flavours. - declare -g PACKAGE_LIST_ADDITIONAL+=" pciutils" + # pciutils (provides /usr/bin/lspci used by the runtime + # armbian-nvidia-autodetect helper) is already in + # config/cli/common/main/packages.additional and ships in every + # non-minimal image. This extension early-returns on + # BUILD_MINIMAL=yes, so we never reach this point without it. + # No explicit install required here. # NVIDIA_DRIVER_VERSION is intentionally NOT defaulted here. The # post_install hook below asks apt (inside the chroot, after apt # sources are wired up) which nvidia-dkms- is actually From e4105ae6460b939641c1d25f638e3046d2dc91d6 Mon Sep 17 00:00:00 2001 From: Igor Pecovnik Date: Sun, 17 May 2026 19:36:05 +0200 Subject: [PATCH 07/12] extensions/nvidia: move autodetect install to post_family_tweaks hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit post_install_kernel_debs runs BEFORE armbian-bsp-cli is installed in the chroot (per its own docstring at the call site). 
Anything we wrote into /usr/lib/armbian/ or /etc/systemd/system/ there was getting clobbered by the BSP install or swept by later rootfs steps — operator reported the autodetect script + service simply weren't in the resulting rootfs even though the firstrun edit shipped (because firstrun ships through the BSP package which is dpkg-tracked). Split the responsibilities: - post_install_kernel_debs__build_nvidia_kernel_module keeps doing the apt-get install of nvidia-dkms-* / nvidia-driver-* (works fine before BSP — dependencies resolve, dkms builds). - post_family_tweaks__build_nvidia_kernel_module_autodetect (NEW) calls install_armbian_nvidia_autodetect_helper. post_family_tweaks fires AFTER `install_artifact_deb_chroot "armbian-bsp-cli"` so /usr/lib/armbian/ already exists with BSP-owned content and our untracked drop sits beside it without being overwritten. The autodetect remains extension-gated (only on images built with the nvidia extension enabled), not BSP-common — per operator preference, to avoid every SBC's bsp-cli carrying nvidia-related plumbing it has no use for. --- extensions/nvidia.sh | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/extensions/nvidia.sh b/extensions/nvidia.sh index 44e375406198..ca334ed308ff 100644 --- a/extensions/nvidia.sh +++ b/extensions/nvidia.sh @@ -80,11 +80,17 @@ function post_install_kernel_debs__build_nvidia_kernel_module() { # it handles bash-specific quoting issues, apt proxies, logging, and errors. declare -ag if_error_find_files_sdcard=("/var/lib/dkms/nvidia/*/build/make.log") chroot_sdcard_apt_get_install "${nvidia_dkms_pkg}" "${nvidia_driver_pkg}" +} - # Install the runtime hardware-detection helper. On hosts that - # happen to have NVIDIA hardware this is a no-op; on hosts that - # don't, it blacklists the modules and purges the packages so - # DKMS doesn't rebuild them on every kernel update. 
+# Hook docs (lib/functions/rootfs/distro-agnostic.sh): post_install_kernel_debs +# explicitly fires BEFORE the BSP is installed. Anything we write under +# /usr/lib/armbian/ or /etc/systemd/system/ there gets clobbered by the +# BSP install or by later rootfs sweeps. post_family_tweaks fires AFTER +# `install_artifact_deb_chroot "armbian-bsp-cli"` (around line 454), so +# this is the right hook for writing extension-owned auxiliary files +# into the chroot's final filesystem. +function post_family_tweaks__build_nvidia_kernel_module_autodetect() { + [[ "${INSTALL_HEADERS}" != "yes" ]] || [[ "${KERNEL_HAS_WORKING_HEADERS}" != "yes" ]] && return 0 install_armbian_nvidia_autodetect_helper } From 43f505704d28a369028380e13f96db3e2b44af93 Mon Sep 17 00:00:00 2001 From: Igor Pecovnik Date: Sun, 17 May 2026 23:34:52 +0200 Subject: [PATCH 08/12] extensions/nvidia: dump apt state when nvidia-dkms detection fails The case-4 skip path ("No nvidia-dkms package in ... apt sources") is hit on noble even though nvidia-dkms-* lives in restricted/, where the rootfs is supposed to include the restricted component. Without seeing the chroot's apt state at the moment of failure, there's no way to tell whether: - restricted is missing from sources.list.d at all, - it's listed but the indices were never fetched, - or apt-cache pkgnames is filtering everything else for some arch / component reason. Before the existing "skipping nVidia install" warn, dump: - `apt-cache pkgnames | grep -c ^nvidia` from inside the chroot - listing of /etc/apt/sources.list.d/ - sources files that mention "restricted" - apt/lists entries containing restricted/multiverse (proves whether indices were refreshed) All purely diagnostic; no behaviour change on the happy path. 
--- extensions/nvidia.sh | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/extensions/nvidia.sh b/extensions/nvidia.sh index ca334ed308ff..766e95fc9332 100644 --- a/extensions/nvidia.sh +++ b/extensions/nvidia.sh @@ -47,7 +47,7 @@ function post_install_kernel_debs__build_nvidia_kernel_module() { nvidia_driver_pkg="nvidia-driver-${NVIDIA_DRIVER_VERSION}" display_alert "Using pinned NVIDIA_DRIVER_VERSION" "${NVIDIA_DRIVER_VERSION}" "info" else - local latest + local latest pkgnames_raw sources_files sources_has_restricted apt_lists_sample # chroot_sdcard wraps the inner command with `bash -e -o # pipefail -c …`, so this pipeline returns 1 when grep finds # no numbered nvidia-dkms-N packages (Debian / fall-through @@ -69,6 +69,17 @@ function post_install_kernel_debs__build_nvidia_kernel_module() { nvidia_driver_pkg="nvidia-driver" display_alert "Using unversioned nvidia-dkms metapackage" "${DISTRIBUTION}/${RELEASE}" "info" else + # Detection failed. Dump enough state to diagnose without + # needing to re-enter the chroot manually. 
+ pkgnames_raw=$(chroot_sdcard "apt-cache pkgnames 2>/dev/null | grep -c '^nvidia' || true") + sources_files=$(chroot_sdcard "ls /etc/apt/sources.list.d/ 2>/dev/null | tr '\n' ' '") + sources_has_restricted=$(chroot_sdcard "grep -lE '(^|\s)restricted(\s|\$)' /etc/apt/sources.list /etc/apt/sources.list.d/* 2>/dev/null | tr '\n' ' '") + apt_lists_sample=$(chroot_sdcard "ls /var/lib/apt/lists/ 2>/dev/null | grep -E 'restricted|multiverse' | head -5 | tr '\n' ' '") + display_alert "nvidia-dkms detection failed" "${DISTRIBUTION}/${RELEASE}" "warn" + display_alert " apt-cache pkgnames | grep ^nvidia count" "${pkgnames_raw}" "warn" + display_alert " sources.list.d entries" "${sources_files:-}" "warn" + display_alert " files mentioning 'restricted'" "${sources_has_restricted:-}" "warn" + display_alert " apt/lists entries containing restricted/multiverse" "${apt_lists_sample:-}" "warn" display_alert "No nvidia-dkms package in ${DISTRIBUTION}/${RELEASE} apt sources" "skipping nVidia install" "warn" return 0 fi From 0eccba1323bcbdc9b65cbfee356a92c98d36c2cc Mon Sep 17 00:00:00 2001 From: Igor Pecovnik Date: Sun, 17 May 2026 23:37:27 +0200 Subject: [PATCH 09/12] extensions/nvidia: || true on every debug-dump chroot pipeline chroot_sdcard wraps the inner command with `bash -e -o pipefail -c`. The debug-dump pipelines added in the previous commit had grep/ls calls that legitimately return rc=1 when nothing matches; pipefail propagates that as the pipeline's exit, the outer set -e aborts the build mid-function, and bash emits a confusing pop_var_context: head of shell_variables not a function context instead of the actual diagnostic. Tail every chroot_sdcard "..." with `|| true` so empty matches stay rc=0 and the diagnostic lines actually print. 
Also simplify the "restricted" probe from `grep -lE '(^|\s)restricted(\s|$)'` to `grep -lF restricted` - the regex form was both fragile under nested double-quote escaping and overkill for what we need (presence of the literal word in a sources file). --- extensions/nvidia.sh | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/extensions/nvidia.sh b/extensions/nvidia.sh index 766e95fc9332..db54011842e7 100644 --- a/extensions/nvidia.sh +++ b/extensions/nvidia.sh @@ -71,10 +71,16 @@ function post_install_kernel_debs__build_nvidia_kernel_module() { else # Detection failed. Dump enough state to diagnose without # needing to re-enter the chroot manually. + # Every chroot_sdcard pipeline below ends in `|| true`. The + # inner bash runs with -e -o pipefail; grep / find return 1 + # when nothing matches, which pipefail propagates and would + # abort the outer build in a way that triggers bash's + # pop_var_context warning instead of just reporting the + # diagnostic. 
pkgnames_raw=$(chroot_sdcard "apt-cache pkgnames 2>/dev/null | grep -c '^nvidia' || true") - sources_files=$(chroot_sdcard "ls /etc/apt/sources.list.d/ 2>/dev/null | tr '\n' ' '") - sources_has_restricted=$(chroot_sdcard "grep -lE '(^|\s)restricted(\s|\$)' /etc/apt/sources.list /etc/apt/sources.list.d/* 2>/dev/null | tr '\n' ' '") - apt_lists_sample=$(chroot_sdcard "ls /var/lib/apt/lists/ 2>/dev/null | grep -E 'restricted|multiverse' | head -5 | tr '\n' ' '") + sources_files=$(chroot_sdcard "ls /etc/apt/sources.list.d/ 2>/dev/null | tr '\n' ' ' || true") + sources_has_restricted=$(chroot_sdcard "grep -lF restricted /etc/apt/sources.list /etc/apt/sources.list.d/* 2>/dev/null | tr '\n' ' ' || true") + apt_lists_sample=$(chroot_sdcard "ls /var/lib/apt/lists/ 2>/dev/null | grep -E 'restricted|multiverse' | head -5 | tr '\n' ' ' || true") display_alert "nvidia-dkms detection failed" "${DISTRIBUTION}/${RELEASE}" "warn" display_alert " apt-cache pkgnames | grep ^nvidia count" "${pkgnames_raw}" "warn" display_alert " sources.list.d entries" "${sources_files:-}" "warn" From f0a1d2c862b7fb9f234c4c7066ce9871a1618103 Mon Sep 17 00:00:00 2001 From: Igor Pecovnik Date: Sun, 17 May 2026 23:53:38 +0200 Subject: [PATCH 10/12] extensions/nvidia: refresh chroot apt indices before pkgnames lookup apt-cache pkgnames reads from /var/cache/apt/pkgcache.bin, which is built from /var/lib/apt/lists/. If the rootfs was cached before `restricted` was added to ubuntu.sources, or if the framework hasn't run `apt-get update` since the final sources.list was finalized, the indices for the restricted component are simply absent - and pkgnames returns nothing for nvidia-dkms-*, even though sources.list lists the component. Verified locally that on a stale chroot the pkgnames pipeline returns empty; after `apt-get update`, it returns the full nvidia-dkms-N set. 
`apt-get update -qq || true`: quiet on success, doesn't abort the build if the proxy hiccups or one of the suite indices fails (apt returns non-zero on partial failures). --- extensions/nvidia.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/extensions/nvidia.sh b/extensions/nvidia.sh index db54011842e7..fbf247dc1150 100644 --- a/extensions/nvidia.sh +++ b/extensions/nvidia.sh @@ -48,6 +48,16 @@ function post_install_kernel_debs__build_nvidia_kernel_module() { display_alert "Using pinned NVIDIA_DRIVER_VERSION" "${NVIDIA_DRIVER_VERSION}" "info" else local latest pkgnames_raw sources_files sources_has_restricted apt_lists_sample + # Refresh the chroot's apt indices first. The rootfs cache may + # have been built before `restricted` was added to sources, or + # the framework may not have run `apt-get update` since the + # final sources.list was written. Without fresh indices, + # `apt-cache pkgnames` returns nothing for restricted-only + # packages (nvidia-dkms-*) even when sources.list lists the + # component. Idempotent and quick if indices are already current. + display_alert "Refreshing apt indices in chroot" "${EXTENSION}" "debug" + chroot_sdcard "apt-get update -qq" || true + # chroot_sdcard wraps the inner command with `bash -e -o # pipefail -c …`, so this pipeline returns 1 when grep finds # no numbered nvidia-dkms-N packages (Debian / fall-through From dd8fd56b5e1888fe152df610bb9886189e4e25c8 Mon Sep 17 00:00:00 2001 From: Igor Pecovnik Date: Mon, 18 May 2026 06:36:46 +0200 Subject: [PATCH 11/12] extensions/nvidia: clear modprobe blacklist when GPU detected If a previous boot ran on the same rootfs without NVIDIA hardware, the autodetect helper wrote /etc/modprobe.d/armbian-nvidia-disabled.conf to keep the kernel modules from auto-loading. 
When the same rootfs later boots with NVIDIA hardware (card added, SSD swapped into a GPU-equipped host), the early `exit 0` left that file in place, so the modules still wouldn't load even though they're present and the GPU is wired up. The detector was effectively one-way. When lspci finds [10de:], clear the blacklist file if it exists (rm -f is idempotent for the common case where it never existed). Log via systemd-cat so the action shows up in `journalctl -u armbian-nvidia-autodetect` for triage. Deliberately /not/ auto-reinstalling NVIDIA packages on the recovery path - proprietary driver auto-install without operator consent, and without guaranteed network/apt-sources, is out of scope for a boot-time detector. If packages were previously purged the operator runs apt install manually; the freshly-cleared blacklist file makes that work the next boot. --- extensions/nvidia.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/extensions/nvidia.sh b/extensions/nvidia.sh index fbf247dc1150..a94481888f93 100644 --- a/extensions/nvidia.sh +++ b/extensions/nvidia.sh @@ -174,6 +174,20 @@ function install_armbian_nvidia_autodetect_helper() { # `lspci -nn` output so non-VGA NVIDIA devices (Tegra USB-C, # audio over HDMI, etc.) also count. if lspci -nn 2>/dev/null | grep -qiE '\[10de:'; then + # Hardware is present. If a previous boot ran on a host + # without NVIDIA, the modprobe blacklist file is still on + # disk and would keep the driver from loading even now. + # Remove it so the modules can bind on the next boot. rm -f + # is idempotent on the common case where the file never + # existed. Package reinstall is intentionally NOT attempted + # here - apt-installing proprietary NVIDIA drivers without + # user consent and without guaranteed network/apt-sources + # is out of scope for a boot-time detector. If packages were + # previously purged, the operator runs apt install manually. 
+ if [ -f /etc/modprobe.d/armbian-nvidia-disabled.conf ]; then + rm -f /etc/modprobe.d/armbian-nvidia-disabled.conf + echo "armbian-nvidia-autodetect: NVIDIA hardware detected; cleared modprobe blacklist" | systemd-cat -t armbian-nvidia-autodetect 2>/dev/null || true + fi exit 0 fi From 74e7146d43c3bd48c8c9546900fd57e695d7afb2 Mon Sep 17 00:00:00 2001 From: Igor Pecovnik Date: Mon, 18 May 2026 06:52:45 +0200 Subject: [PATCH 12/12] extensions/nvidia: pre-ship modprobe blacklist; remove + modprobe at boot when GPU is found MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous flow on a host without an NVIDIA GPU: - First boot: kernel modules auto-load from initrd udev → probe fails → noisy dmesg ("nvidia: probe failed", DKMS rebuild artefacts in journal, etc.) - armbian-nvidia-autodetect runs in userspace → writes blacklist + purges packages - Second boot: clean Inverting the default so the first boot is also clean: 1. Build-time write of /etc/modprobe.d/armbian-nvidia-disabled.conf BEFORE the apt install. nvidia-dkms postinst triggers update-initramfs which now bakes the blacklist into initramfs, so initrd udev doesn't try to load nvidia* at all. 2. Boot-time autodetect: - lspci finds [10de:] → rm -f blacklist file + modprobe nvidia_drm modeset=1 (pulls nvidia + nvidia_modeset via deps; Wayland-friendly KMS). Display-manager (we're Before= it) sees the driver loaded. - No [10de:] → keep blacklist + purge packages (unchanged). Self-healing for hosts that gain a GPU later: rootfs blacklist file deleted on first NVIDIA-detected boot; next kernel upgrade regenerates initramfs from the (now blacklist-free) rootfs, so subsequent boots are clean directly from initrd. Until that kernel upgrade, the runtime modprobe covers the gap each boot. || true on modprobe handles the edge case where packages were previously purged on a no-GPU run, then the operator swapped in a GPU but hasn't re-apt-installed. 
Operator runs apt install manually in that case; the cleared blacklist makes it work on next boot. --- extensions/nvidia.sh | 42 ++++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/extensions/nvidia.sh b/extensions/nvidia.sh index a94481888f93..bfef919f1d13 100644 --- a/extensions/nvidia.sh +++ b/extensions/nvidia.sh @@ -31,6 +31,23 @@ function extension_finish_config__build_nvidia_kernel_module() { function post_install_kernel_debs__build_nvidia_kernel_module() { [[ "${INSTALL_HEADERS}" != "yes" ]] || [[ "${KERNEL_HAS_WORKING_HEADERS}" != "yes" ]] && return 0 + # Pre-ship the modprobe blacklist BEFORE installing nvidia packages. + # nvidia-dkms postinst triggers update-initramfs; with the file already + # in /etc/modprobe.d/, the regenerated initramfs has the blacklist + # baked in. Result: no spurious "nvidia: probe failed" lines on hosts + # without an NVIDIA GPU during the first boot. The boot-time + # armbian-nvidia-autodetect helper removes this file (and modprobes + # nvidia_drm) when lspci does see [10de:]. + mkdir -p "${SDCARD}/etc/modprobe.d" + cat <<- EOF > "${SDCARD}/etc/modprobe.d/armbian-nvidia-disabled.conf" + # Installed by build/extensions/nvidia.sh. + # Removed at boot by armbian-nvidia-autodetect when [10de:] is present. + blacklist nvidia + blacklist nvidia_drm + blacklist nvidia_modeset + blacklist nvidia_uvm + EOF + # Resolve which nvidia-dkms / nvidia-driver package(s) to install. # Three cases: # 1. Operator pinned NVIDIA_DRIVER_VERSION (env/config) → trust it. @@ -174,20 +191,25 @@ function install_armbian_nvidia_autodetect_helper() { # `lspci -nn` output so non-VGA NVIDIA devices (Tegra USB-C, # audio over HDMI, etc.) also count. if lspci -nn 2>/dev/null | grep -qiE '\[10de:'; then - # Hardware is present. If a previous boot ran on a host - # without NVIDIA, the modprobe blacklist file is still on - # disk and would keep the driver from loading even now. 
- # Remove it so the modules can bind on the next boot. rm -f - # is idempotent on the common case where the file never - # existed. Package reinstall is intentionally NOT attempted - # here - apt-installing proprietary NVIDIA drivers without - # user consent and without guaranteed network/apt-sources - # is out of scope for a boot-time detector. If packages were - # previously purged, the operator runs apt install manually. + # Hardware is present. The build framework ships a default + # /etc/modprobe.d/armbian-nvidia-disabled.conf so initrd udev + # doesn't try to load nvidia* on no-GPU hosts. Now that we've + # confirmed there IS a GPU, clear the file and modprobe so + # display-manager (we are Before= it) starts with the driver + # loaded. The rootfs deletion self-heals initramfs on the + # next kernel upgrade — until then, initrd stays stale but + # this runtime modprobe covers the gap each boot. + # + # rm -f is idempotent. modprobe nvidia_drm with modeset=1 + # pulls nvidia + nvidia_modeset via dependencies and gives + # Wayland-friendly KMS in one shot. || true on the modprobe + # in case the package was previously purged and isn't + # installed - the operator handles re-install separately. if [ -f /etc/modprobe.d/armbian-nvidia-disabled.conf ]; then rm -f /etc/modprobe.d/armbian-nvidia-disabled.conf echo "armbian-nvidia-autodetect: NVIDIA hardware detected; cleared modprobe blacklist" | systemd-cat -t armbian-nvidia-autodetect 2>/dev/null || true fi + modprobe nvidia_drm modeset=1 2>/dev/null || true exit 0 fi