From: Darrick J. Wong <djwong@xxxxxxxxxx>

Create a systemd service and activate it automatically.

Signed-off-by: "Darrick J. Wong" <djwong@xxxxxxxxxx>
---
Note: this copy was rebuilt from an archive that had collapsed all line
breaks and indentation, so the whitespace of the context lines below is a
reconstruction (tabs assumed) and should be checked against the tree.  The
unit template name appears to have been overwritten by the archive's
address obfuscation and is restored here as xfs_scrubbed@.service.in.  The
blob id on the xfs_scrubbed_start index line predates the edits to that
script.

 scrub/Makefile                 |   22 ++++++++
 scrub/xfs_scrubbed.in          |    9 +++
 scrub/xfs_scrubbed.rules       |    7 +++
 scrub/xfs_scrubbed@.service.in |  103 ++++++++++++++++++++++++++++++++++++++++
 scrub/xfs_scrubbed_start       |   24 ++++++++
 5 files changed, 164 insertions(+), 1 deletion(-)
 create mode 100644 scrub/xfs_scrubbed.rules
 create mode 100644 scrub/xfs_scrubbed@.service.in
 create mode 100755 scrub/xfs_scrubbed_start

diff --git a/scrub/Makefile b/scrub/Makefile
index 7d4fa0ddc09685..731810d7c7fd9a 100644
--- a/scrub/Makefile
+++ b/scrub/Makefile
@@ -29,8 +29,18 @@ SYSTEMD_SERVICES=\
 	xfs_scrub_all.service \
 	xfs_scrub_all_fail.service \
 	xfs_scrub_all.timer \
-	system-xfs_scrub.slice
+	system-xfs_scrub.slice \
+	xfs_scrubbed@.service
 OPTIONAL_TARGETS += $(SYSTEMD_SERVICES)
+
+# xfs_scrubbed_start and its udev rule only exist to start the systemd unit,
+# so they are configured inside the block that defines SYSTEMD_SERVICES.
+ifeq ($(HAVE_UDEV),yes)
+  XFS_SCRUBBED_UDEV_RULES = xfs_scrubbed.rules
+  XFS_SCRUBBED_HELPER = xfs_scrubbed_start
+  INSTALL_SCRUB += install-udev-scrubbed
+  OPTIONAL_TARGETS += $(XFS_SCRUBBED_HELPER)
+endif
 endif
 ifeq ($(HAVE_CROND),yes)
 INSTALL_SCRUB += install-crond
@@ -185,6 +195,16 @@ install-udev: $(UDEV_RULES)
 		$(INSTALL) -m 644 $$i $(UDEV_RULE_DIR)/64-$$i; \
 	done
 
+# Install the mount-time helper into the udev directory and its rule file into
+# the rules directory, using the same 64- prefix as install-udev.
+install-udev-scrubbed: $(XFS_SCRUBBED_HELPER)
+	$(INSTALL) -m 755 -d $(UDEV_DIR)
+	$(INSTALL) -m 755 $(XFS_SCRUBBED_HELPER) $(UDEV_DIR)
+	$(INSTALL) -m 755 -d $(UDEV_RULE_DIR)
+	for i in $(XFS_SCRUBBED_UDEV_RULES); do \
+		$(INSTALL) -m 644 $$i $(UDEV_RULE_DIR)/64-$$i; \
+	done
+
 install-dev:
 
 -include .dep
diff --git a/scrub/xfs_scrubbed.in b/scrub/xfs_scrubbed.in
index a4e073b3098f7a..9df6f45e53ad80 100644
--- a/scrub/xfs_scrubbed.in
+++ b/scrub/xfs_scrubbed.in
@@ -19,6 +19,7 @@ import gc
 from concurrent.futures import ProcessPoolExecutor
 import ctypes.util
 import collections
+import time
 
 try:
 	# Not all systems will have this json schema validation libarary,
@@ -994,6 +995,14 @@ def main():
 		pass
 
 	args.event_queue.shutdown()
+
+	# See the service mode comments in xfs_scrub.c for why we sleep and
+	# compress all nonzero exit codes to 1.
+	if 'SERVICE_MODE' in os.environ:
+		time.sleep(2)
+		if ret != 0:
+			ret = 1
+
 	return ret
 
 if __name__ == '__main__':
diff --git a/scrub/xfs_scrubbed.rules b/scrub/xfs_scrubbed.rules
new file mode 100644
index 00000000000000..c651126d5373a1
--- /dev/null
+++ b/scrub/xfs_scrubbed.rules
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# Copyright (c) 2024-2025 Oracle. All rights reserved.
+# Author: Darrick J. Wong <djwong@xxxxxxxxxx>
+#
+# Start autonomous self healing automatically
+ACTION=="add", SUBSYSTEM=="xfs", ENV{TYPE}=="mount", RUN+="xfs_scrubbed_start"
diff --git a/scrub/xfs_scrubbed@.service.in b/scrub/xfs_scrubbed@.service.in
new file mode 100644
index 00000000000000..9656bdb3cd9a9d
--- /dev/null
+++ b/scrub/xfs_scrubbed@.service.in
@@ -0,0 +1,103 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# Copyright (c) 2024-2025 Oracle. All Rights Reserved.
+# Author: Darrick J. Wong <djwong@xxxxxxxxxx>
+
+[Unit]
+Description=Self Healing of XFS Metadata for %f
+Documentation=man:xfs_scrubbed(8)
+
+# Explicitly require the capabilities that this program needs
+ConditionCapability=CAP_SYS_ADMIN
+ConditionCapability=CAP_DAC_OVERRIDE
+
+# Must be a mountpoint
+ConditionPathIsMountPoint=%f
+RequiresMountsFor=%f
+
+[Service]
+Type=exec
+Environment=SERVICE_MODE=1
+ExecStart=@pkg_libexec_dir@/xfs_scrubbed --log %f
+SyslogIdentifier=%N
+
+# Run scrub with minimal CPU and IO priority so that nothing else will starve.
+IOSchedulingClass=idle
+CPUSchedulingPolicy=idle
+CPUAccounting=true
+Nice=19
+
+# Create the service underneath the scrub background service slice so that we
+# can control resource usage.
+Slice=system-xfs_scrub.slice
+
+# No realtime CPU scheduling
+RestrictRealtime=true
+
+# Dynamically create a user that isn't root
+DynamicUser=true
+
+# Make the entire filesystem readonly, but don't hide /home and don't use a
+# private bind mount like xfs_scrub. We don't want to pin the filesystem,
+# because we want umount to work correctly and this service to stop
+# automatically.
+ProtectSystem=strict
+ProtectHome=no
+PrivateTmp=true
+PrivateDevices=true
+
+# Don't let scrub complain about paths in /etc/projects that have been hidden
+# by our sandboxing. scrub doesn't care about project ids anyway.
+InaccessiblePaths=-/etc/projects
+
+# No network access
+PrivateNetwork=true
+ProtectHostname=true
+RestrictAddressFamilies=none
+IPAddressDeny=any
+
+# Don't let the program mess with the kernel configuration at all
+ProtectKernelLogs=true
+ProtectKernelModules=true
+ProtectKernelTunables=true
+ProtectControlGroups=true
+ProtectProc=invisible
+RestrictNamespaces=true
+
+# Hide everything in /proc, even /proc/mounts
+ProcSubset=pid
+
+# Only allow the default personality Linux
+LockPersonality=true
+
+# No writable memory pages
+MemoryDenyWriteExecute=true
+
+# Don't let our mounts leak out to the host
+PrivateMounts=true
+
+# Restrict system calls to the native arch and only enough to get things going
+SystemCallArchitectures=native
+SystemCallFilter=@system-service
+SystemCallFilter=~@privileged
+SystemCallFilter=~@resources
+SystemCallFilter=~@mount
+
+# xfs_scrubbed needs these privileges to open the rootdir and monitor
+CapabilityBoundingSet=CAP_SYS_ADMIN CAP_DAC_OVERRIDE
+AmbientCapabilities=CAP_SYS_ADMIN CAP_DAC_OVERRIDE
+NoNewPrivileges=true
+
+# xfs_scrubbed doesn't create files
+UMask=7777
+
+# No access to hardware /dev files except for block devices
+ProtectClock=true
+DevicePolicy=closed
+
+[Install]
+WantedBy=multi-user.target
+# If someone tries to enable the template itself, translate that into enabling
+# this service on the root directory at systemd startup time. In the
+# initramfs, the udev rules in xfs_scrubbed.rules run before systemd starts.
+DefaultInstance=-
diff --git a/scrub/xfs_scrubbed_start b/scrub/xfs_scrubbed_start
new file mode 100755
index 00000000000000..82530cf7862717
--- /dev/null
+++ b/scrub/xfs_scrubbed_start
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# Copyright (c) 2024-2025 Oracle. All Rights Reserved.
+# Author: Darrick J. Wong <djwong@xxxxxxxxxx>
+
+# Start the xfs_scrubbed service when the filesystem is mounted.  The udev
+# rule runs this helper; $SOURCE is matched against the first field of
+# /proc/mounts to find the mount point.
+
+# Nothing to do on systems without systemctl; discard the lookup output.
+command -v systemctl >/dev/null 2>&1 || exit 0
+
+# Compare $SOURCE as a literal string rather than a regular expression.  Read
+# with -r so that the octal escapes /proc/mounts uses for whitespace in paths
+# (\040 for a space) are preserved and can be decoded by printf %b.
+while read -r source mntpt therest; do
+	test "$source" = "$SOURCE" || continue
+	inst="$(systemd-escape --path "$(printf '%b' "$mntpt")")"
+	systemctl restart --no-block "xfs_scrubbed@$inst" && break
+done < /proc/mounts
+
+exit 0