On Mar 1, 2018, at 11:23 AM, Darrick J. Wong <darrick.wong@xxxxxxxxxx> wrote: > > From: Darrick J. Wong <darrick.wong@xxxxxxxxxx> > > Implement online fsck for ext* filesystems which live on LVM-managed > logical volumes. The basic strategy mirrors that of e2croncheck -- > create a snapshot, fsck the snapshot, report whatever errors appear, > remove snapshot. Unlike e2croncheck, this utility accepts any LVM > device path, knows about snapshots running out of space, and can call > fstrim having validated that the fs metadata is ok. One high-level note - in my lvcheck script, there was an option to disable background checking if the system was running on a battery: # determine whether the machine is on AC power function on_ac_power() { local any_known=no # try sysfs power class first if [ -d /sys/class/power_supply ]; then for psu in /sys/class/power_supply/*; do if [ -r "$psu/type" ]; then type=$(cat "$psu/type") # ignore batteries [ "$type" = "Battery" ] && continue online=$(cat "$psu/online") [ "$online" = 1 ] && return 0 [ "$online" = 0 ] && any_known=yes fi done [ "$any_known" = "yes" ] && return 1 fi # else fall back to AC adapters in /proc if [ -d /proc/acpi/ac_adapter ]; then for ac in /proc/acpi/ac_adapter/*; do if [ -r "$ac/state" ]; then grep -q on-line "$ac/state" && return 0 grep -q off-line "$ac/state" && any_known=yes elif [ -r "$ac/status" ]; then grep -q on-line "$ac/status" && return 0 grep -q off-line "$ac/status" && any_known=yes fi done [ "$any_known" = "yes" ] && return 1 fi if [ "$AC_UNKNOWN" == "CONTINUE" ]; then return 0 # assume on AC power elif [ "$AC_UNKNOWN" == "ABORT" ]; then return 1 # assume on battery else log err "Invalid value for AC_UNKNOWN in the config file" exit 1 fi } More comments inline below. > diff --git a/scrub/e2scrub.in b/scrub/e2scrub.in > new file mode 100644 > index 0000000..32d11c8 > --- /dev/null > +++ b/scrub/e2scrub.in > @@ -0,0 +1,182 @@ > +#!/bin/bash > + > +# Copyright (C) 2018 Oracle. All Rights Reserved. > +# > +# Author: Darrick J. Wong <darrick.wong@xxxxxxxxxx> > +# > +# This program is free software; you can redistribute it and/or > +# modify it under the terms of the GNU General Public License > +# as published by the Free Software Foundation; either version 2 > +# of the License, or (at your option) any later version. > +# > +# This program is distributed in the hope that it would be useful, > +# but WITHOUT ANY WARRANTY; without even the implied warranty of > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > +# GNU General Public License for more details. > +# > +# You should have received a copy of the GNU General Public License > +# along with this program; if not, write the Free Software Foundation, > +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. > + > +# Automatically check a LVM-managed filesystem online. > +# We use lvm snapshots to do this, which means that we can only > +# check filesystems in VGs that have at least 256mb (or so) of > +# free space. > + > +snap_size_mb=256 > +fstrim=0 > +e2fsck_opts="" > +conffile="@root_sysconfdir@/e2scrub.conf" > + > +test -f "${conffile}" && . "${conffile}" > + > +print_help() { > + echo "Usage: $0 [-t] device" > + echo > + echo "device must be a LVM-managed block device" > + echo "-t: Run fstrim if successful." > +} > + > +exitcode() { > + ret="$1" > + > + exit "${ret}" > +} > + > +prog_path() { > + path="$1" > + displayname="$2" > + > + if ! type -P "${path}" && [ -n "${displayname}" ]; then > + echo "${displayname}: Command not found." > + exitcode 8 > + fi > +} > + > +LVS_PROG="$(prog_path "@root_sbindir@/lvs" "lvs")" > +BLKID_PROG="$(prog_path "@root_sbindir@/blkid" "blkid")" > +LVCREATE_PROG="$(prog_path "@root_sbindir@/lvcreate" "lvcreate")" > +LVREMOVE_PROG="$(prog_path "@root_sbindir@/lvremove" "lvremove")" > +FSTRIM_PROG="$(prog_path "@root_sbindir@/fstrim")" > +UDEVADM_PROG="$(prog_path "@root_sbindir@/udevadm")" > +SLEEP_PROG="$(prog_path "@root_bindir@/sleep")" Rather than hard-coding the pathnames for all of these commands, it would be a lot easier to just use them by name and set an explicit PATH= to find them. That avoids the need to do all of the substitutions, and makes the script much less ugly. > +while getopts "t" opt; do > + case "${opt}" in > + "t") fstrim=1;; > + *) print_help; exitcode 2;; > + esac > +done > +shift "$((OPTIND - 1))" > + > +dev="$1" > +if [ -z "${dev}" ]; then > + print_help > + exitcode 1 > +elif [ ! -b "${dev}" ]; then > + echo "${dev}: Not a block device?" > + print_help > + exitcode 16 > +fi > + > +# Make sure this is an LVM device we can snapshot > +vg="$("${LVS_PROG}" --noheadings -o vg_name "${dev}" 2> /dev/null | sed -e 's/^ //g')" > +lv="$("${LVS_PROG}" --noheadings -o lv_name "${dev}" 2> /dev/null | sed -e 's/^ //g')" > +if [ -z "${vg}" ] || [ -z "${lv}" ]; then > + echo "${dev}: Not a LVM device." > + exitcode 16 > +fi > +start_time="$(date +'%Y%m%d%H%M%S')" > +snap="${lv}.e2scrub" > +snap_dev="/dev/${vg}/${snap}" > +fstype="$("${BLKID_PROG}" -p -s TYPE "${dev}" | sed -e 's/^.*TYPE="\(.*\)".*$/\1/g')" > + > +case "${fstype}" in > +"ext2"|"ext3"|"ext4") > + ;; > +*) > + echo "${dev}: Filesystem of type ${fstype} not supported." > + exitcode 16 > + ;; > +esac > + > +teardown() { > + # Remove and wait for removal to succeed. > + "${LVREMOVE_PROG}" -f "${vg}/${snap}" 3>&- > + while [ -b "${snap_dev}" ] && [ "$?" -eq "5" ]; do > + /bin/sleep 0.5 > + "${LVREMOVE_PROG}" -f "${vg}/${snap}" 3>&- > + done > +} > + > +check() { > + # First we preen the filesystem to recover the journal, then > + # we see if e2fsck tries any non-optimization repairs. If > + # either of these two returns a non-zero status (errors fixed > + # or remaining) then this fs is bad. > + E2FSCK_FIXES_ONLY=1 > + export E2FSCK_FIXES_ONLY > + ${DBG} "@root_sbindir@/e2fsck" -p ${e2fsck_opts} "${snap_dev}" || return 1 > + ${DBG} "@root_sbindir@/e2fsck" -fy ${e2fsck_opts} "${snap_dev}" || return This could run two full e2fsck checks for each filesystem. To recover only the journal, it would be better to use "e2fsck -E journal_only"? > +mark_corrupt() { > + ${DBG} "@root_sbindir@/tune2fs" -C 16000 -T "19000101" "${dev}" This won't actually do anything if the time/mount-based checks are disabled (which has been the default for a long time already, not that I agree with it). You need to add something like "-i 720" to force a check on the filesystem on the next mount. Also, this clobbers the mount count unnecessarily. Finally, using "19000101" has the possibility to underflow in a different timezone, and is a negative signed timestamp as well (but is stored as unsigned value) so it may be problematic. Better to use "19700102" or similar, which is just as likely to cause a check. > +} > + > +setup() { > + # Create the snapshot, wait for device to appear > + teardown > /dev/null 2> /dev/null > + "${LVCREATE_PROG}" -s -L "${snap_size_mb}m" -n "${snap}" "${vg}/${lv}" 3>&- > + test $? -ne 0 && return 1 > + test -x "${UDEVADM_PROG}" && "${UDEVADM_PROG}" settle > + return 0 > +} > + > +trap "teardown" EXIT INT QUIT TERM > +if ! setup; then > + echo "Snapshot of ${dev} FAILED, will not check!" > + exitcode 1 > +fi > + > +# Check and react > +if check; then > + echo "Scrub of ${dev} succeeded." > + mark_clean > + > + if [ "${fstrim}" -eq 1 ] && [ -x "${FSTRIM_PROG}" ]; then > + dir="$(lsblk -o MOUNTPOINT -n "${dev}")" > + if [ -d "${dir}" ]; then > + # NB: fstrim fails with snapshot present > + trap '' EXIT > + teardown > + "${FSTRIM_PROG}" -v "${dir}" > + fi > + fi > + > + ret=0 > +else > + # fsck failed. Check if the snapshot is invalid; if so, make a > + # note of that at the end of the log. This isn't necessarily a > + # failure because the mounted fs could have overflowed the > + # snapshot with regular disk writes /or/ our repair process > + # could have done it by repairing too much. > + # > + # If it's really corrupt we ought to fsck at next boot. > + is_invalid="$("${LVS_PROG}" -o lv_snapshot_invalid --noheadings "${snap_dev}")" > + if [ -n "${is_invalid}" ]; then > + echo "Scrub of ${dev} FAILED due to invalid snapshot." > + ret=8 > + else > + echo "Scrub of ${dev} FAILED! Reboot soon to fsck." > + mark_corrupt > + ret=6 > + fi > +fi > + > +exitcode "${ret}" Cheers, Andreas
Attachment:
signature.asc
Description: Message signed with OpenPGP