Re: [PATCH] fix bitmap corruption on close_range() with CLOSE_RANGE_UNSHARE

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Aug 05, 2024 at 07:54:29PM GMT, Al Viro wrote:
> On Mon, Aug 05, 2024 at 09:22:08AM +0200, Christian Brauner wrote:
> 
> > Really, it doesn't have to be pretty but these repros in there really
> > have been helpful finding such corruptions when run with a proper k*san
> > config.
> 
> See below; so far it survives beating (and close_range_test passes with
> the patch, while failing the last test on mainline).  BTW, EXPECT_...
> alone is sufficient for it to whine, but it doesn't actually fail the
> test, so don't we need exit(EXIT_FAILURE) on (at least some of) the
> previous tests?  Hadn't played with the kselftest before, so...

The selftest infrastrcuture is a bit weird and I'm no expert myself.
So any failure in EXPECT_*() will cause the test to fail but will
continue running the whole test.
Using ASSERT_*() instead of EXPECT_*() will also cause the test to fail
but will also stop the test immediately.

So really no matter if EXPEC_*() or ASSERT_*() the end result should be
that the test run fails:

diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c
index 991c473e3859..3f7257487b85 100644
--- a/tools/testing/selftests/core/close_range_test.c
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -37,6 +37,8 @@ TEST(core_close_range)
        int i, ret;
        int open_fds[101];

+       EXPECT_NE(0, 0);
+
        for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
                int fd;


> ./close_range_test
TAP version 13
1..7
# Starting 7 tests from 1 test cases.
#  RUN           global.core_close_range ...
# close_range_test.c:40:core_close_range:Expected 0 (0) != 0 (0)
# core_close_range: Test failed
#          FAIL  global.core_close_range
not ok 1 global.core_close_range
#  RUN           global.close_range_unshare ...
#            OK  global.close_range_unshare
ok 2 global.close_range_unshare
#  RUN           global.close_range_unshare_capped ...
#            OK  global.close_range_unshare_capped
ok 3 global.close_range_unshare_capped
#  RUN           global.close_range_cloexec ...
#            OK  global.close_range_cloexec
ok 4 global.close_range_cloexec
#  RUN           global.close_range_cloexec_unshare ...
#            OK  global.close_range_cloexec_unshare
ok 5 global.close_range_cloexec_unshare
#  RUN           global.close_range_cloexec_syzbot ...
#            OK  global.close_range_cloexec_syzbot
ok 6 global.close_range_cloexec_syzbot
#  RUN           global.close_range_cloexec_unshare_syzbot ...
#            OK  global.close_range_cloexec_unshare_syzbot
ok 7 global.close_range_cloexec_unshare_syzbot
# FAILED: 6 / 7 tests passed.
# Totals: pass:6 fail:1 xfail:0 xpass:0 skip:0 error:0

> 
> diff --git a/fs/file.c b/fs/file.c
> index a11e59b5d602..655338effe9c 100644
> --- a/fs/file.c
> +++ b/fs/file.c
> @@ -46,27 +46,23 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
>  #define BITBIT_NR(nr)	BITS_TO_LONGS(BITS_TO_LONGS(nr))
>  #define BITBIT_SIZE(nr)	(BITBIT_NR(nr) * sizeof(long))
>  
> +#define fdt_words(fdt) ((fdt)->max_fds / BITS_PER_LONG) // words in ->open_fds
>  /*
>   * Copy 'count' fd bits from the old table to the new table and clear the extra
>   * space if any.  This does not copy the file pointers.  Called with the files
>   * spinlock held for write.
>   */
> -static void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt,
> -			    unsigned int count)
> +static inline void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt,
> +			    unsigned int copy_words)
>  {
> -	unsigned int cpy, set;
> -
> -	cpy = count / BITS_PER_BYTE;
> -	set = (nfdt->max_fds - count) / BITS_PER_BYTE;
> -	memcpy(nfdt->open_fds, ofdt->open_fds, cpy);
> -	memset((char *)nfdt->open_fds + cpy, 0, set);
> -	memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy);
> -	memset((char *)nfdt->close_on_exec + cpy, 0, set);
> -
> -	cpy = BITBIT_SIZE(count);
> -	set = BITBIT_SIZE(nfdt->max_fds) - cpy;
> -	memcpy(nfdt->full_fds_bits, ofdt->full_fds_bits, cpy);
> -	memset((char *)nfdt->full_fds_bits + cpy, 0, set);
> +	unsigned int nwords = fdt_words(nfdt);
> +
> +	bitmap_copy_and_extend(nfdt->open_fds, ofdt->open_fds,
> +			copy_words * BITS_PER_LONG, nwords * BITS_PER_LONG);
> +	bitmap_copy_and_extend(nfdt->close_on_exec, ofdt->close_on_exec,
> +			copy_words * BITS_PER_LONG, nwords * BITS_PER_LONG);
> +	bitmap_copy_and_extend(nfdt->full_fds_bits, ofdt->full_fds_bits,
> +			copy_words, nwords);
>  }
>  
>  /*
> @@ -84,7 +80,7 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
>  	memcpy(nfdt->fd, ofdt->fd, cpy);
>  	memset((char *)nfdt->fd + cpy, 0, set);
>  
> -	copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds);
> +	copy_fd_bitmaps(nfdt, ofdt, fdt_words(ofdt));
>  }
>  
>  /*
> @@ -379,7 +375,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int
>  		open_files = sane_fdtable_size(old_fdt, max_fds);
>  	}
>  
> -	copy_fd_bitmaps(new_fdt, old_fdt, open_files);
> +	copy_fd_bitmaps(new_fdt, old_fdt, open_files / BITS_PER_LONG);
>  
>  	old_fds = old_fdt->fd;
>  	new_fds = new_fdt->fd;
> diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
> index 8c4768c44a01..d3b66d77df7a 100644
> --- a/include/linux/bitmap.h
> +++ b/include/linux/bitmap.h
> @@ -270,6 +270,18 @@ static inline void bitmap_copy_clear_tail(unsigned long *dst,
>  		dst[nbits / BITS_PER_LONG] &= BITMAP_LAST_WORD_MASK(nbits);
>  }
>  
> +static inline void bitmap_copy_and_extend(unsigned long *to,
> +					  const unsigned long *from,
> +					  unsigned int count, unsigned int size)
> +{
> +	unsigned int copy = BITS_TO_LONGS(count);
> +
> +	memcpy(to, from, copy * sizeof(long));
> +	if (count % BITS_PER_LONG)
> +		to[copy - 1] &= BITMAP_LAST_WORD_MASK(count);
> +	memset(to + copy, 0, bitmap_size(size) - copy * sizeof(long));
> +}
> +
>  /*
>   * On 32-bit systems bitmaps are represented as u32 arrays internally. On LE64
>   * machines the order of hi and lo parts of numbers match the bitmap structure.
> diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c
> index 991c473e3859..12b4eb9d0434 100644
> --- a/tools/testing/selftests/core/close_range_test.c
> +++ b/tools/testing/selftests/core/close_range_test.c
> @@ -589,4 +589,39 @@ TEST(close_range_cloexec_unshare_syzbot)
>  	EXPECT_EQ(close(fd3), 0);
>  }
>  
> +TEST(close_range_bitmap_corruption)
> +{
> +	pid_t pid;
> +	int status;
> +	struct __clone_args args = {
> +		.flags = CLONE_FILES,
> +		.exit_signal = SIGCHLD,
> +	};
> +
> +	/* get the first 128 descriptors open */
> +	for (int i = 2; i < 128; i++)
> +		EXPECT_GE(dup2(0, i), 0);
> +
> +	/* get descriptor table shared */
> +	pid = sys_clone3(&args, sizeof(args));
> +	ASSERT_GE(pid, 0);
> +
> +	if (pid == 0) {
> +		/* unshare and truncate descriptor table down to 64 */
> +		if (sys_close_range(64, ~0U, CLOSE_RANGE_UNSHARE))
> +			exit(EXIT_FAILURE);
> +
> +		ASSERT_EQ(fcntl(64, F_GETFD), -1);
> +		/* ... and verify that the range 64..127 is not
> +		   stuck "fully used" according to secondary bitmap */
> +		EXPECT_EQ(dup(0), 64)
> +			exit(EXIT_FAILURE);
> +		exit(EXIT_SUCCESS);
> +	}
> +
> +	EXPECT_EQ(waitpid(pid, &status, 0), pid);
> +	EXPECT_EQ(true, WIFEXITED(status));
> +	EXPECT_EQ(0, WEXITSTATUS(status));
> +}
> +
>  TEST_HARNESS_MAIN




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux