On 6/10/20 6:33 PM, Harshad Shirwadkar wrote: > Make sure that user requested memory via BLKTRACESETUP is within > bounds. This can be easily exploited by setting really large values > for buf_size and buf_nr in BLKTRACESETUP ioctl. > > blktrace program has following hardcoded values for bufsize and bufnr: > BUF_SIZE=(512 * 1024) > BUF_NR=(4) > > This is very easy to exploit. Setting buf_size / buf_nr in userspace > program to big values make kernel go oom. > > This patch adds a new new sysfs tunable called "blktrace_max_alloc" > with the default value as: > blktrace_max_alloc=(1024 * 1024 * 16) > > Verified that the fix makes BLKTRACESETUP return -E2BIG if the > buf_size * buf_nr crosses the configured upper bound in the device's > sysfs file. Verified that the bound checking is turned off when we > write 0 to the sysfs file. > > Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@xxxxxxxxx> > --- > Documentation/block/queue-sysfs.rst | 8 ++++++++ > block/blk-settings.c | 1 + > block/blk-sysfs.c | 27 +++++++++++++++++++++++++++ > include/linux/blkdev.h | 5 +++++ > kernel/trace/blktrace.c | 8 ++++++++ > 5 files changed, 49 insertions(+) > > diff --git a/Documentation/block/queue-sysfs.rst b/Documentation/block/queue-sysfs.rst > index 6a8513af9201..ef4eec68cd24 100644 > --- a/Documentation/block/queue-sysfs.rst > +++ b/Documentation/block/queue-sysfs.rst > @@ -251,4 +251,12 @@ devices are described in the ZBC (Zoned Block Commands) and ZAC > do not support zone commands, they will be treated as regular block devices > and zoned will report "none". > > +blktrace_max_alloc (RW) > +---------- nit:-based on the pattern present in the same file how about following? blk_trace_max_alloc (RW) ------------------------ > +BLKTRACESETUP ioctl takes the number of buffers and the size of each > +buffer as an argument and uses these values to allocate kernel memory > +for tracing purpose. This is the limit on the maximum kernel memory > +that can be allocated by BLKTRACESETUP ioctl. The default limit is > +16MB. Value of 0 indicates that this bound checking is disabled. > + > Jens Axboe <jens.axboe@xxxxxxxxxx>, February 2009 > diff --git a/block/blk-settings.c b/block/blk-settings.c > index 9a2c23cd9700..38535aa146f4 100644 > --- a/block/blk-settings.c > +++ b/block/blk-settings.c > @@ -60,6 +60,7 @@ void blk_set_default_limits(struct queue_limits *lim) > lim->io_opt = 0; > lim->misaligned = 0; > lim->zoned = BLK_ZONED_NONE; > + lim->blktrace_max_alloc = BLKTRACE_MAX_ALLOC; > } > EXPORT_SYMBOL(blk_set_default_limits); > > diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c > index 02643e149d5e..e849e80930c4 100644 > --- a/block/blk-sysfs.c > +++ b/block/blk-sysfs.c > @@ -496,6 +496,26 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page, > return count; > } > > +static ssize_t queue_blktrace_max_alloc_show(struct request_queue *q, char *page) > +{ > + return queue_var_show(q->limits.blktrace_max_alloc, page); > +} > + > +static ssize_t queue_blktrace_max_alloc_store(struct request_queue *q, > + const char *page, > + size_t count) > +{ > + unsigned long blktrace_max_alloc; > + int ret; > + > + ret = queue_var_store(&blktrace_max_alloc, page, count); > + if (ret < 0) > + return ret; > + > + q->limits.blktrace_max_alloc = blktrace_max_alloc; > + return count; > +} > + > static ssize_t queue_wc_show(struct request_queue *q, char *page) > { > if (test_bit(QUEUE_FLAG_WC, &q->queue_flags)) > @@ -731,6 +751,12 @@ static struct queue_sysfs_entry queue_wb_lat_entry = { > .store = queue_wb_lat_store, > }; > > +static struct queue_sysfs_entry queue_blktrace_max_alloc_entry = { > + .attr = {.name = "blktrace_max_alloc", .mode = 0644 }, > + .show = queue_blktrace_max_alloc_show, > + .store = queue_blktrace_max_alloc_store, > +}; > + > #ifdef CONFIG_BLK_DEV_THROTTLING_LOW > static struct queue_sysfs_entry throtl_sample_time_entry = { > .attr = {.name = "throttle_sample_time", .mode = 0644 }, > @@ -779,6 +805,7 @@ static struct attribute *queue_attrs[] = { > #ifdef CONFIG_BLK_DEV_THROTTLING_LOW > &throtl_sample_time_entry.attr, > #endif > + &queue_blktrace_max_alloc_entry.attr, Is above attribute needs to be under CONFIG_BLK_DEV_IO_TRACE guard ? > NULL, > }; > > diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h > index 8fd900998b4e..3a72b0fd723e 100644 > --- a/include/linux/blkdev.h > +++ b/include/linux/blkdev.h > @@ -309,6 +309,10 @@ enum blk_queue_state { > #define BLK_SCSI_MAX_CMDS (256) > #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) > > +#define BLKTRACE_MAX_BUFSIZ (1024 * 1024) nit:- macro name ending with SIZ is unusual for include/linux/blkdev.h. Consider renaming it to BLKTRACE_MAX_BUF_SIZE. > +#define BLKTRACE_MAX_BUFNR 16 nit:- just like BLKTRACE_MAX_BUFSIZ value of BLKTRACE_MAX_BUFNR needs to be guarded by () ? > +#define BLKTRACE_MAX_ALLOC ((BLKTRACE_MAX_BUFSIZ) * (BLKTRACE_MAX_BUFNR)) nit:- The macro BLKTRACE_MAX_BUFSIZ already has () we can get rid of the extra inner () for BLKTRACE_MAX_BUFSIZ in BLKTRACE_MAX_ALLOC. > + > /* > * Zoned block device models (zoned limit). > */ > @@ -322,6 +326,7 @@ struct queue_limits { > unsigned long bounce_pfn; > unsigned long seg_boundary_mask; > unsigned long virt_boundary_mask; > + unsigned long blktrace_max_alloc; > > unsigned int max_hw_sectors; > unsigned int max_dev_sectors; > diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c > index ea47f2084087..de028bdbb148 100644 > --- a/kernel/trace/blktrace.c > +++ b/kernel/trace/blktrace.c > @@ -477,11 +477,19 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, > { > struct blk_trace *bt = NULL; > struct dentry *dir = NULL; > + u32 alloc_siz; nit:- please use full names 's/alloc_siz/alloc_size/' > int ret; > > if (!buts->buf_size || !buts->buf_nr) > return -EINVAL; > > + if (check_mul_overflow(buts->buf_size, buts->buf_nr, &alloc_siz)) > + return -E2BIG; > + > + if (q->limits.blktrace_max_alloc && > + alloc_siz > q->limits.blktrace_max_alloc) > + return -E2BIG; > + > if (!blk_debugfs_root) > return -ENOENT; > >