On Thu, Jul 02, 2020 at 03:00:37PM +0000, Martin Wilck wrote: > On Wed, 2020-07-01 at 17:39 -0500, Benjamin Marzinski wrote: > > If kpartx is used on top of shared storage, and a device has its > > partition table changed on one machine, and then kpartx is run on > > another, it may not see the new data, because the cache still > > contains > > the old data, and there is nothing to tell the machine running kpartx > > to > > invalidate it. To solve this, kpartx should read the devices using > > direct io. > > > > One issue with how this code has been updated is that the original > > code > > for getblock() always read 1024 bytes. The new code reads a logical > > sector size chunk of the device, and returns a pointer to the 512 > > byte > > sector that the caller asked for, within that (possibly larger) > > chunk. > > This means that if the logical sector size is 512, then the code is > > now > > only reading 512 bytes. Looking through the code for the various > > partition types, I can't see a case where more than 512 bytes is > > needed > > and getblock() is used. If anyone has a reason why this code should > > be > > reading 1024 bytes at minmum, I can certainly change this. But when > > I > > looked, I couldn't find a case where reading 512 bytes would cause a > > problem. > > > > Signed-off-by: Benjamin Marzinski <bmarzins@xxxxxxxxxx> > > The value 1024 comes from first partx in util-linux, where Christophe > pulled the first kpartx from: > http://ftp.be.debian.org/pub/linux/utils/util-linux/v2.10/util-linux-2.10m.tar.gz > I, too, see no reason for reading more than a single block. > > Two nits below. Here's one more, I'd have preferred two separate > patches for these two different issues. But never mind. > > Regards, > Martin > > > --- > > kpartx/dasd.c | 7 ++++--- > > kpartx/gpt.c | 22 ++++++++++------------ > > kpartx/kpartx.c | 46 +++++++++++++++++++++++++++++++++++----------- > > kpartx/kpartx.h | 2 ++ > > 4 files changed, 51 insertions(+), 26 deletions(-) > > > > diff --git a/kpartx/dasd.c b/kpartx/dasd.c > > index 14b9d3aa..f0398645 100644 > > --- a/kpartx/dasd.c > > +++ b/kpartx/dasd.c > > @@ -22,6 +22,7 @@ > > * along with this program. If not, see < > > http://www.gnu.org/licenses/>;. > > */ > > > > +#define _GNU_SOURCE > > #include <stdio.h> > > #include <stdlib.h> > > #include <unistd.h> > > @@ -117,13 +118,13 @@ read_dasd_pt(int fd, __attribute__((unused)) > > struct slice all, > > > > sprintf(pathname, "/dev/.kpartx-node-%u-%u", > > (unsigned int)major(dev), (unsigned > > int)minor(dev)); > > - if ((fd_dasd = open(pathname, O_RDONLY)) == -1) { > > + if ((fd_dasd = open(pathname, O_RDONLY | O_DIRECT)) == > > -1) { > > /* Devicenode does not exist. Try to create one > > */ > > if (mknod(pathname, 0600 | S_IFBLK, dev) == -1) > > { > > /* Couldn't create a device node */ > > return -1; > > } > > - fd_dasd = open(pathname, O_RDONLY); > > + fd_dasd = open(pathname, O_RDONLY | O_DIRECT); > > /* > > * The file will vanish when the last process > > (we) > > * has ceased to access it. > > @@ -175,7 +176,7 @@ read_dasd_pt(int fd, __attribute__((unused)) > > struct slice all, > > * Get volume label, extract name and type. > > */ > > > > - if (!(data = (unsigned char *)malloc(blocksize))) > > + if (aligned_malloc((void **)&data, blocksize, NULL)) > > goto out; > > > > > > diff --git a/kpartx/gpt.c b/kpartx/gpt.c > > index 785b34ea..f7fefb70 100644 > > --- a/kpartx/gpt.c > > +++ b/kpartx/gpt.c > > @@ -243,8 +243,7 @@ alloc_read_gpt_entries(int fd, gpt_header * gpt) > > > > if (!count) return NULL; > > > > - pte = (gpt_entry *)malloc(count); > > - if (!pte) > > + if (aligned_malloc((void **)&pte, get_sector_size(fd), &count)) > > return NULL; > > memset(pte, 0, count); > > > > @@ -269,12 +268,11 @@ static gpt_header * > > alloc_read_gpt_header(int fd, uint64_t lba) > > { > > gpt_header *gpt; > > - gpt = (gpt_header *) > > - malloc(sizeof (gpt_header)); > > - if (!gpt) > > + size_t size = sizeof (gpt_header); > > + if (aligned_malloc((void **)&gpt, get_sector_size(fd), &size)) > > return NULL; > > - memset(gpt, 0, sizeof (*gpt)); > > - if (!read_lba(fd, lba, gpt, sizeof (gpt_header))) { > > + memset(gpt, 0, size); > > + if (!read_lba(fd, lba, gpt, size)) { > > free(gpt); > > return NULL; > > } > > @@ -498,6 +496,7 @@ find_valid_gpt(int fd, gpt_header ** gpt, > > gpt_entry ** ptes) > > gpt_header *pgpt = NULL, *agpt = NULL; > > gpt_entry *pptes = NULL, *aptes = NULL; > > legacy_mbr *legacymbr = NULL; > > + size_t size = sizeof(legacy_mbr); > > uint64_t lastlba; > > if (!gpt || !ptes) > > return 0; > > @@ -526,11 +525,10 @@ find_valid_gpt(int fd, gpt_header ** gpt, > > gpt_entry ** ptes) > > } > > > > /* This will be added to the EFI Spec. per Intel after v1.02. > > */ > > - legacymbr = malloc(sizeof (*legacymbr)); > > - if (legacymbr) { > > - memset(legacymbr, 0, sizeof (*legacymbr)); > > - read_lba(fd, 0, (uint8_t *) legacymbr, > > - sizeof (*legacymbr)); > > + if (aligned_malloc((void **)&legacymbr, get_sector_size(fd), > > + &size) == 0) { > > + memset(legacymbr, 0, size); > > + read_lba(fd, 0, (uint8_t *) legacymbr, size); > > good_pmbr = is_pmbr_valid(legacymbr); > > free(legacymbr); > > legacymbr=NULL; > > diff --git a/kpartx/kpartx.c b/kpartx/kpartx.c > > index d3620c5c..4716dd4d 100644 > > --- a/kpartx/kpartx.c > > +++ b/kpartx/kpartx.c > > @@ -19,6 +19,7 @@ > > * cva, 2002-10-26 > > */ > > > > +#define _GNU_SOURCE > > #include <stdio.h> > > #include <fcntl.h> > > #include <errno.h> > > @@ -41,7 +42,6 @@ > > > > #define SIZE(a) (sizeof(a)/sizeof((a)[0])) > > > > -#define READ_SIZE 1024 > > #define MAXTYPES 64 > > #define MAXSLICES 256 > > #define DM_TARGET "linear" > > @@ -388,7 +388,7 @@ main(int argc, char **argv){ > > set_delimiter(mapname, delim); > > } > > > > - fd = open(device, O_RDONLY); > > + fd = open(device, O_RDONLY | O_DIRECT); > > > > if (fd == -1) { > > perror(device); > > @@ -690,9 +690,9 @@ xmalloc (size_t size) { > > */ > > > > static int > > -sseek(int fd, unsigned int secnr) { > > +sseek(int fd, unsigned int secnr, int secsz) { > > off64_t in, out; > > - in = ((off64_t) secnr << 9); > > + in = ((off64_t) secnr * secsz); > > out = 1; > > > > if ((out = lseek64(fd, in, SEEK_SET)) != in) > > @@ -703,6 +703,21 @@ sseek(int fd, unsigned int secnr) { > > return 0; > > } > > > > +int > > +aligned_malloc(void **mem_p, size_t align, size_t *size_p) > > +{ > > + size_t pgsize = getpagesize(); > > Nit: I'd use a static variable here and call getpagesize() only once. > > > + size_t size = pgsize; > > + if (!mem_p || !align || (size_p && !*size_p)) > > + return EINVAL; > > + > > + if (size_p) > > + *size_p = size = ((*size_p + align - 1) / align) * > > align; > > It would be cleaner to set *size_p only after successful return from > posix_memalign(). Sure. I can change these. -Ben > > + > > + return posix_memalign(mem_p, pgsize, size); > > +} > > > > > > + > > +/* always in sector size blocks */ > > static > > struct block { > > unsigned int secnr; > > @@ -710,30 +725,39 @@ struct block { > > struct block *next; > > } *blockhead; > > > > +/* blknr is always in 512 byte blocks */ > > char * > > -getblock (int fd, unsigned int secnr) { > > +getblock (int fd, unsigned int blknr) { > > + unsigned int secsz = get_sector_size(fd); > > + unsigned int blks_per_sec = secsz / 512; > > + unsigned int secnr = blknr / blks_per_sec; > > + unsigned int blk_off = (blknr % blks_per_sec) * 512; > > struct block *bp; > > > > for (bp = blockhead; bp; bp = bp->next) > > > > if (bp->secnr == secnr) > > - return bp->block; > > + return bp->block + blk_off; > > > > - if (sseek(fd, secnr)) > > + if (sseek(fd, secnr, secsz)) > > return NULL; > > > > bp = xmalloc(sizeof(struct block)); > > bp->secnr = secnr; > > bp->next = blockhead; > > blockhead = bp; > > - bp->block = (char *) xmalloc(READ_SIZE); > > + if (aligned_malloc((void **)&bp->block, secsz, NULL)) { > > + fprintf(stderr, "aligned_malloc failed\n"); > > + exit(1); > > + } > > > > - if (read(fd, bp->block, READ_SIZE) != READ_SIZE) { > > + if (read(fd, bp->block, secsz) != secsz) { > > fprintf(stderr, "read error, sector %d\n", secnr); > > - bp->block = NULL; > > + blockhead = bp->next; > > + return NULL; > > } > > > > - return bp->block; > > + return bp->block + blk_off; > > } > > > > int > > diff --git a/kpartx/kpartx.h b/kpartx/kpartx.h > > index 67edeb82..727632c1 100644 > > --- a/kpartx/kpartx.h > > +++ b/kpartx/kpartx.h > > @@ -1,6 +1,7 @@ > > #ifndef _KPARTX_H > > #define _KPARTX_H > > > > +#include <stddef.h> > > #include <stdint.h> > > #include <sys/ioctl.h> > > > > @@ -61,6 +62,7 @@ extern ptreader read_mac_pt; > > extern ptreader read_sun_pt; > > extern ptreader read_ps3_pt; > > > > +int aligned_malloc(void **mem_p, size_t align, size_t *size_p); > > char *getblock(int fd, unsigned int secnr); > > > > static inline unsigned int > > -- > Dr. Martin Wilck <mwilck@xxxxxxxx>, Tel. +49 (0)911 74053 2107 > SUSE Software Solutions Germany GmbH > HRB 36809, AG Nürnberg GF: Felix > Imendörffer > -- dm-devel mailing list dm-devel@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/dm-devel