Hi All, Short intro: I'm an anaconda (The Fedora installer) developer, and anaconda uses parted for its partitioning. While testing partitionable mdraid I noticed that the kernels view of the partition table never changes even though I was successfully making commit_to_os() calls. This has let to me diving into libparted's commit_to_os() code for Linux and there are multiple issues hiding in there: 1) Parted reads /sys/block/foo/range to determine how many partitions the device type supports and then makes BLKPG ioctl's to update the kernels view of the partition table for partitions which fall into this range. However for example /sys/block/sda/range contains 16, there are 2 issue with libparted using this number: 1) scsi major's only support 15 partitions, 1 of the range of 16 is reserved for the whole device, yet libparted will try to notify the kernel about 16 partitions if present 2) If the major's partition minor's run out, the kernel will switch to the mdp major for the other partitions, iow range no longer limits the number of partitions. 2) libparted assumes the user knows what he is doing, and will ignore -ebusy errors for partitions, assuming that the user is smart enough to only change unused partitions. Parted does this without checking if the partitions which return ebusy actually are unchanged causing REAL errors to get unreported (BAD, really really BAD) 3) because of 1) libparted will only sync 1 partition on /dev/md# devices (would be 0 if not for the of by 1 bug as all md#p# partitions use the mdp major), and it fails to even do that without reporting an error. ### 1) we can fix by simply not checking /sys/block/foo/range, but instead just syncing max partitions. 2) is more troublesome, we could just make -EBUSY n error, but that may annoy / bug some users. OTOH in certain cases libparted already falls back to BLKRRPART which will return EBUSY so users should already be prepared to handle EBUSY 3) Could be fixed by making libparted recognize mdraid as a device type and except mdraid from using BLKPG, like it already is doing with DASD, but it might be better to just get rid of using BLKPG al together. See below. An even bigger problem IMHO is the use of the BLKPG ioctl instead of BLKRRPART at all. What this does is tell the kernel parted's view of the partition table and make it use that, instead of telling the kernel to reread the partition table. According to the parted sources this is done for the case where the kernel does not know the disklabel type. However as soon as the system is rebooted, the system will be using the kernel's view. So IMHO it would be much better to always use the kernels view and just always call BLKRRPART in commit_to_os(), this would solve all of the above issues, *and* make the way the system views the partition table consistent between just after running parted and after a reboot. I've attached a patch which removes the use of the BLKPG ioctl, notice that this also removes a lot of special case code and workarounds, which existence to me clearly indicates that using the BLKPG ioctl is a bad idea. Regards, Hans
diff -up parted-1.9.0/libparted/arch/linux.c~ parted-1.9.0/libparted/arch/linux.c --- parted-1.9.0/libparted/arch/linux.c~ 2009-08-26 06:40:11.000000000 +0200 +++ parted-1.9.0/libparted/arch/linux.c 2009-08-26 06:51:21.000000000 +0200 @@ -41,7 +41,6 @@ #include <libdevmapper.h> #endif -#include "blkpg.h" #include "../architecture.h" #include "dirname.h" @@ -587,22 +586,6 @@ _get_linux_version () return kver = KERNEL_VERSION (major, minor, teeny); } -static int -_have_devfs () -{ - static int have_devfs = -1; - struct stat sb; - - if (have_devfs != -1) - return have_devfs; - - /* the presence of /dev/.devfsd implies that DevFS is active */ - if (stat("/dev/.devfsd", &sb) < 0) - return have_devfs = 0; - - return have_devfs = S_ISCHR(sb.st_mode) ? 1 : 0; -} - static void _device_set_sector_size (PedDevice* dev) { @@ -2189,176 +2172,6 @@ linux_partition_is_busy (const PedPartit return 0; } -static int -_blkpg_part_command (PedDevice* dev, struct blkpg_partition* part, int op) -{ - LinuxSpecific* arch_specific = LINUX_SPECIFIC (dev); - struct blkpg_ioctl_arg ioctl_arg; - - ioctl_arg.op = op; - ioctl_arg.flags = 0; - ioctl_arg.datalen = sizeof (struct blkpg_partition); - ioctl_arg.data = (void*) part; - - return ioctl (arch_specific->fd, BLKPG, &ioctl_arg) == 0; -} - -static int -_blkpg_add_partition (PedDisk* disk, const PedPartition *part) -{ - struct blkpg_partition linux_part; - const char* vol_name; - char* dev_name; - - PED_ASSERT(disk != NULL, return 0); - PED_ASSERT(disk->dev->sector_size % PED_SECTOR_SIZE_DEFAULT == 0, - return 0); - - if (ped_disk_type_check_feature (disk->type, - PED_DISK_TYPE_PARTITION_NAME)) - vol_name = ped_partition_get_name (part); - else - vol_name = NULL; - - dev_name = _device_get_part_path (disk->dev, part->num); - if (!dev_name) - return 0; - - memset (&linux_part, 0, sizeof (linux_part)); - linux_part.start = part->geom.start * disk->dev->sector_size; - /* see fs/partitions/msdos.c:msdos_partition(): "leave room for LILO" */ - if (part->type & PED_PARTITION_EXTENDED) - linux_part.length = part->geom.length == 1 ? 512 : 1024; - else - linux_part.length = part->geom.length * disk->dev->sector_size; - linux_part.pno = part->num; - strncpy (linux_part.devname, dev_name, BLKPG_DEVNAMELTH); - if (vol_name) - strncpy (linux_part.volname, vol_name, BLKPG_VOLNAMELTH); - - free (dev_name); - - if (!_blkpg_part_command (disk->dev, &linux_part, - BLKPG_ADD_PARTITION)) { - return ped_exception_throw ( - PED_EXCEPTION_ERROR, - PED_EXCEPTION_IGNORE_CANCEL, - _("Error informing the kernel about modifications to " - "partition %s -- %s. This means Linux won't know " - "about any changes you made to %s until you reboot " - "-- so you shouldn't mount it or use it in any way " - "before rebooting."), - linux_part.devname, - strerror (errno), - linux_part.devname) - == PED_EXCEPTION_IGNORE; - } - - return 1; -} - -static int -_blkpg_remove_partition (PedDisk* disk, int n) -{ - struct blkpg_partition linux_part; - - memset (&linux_part, 0, sizeof (linux_part)); - linux_part.pno = n; - return _blkpg_part_command (disk->dev, &linux_part, - BLKPG_DEL_PARTITION); -} - -/* - * The number of partitions that a device can have depends on the kernel. - * If we don't find this value in /sys/block/DEV/range, we will use our own - * value. - */ -static unsigned int -_device_get_partition_range(PedDevice* dev) -{ - int range, r; - char path[128]; - FILE* fp; - bool ok; - - r = snprintf(path, sizeof(path), "/sys/block/%s/range", - last_component(dev->path)); - if(r < 0 || r >= sizeof(path)) - return MAX_NUM_PARTS; - - fp = fopen(path, "r"); - if(!fp) - return MAX_NUM_PARTS; - - ok = fscanf(fp, "%d", &range) == 1; - fclose(fp); - - /* (range <= 0) is none sense.*/ - return ok && range > 0 ? range : MAX_NUM_PARTS; -} - -/* - * Sync the partition table in two step process: - * 1. Remove all of the partitions from the kernel's tables, but do not attempt - * removal of any partition for which the corresponding ioctl call fails. - * 2. Add all the partitions that we hold in disk. - * - * To achieve this two step process we must calculate the minimum number of - * maximum possible partitions between what linux supports and what the label - * type supports. EX: - * - * number=MIN(max_parts_supported_in_linux,max_parts_supported_in_msdos_tables) - */ -static int -_disk_sync_part_table (PedDisk* disk) -{ - PED_ASSERT(disk != NULL, return 0); - PED_ASSERT(disk->dev != NULL, return 0); - int lpn; - - /* lpn = largest partition number. */ - if(ped_disk_get_max_supported_partition_count(disk, &lpn)) - lpn = PED_MIN(lpn, _device_get_partition_range(disk->dev)); - else - lpn = _device_get_partition_range(disk->dev); - - /* Its not possible to support largest_partnum < 0. - * largest_partnum == 0 would mean does not support partitions. - * */ - if(lpn < 0) - return 0; - - int *rets = ped_malloc(sizeof(int) * lpn); - int *errnums = ped_malloc(sizeof(int) * lpn); - int ret = 1; - int i; - - for (i = 1; i <= lpn; i++) { - rets[i - 1] = _blkpg_remove_partition (disk, i); - errnums[i - 1] = errno; - } - - for (i = 1; i <= lpn; i++) { - const PedPartition *part = ped_disk_get_partition (disk, i); - if (part) { - /* busy... so we won't (can't!) disturb ;) Prolly - * doesn't matter anyway, because users shouldn't be - * changing mounted partitions anyway... - */ - if (!rets[i - 1] && errnums[i - 1] == EBUSY) - continue; - - /* add the (possibly modified or new) partition */ - if (!_blkpg_add_partition (disk, part)) - ret = 0; - } - } - - free (rets); - free (errnums); - return ret; -} - #ifdef ENABLE_DEVICE_MAPPER static int _dm_remove_map_name(char *name) @@ -2601,19 +2414,6 @@ _kernel_reread_part_table (PedDevice* de } static int -_have_blkpg () -{ - static int have_blkpg = -1; - int kver; - - if (have_blkpg != -1) - return have_blkpg; - - kver = _get_linux_version(); - return have_blkpg = kver >= KERNEL_VERSION (2,4,0) ? 1 : 0; -} - -static int linux_disk_commit (PedDisk* disk) { #ifdef ENABLE_DEVICE_MAPPER @@ -2621,19 +2421,6 @@ linux_disk_commit (PedDisk* disk) return _dm_reread_part_table (disk); #endif if (disk->dev->type != PED_DEVICE_FILE) { - /* The ioctl() command BLKPG_ADD_PARTITION does not notify - * the devfs system; consequently, /proc/partitions will not - * be up to date, and the proper links in /dev are not - * created. Therefore, if using DevFS, we must get the kernel - * to re-read and grok the partition table. - */ - /* Work around kernel dasd problem so we really do BLKRRPART */ - if (disk->dev->type != PED_DEVICE_DASD && - _have_blkpg () && !_have_devfs ()) { - if (_disk_sync_part_table (disk)) - return 1; - } - return _kernel_reread_part_table (disk->dev); }
_______________________________________________ Anaconda-devel-list mailing list Anaconda-devel-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/anaconda-devel-list