Signed-off-by: J. corwin Coburn <corwin@xxxxxxxxxx> --- drivers/md/dm-vdo/constants.c | 15 + drivers/md/dm-vdo/constants.h | 102 +++++++ drivers/md/dm-vdo/release-versions.h | 20 ++ drivers/md/dm-vdo/status-codes.c | 126 +++++++++ drivers/md/dm-vdo/status-codes.h | 112 ++++++++ drivers/md/dm-vdo/types.h | 403 +++++++++++++++++++++++++++ drivers/md/dm-vdo/wait-queue.c | 223 +++++++++++++++ drivers/md/dm-vdo/wait-queue.h | 129 +++++++++ 8 files changed, 1130 insertions(+) create mode 100644 drivers/md/dm-vdo/constants.c create mode 100644 drivers/md/dm-vdo/constants.h create mode 100644 drivers/md/dm-vdo/release-versions.h create mode 100644 drivers/md/dm-vdo/status-codes.c create mode 100644 drivers/md/dm-vdo/status-codes.h create mode 100644 drivers/md/dm-vdo/types.h create mode 100644 drivers/md/dm-vdo/wait-queue.c create mode 100644 drivers/md/dm-vdo/wait-queue.h diff --git a/drivers/md/dm-vdo/constants.c b/drivers/md/dm-vdo/constants.c new file mode 100644 index 00000000000..8bdfb782b13 --- /dev/null +++ b/drivers/md/dm-vdo/constants.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright Red Hat + */ + +#include "types.h" + +/* The maximum logical space is 4 petabytes, which is 1 terablock. */ +const block_count_t MAXIMUM_VDO_LOGICAL_BLOCKS = 1024ULL * 1024 * 1024 * 1024; + +/* The maximum physical space is 256 terabytes, which is 64 gigablocks. */ +const block_count_t MAXIMUM_VDO_PHYSICAL_BLOCKS = 1024ULL * 1024 * 1024 * 64; + +/* unit test minimum */ +const block_count_t MINIMUM_VDO_SLAB_JOURNAL_BLOCKS = 2; diff --git a/drivers/md/dm-vdo/constants.h b/drivers/md/dm-vdo/constants.h new file mode 100644 index 00000000000..7196e99efe9 --- /dev/null +++ b/drivers/md/dm-vdo/constants.h @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright Red Hat + */ + +#ifndef VDO_CONSTANTS_H +#define VDO_CONSTANTS_H + +#include <linux/blkdev.h> + +#include "types.h" + +enum { + /* + * The maximum number of contiguous PBNs which will go to a single bio submission queue, + * assuming there is more than one queue. + */ + VDO_BIO_ROTATION_INTERVAL_LIMIT = 1024, + + /** The number of entries on a block map page */ + VDO_BLOCK_MAP_ENTRIES_PER_PAGE = 812, + + /** The origin of the flat portion of the block map */ + VDO_BLOCK_MAP_FLAT_PAGE_ORIGIN = 1, + + /* + * The height of a block map tree. Assuming a root count of 60 and 812 entries per page, + * this is big enough to represent almost 95 PB of logical space. + */ + VDO_BLOCK_MAP_TREE_HEIGHT = 5, + + /** The default number of bio submission queues. */ + DEFAULT_VDO_BIO_SUBMIT_QUEUE_COUNT = 4, + + /** The number of contiguous PBNs to be submitted to a single bio queue. */ + DEFAULT_VDO_BIO_SUBMIT_QUEUE_ROTATE_INTERVAL = 64, + + /** The number of trees in the arboreal block map */ + DEFAULT_VDO_BLOCK_MAP_TREE_ROOT_COUNT = 60, + + /** The default size of the recovery journal, in blocks */ + DEFAULT_VDO_RECOVERY_JOURNAL_SIZE = 32 * 1024, + + /** The default size of each slab journal, in blocks */ + DEFAULT_VDO_SLAB_JOURNAL_SIZE = 224, + + /* + * The initial size of lbn_operations and pbn_operations, which is based upon the expected + * maximum number of outstanding VIOs. This value was chosen to make it highly unlikely + * that the maps would need to be resized. + */ + VDO_LOCK_MAP_CAPACITY = 10000, + + /** The maximum number of logical zones */ + MAX_VDO_LOGICAL_ZONES = 60, + + /** The maximum number of physical zones */ + MAX_VDO_PHYSICAL_ZONES = 16, + + /** The base-2 logarithm of the maximum blocks in one slab */ + MAX_VDO_SLAB_BITS = 23, + + /** The maximum number of slabs the slab depot supports */ + MAX_VDO_SLABS = 8192, + + /* + * The maximum number of block map pages to load simultaneously during recovery or rebuild. + */ + MAXIMUM_SIMULTANEOUS_VDO_BLOCK_MAP_RESTORATION_READS = 1024, + + /** The maximum number of entries in the slab summary */ + MAXIMUM_VDO_SLAB_SUMMARY_ENTRIES = MAX_VDO_SLABS * MAX_VDO_PHYSICAL_ZONES, + + /** The maximum number of total threads in a VDO thread configuration. */ + MAXIMUM_VDO_THREADS = 100, + + /** The maximum number of VIOs in the system at once */ + MAXIMUM_VDO_USER_VIOS = 2048, + + /** The only physical block size supported by VDO */ + VDO_BLOCK_SIZE = 4096, + + /** The number of sectors per block */ + VDO_SECTORS_PER_BLOCK = (VDO_BLOCK_SIZE >> SECTOR_SHIFT), + + /** The size of a sector that will not be torn */ + VDO_SECTOR_SIZE = 512, + + /** The physical block number reserved for storing the zero block */ + VDO_ZERO_BLOCK = 0, +}; + +/** The maximum logical space is 4 petabytes, which is 1 terablock. */ +extern const block_count_t MAXIMUM_VDO_LOGICAL_BLOCKS; + +/** The maximum physical space is 256 terabytes, which is 64 gigablocks. */ +extern const block_count_t MAXIMUM_VDO_PHYSICAL_BLOCKS; + +/** unit test minimum */ +extern const block_count_t MINIMUM_VDO_SLAB_JOURNAL_BLOCKS; + +#endif /* VDO_CONSTANTS_H */ diff --git a/drivers/md/dm-vdo/release-versions.h b/drivers/md/dm-vdo/release-versions.h new file mode 100644 index 00000000000..0abc13c04b8 --- /dev/null +++ b/drivers/md/dm-vdo/release-versions.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright Red Hat + */ + +#ifndef RELEASE_VERSIONS_H +#define RELEASE_VERSIONS_H + +enum { + VDO_OXYGEN_RELEASE_VERSION_NUMBER = 109583, + VDO_FLUORINE_RELEASE_VERSION_NUMBER = 115838, + VDO_NEON_RELEASE_VERSION_NUMBER = 120965, + VDO_SODIUM_RELEASE_VERSION_NUMBER = 127441, + VDO_MAGNESIUM_RELEASE_VERSION_NUMBER = 131337, + VDO_ALUMINUM_RELEASE_VERSION_NUMBER = 133524, + VDO_HEAD_RELEASE_VERSION_NUMBER = 0, + VDO_CURRENT_RELEASE_VERSION_NUMBER = VDO_HEAD_RELEASE_VERSION_NUMBER, +}; + +#endif /* not RELEASE_VERSIONS_H */ diff --git a/drivers/md/dm-vdo/status-codes.c b/drivers/md/dm-vdo/status-codes.c new file mode 100644 index 00000000000..4d50229ed51 --- /dev/null +++ b/drivers/md/dm-vdo/status-codes.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright Red Hat + */ + +#include "status-codes.h" + +#include "errors.h" +#include "logger.h" +#include "permassert.h" +#include "uds-threads.h" + +const struct error_info vdo_status_list[] = { + { "VDO_NOT_IMPLEMENTED", "Not implemented" }, + { "VDO_OUT_OF_RANGE", "Out of range" }, + { "VDO_REF_COUNT_INVALID", "Reference count would become invalid" }, + { "VDO_NO_SPACE", "Out of space" }, + { "VDO_UNEXPECTED_EOF", "Unexpected EOF on block read" }, + { "VDO_BAD_CONFIGURATION", "Bad configuration option" }, + { "VDO_SOCKET_ERROR", "Socket error" }, + { "VDO_BAD_ALIGNMENT", "Mis-aligned block reference" }, + { "VDO_COMPONENT_BUSY", "Prior operation still in progress" }, + { "VDO_BAD_PAGE", "Corrupt or incorrect page" }, + { "VDO_UNSUPPORTED_VERSION", "Unsupported component version" }, + { "VDO_INCORRECT_COMPONENT", "Component id mismatch in decoder" }, + { "VDO_PARAMETER_MISMATCH", "Parameters have conflicting values" }, + { "VDO_BLOCK_SIZE_TOO_SMALL", "The block size is too small" }, + { "VDO_UNKNOWN_PARTITION", "No partition exists with a given id" }, + { "VDO_PARTITION_EXISTS", "A partition already exists with a given id" }, + { "VDO_NOT_READ_ONLY", "The device is not in read-only mode" }, + { "VDO_INCREMENT_TOO_SMALL", "Physical block growth of too few blocks" }, + { "VDO_CHECKSUM_MISMATCH", "Incorrect checksum" }, + { "VDO_RECOVERY_JOURNAL_FULL", "The recovery journal is full" }, + { "VDO_LOCK_ERROR", "A lock is held incorrectly" }, + { "VDO_READ_ONLY", "The device is in read-only mode" }, + { "VDO_SHUTTING_DOWN", "The device is shutting down" }, + { "VDO_CORRUPT_JOURNAL", "Recovery journal entries corrupted" }, + { "VDO_TOO_MANY_SLABS", "Exceeds maximum number of slabs supported" }, + { "VDO_INVALID_FRAGMENT", "Compressed block fragment is invalid" }, + { "VDO_RETRY_AFTER_REBUILD", "Retry operation after rebuilding finishes" }, + { "VDO_UNKNOWN_COMMAND", "The extended command is not known" }, + { "VDO_COMMAND_ERROR", "Bad extended command parameters" }, + { "VDO_CANNOT_DETERMINE_SIZE", "Cannot determine config sizes to fit" }, + { "VDO_BAD_MAPPING", "Invalid page mapping" }, + { "VDO_READ_CACHE_BUSY", "Read cache has no free slots" }, + { "VDO_BIO_CREATION_FAILED", "Bio creation failed" }, + { "VDO_BAD_MAGIC", "Bad magic number" }, + { "VDO_BAD_NONCE", "Bad nonce" }, + { "VDO_JOURNAL_OVERFLOW", "Journal sequence number overflow" }, + { "VDO_INVALID_ADMIN_STATE", "Invalid operation for current state" }, + { "VDO_CANT_ADD_SYSFS_NODE", "Failed to add sysfs node" }, +}; + +static atomic_t vdo_status_codes_registered = ATOMIC_INIT(0); +static int status_code_registration_result; + +static void do_status_code_registration(void) +{ + int result; + + STATIC_ASSERT((VDO_STATUS_CODE_LAST - VDO_STATUS_CODE_BASE) == + ARRAY_SIZE(vdo_status_list)); + + result = uds_register_error_block("VDO Status", + VDO_STATUS_CODE_BASE, + VDO_STATUS_CODE_BLOCK_END, + vdo_status_list, + sizeof(vdo_status_list)); + /* + * The following test handles cases where libvdo is statically linked against both the test + * modules and the test driver (because multiple instances of this module call their own + * copy of this function once each, resulting in multiple calls to register_error_block + * which is shared in libuds). + */ + if (result == UDS_DUPLICATE_NAME) + result = UDS_SUCCESS; + + status_code_registration_result = (result == UDS_SUCCESS) ? VDO_SUCCESS : result; +} + +/** + * vdo_register_status_codes() - Register the VDO status codes if needed. + * Return: A success or error code. + */ +int vdo_register_status_codes(void) +{ + uds_perform_once(&vdo_status_codes_registered, do_status_code_registration); + return status_code_registration_result; +} + +/** + * vdo_map_to_system_error() - Given an error code, return a value we can return to the OS. + * @error: The error code to convert. + * + * The input error code may be a system-generated value (such as -EIO), an errno macro used in our + * code (such as EIO), or a UDS or VDO status code; the result must be something the rest of the OS + * can consume (negative errno values such as -EIO, in the case of the kernel). + * + * Return: A system error code value. + */ +int vdo_map_to_system_error(int error) +{ + char error_name[UDS_MAX_ERROR_NAME_SIZE]; + char error_message[UDS_MAX_ERROR_MESSAGE_SIZE]; + + /* 0 is success, negative a system error code */ + if (likely(error <= 0)) + return error; + if (error < 1024) + return -error; + + /* VDO or UDS error */ + switch (error) { + case VDO_NO_SPACE: + return -ENOSPC; + case VDO_READ_ONLY: + return -EIO; + default: + uds_log_info("%s: mapping internal status code %d (%s: %s) to EIO", + __func__, + error, + uds_string_error_name(error, error_name, sizeof(error_name)), + uds_string_error(error, error_message, sizeof(error_message))); + return -EIO; + } +} diff --git a/drivers/md/dm-vdo/status-codes.h b/drivers/md/dm-vdo/status-codes.h new file mode 100644 index 00000000000..34ad2445419 --- /dev/null +++ b/drivers/md/dm-vdo/status-codes.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright Red Hat + */ + +#ifndef VDO_STATUS_CODES_H +#define VDO_STATUS_CODES_H + +#include "errors.h" + +enum { + UDS_BLOCK_SIZE = UDS_ERROR_CODE_BLOCK_END - UDS_ERROR_CODE_BASE, + VDO_BLOCK_START = UDS_ERROR_CODE_BLOCK_END, + VDO_BLOCK_END = VDO_BLOCK_START + UDS_BLOCK_SIZE, + PRP_BLOCK_START = VDO_BLOCK_END, + PRP_BLOCK_END = PRP_BLOCK_START + UDS_BLOCK_SIZE, +}; + +/* VDO-specific status codes. */ +enum vdo_status_codes { + /* successful result */ + VDO_SUCCESS, + /* base of all VDO errors */ + VDO_STATUS_CODE_BASE = VDO_BLOCK_START, + /* we haven't written this yet */ + VDO_NOT_IMPLEMENTED = VDO_STATUS_CODE_BASE, + /* input out of range */ + VDO_OUT_OF_RANGE, + /* an invalid reference count would result */ + VDO_REF_COUNT_INVALID, + /* a free block could not be allocated */ + VDO_NO_SPACE, + /* unexpected EOF on block read */ + VDO_UNEXPECTED_EOF, + /* improper or missing configuration option */ + VDO_BAD_CONFIGURATION, + /* socket opening or binding problem */ + VDO_SOCKET_ERROR, + /* read or write on non-aligned offset */ + VDO_BAD_ALIGNMENT, + /* prior operation still in progress */ + VDO_COMPONENT_BUSY, + /* page contents incorrect or corrupt data */ + VDO_BAD_PAGE, + /* unsupported version of some component */ + VDO_UNSUPPORTED_VERSION, + /* component id mismatch in decoder */ + VDO_INCORRECT_COMPONENT, + /* parameters have conflicting values */ + VDO_PARAMETER_MISMATCH, + /* the block size is too small */ + VDO_BLOCK_SIZE_TOO_SMALL, + /* no partition exists with a given id */ + VDO_UNKNOWN_PARTITION, + /* a partition already exists with a given id */ + VDO_PARTITION_EXISTS, + /* the VDO is not in read-only mode */ + VDO_NOT_READ_ONLY, + /* physical block growth of too few blocks */ + VDO_INCREMENT_TOO_SMALL, + /* incorrect checksum */ + VDO_CHECKSUM_MISMATCH, + /* the recovery journal is full */ + VDO_RECOVERY_JOURNAL_FULL, + /* a lock is held incorrectly */ + VDO_LOCK_ERROR, + /* the VDO is in read-only mode */ + VDO_READ_ONLY, + /* the VDO is shutting down */ + VDO_SHUTTING_DOWN, + /* the recovery journal has corrupt entries */ + VDO_CORRUPT_JOURNAL, + /* exceeds maximum number of slabs supported */ + VDO_TOO_MANY_SLABS, + /* a compressed block fragment is invalid */ + VDO_INVALID_FRAGMENT, + /* action is unsupported while rebuilding */ + VDO_RETRY_AFTER_REBUILD, + /* the extended command is not known */ + VDO_UNKNOWN_COMMAND, + /* bad extended command parameters */ + VDO_COMMAND_ERROR, + /* cannot determine sizes to fit */ + VDO_CANNOT_DETERMINE_SIZE, + /* a block map entry is invalid */ + VDO_BAD_MAPPING, + /* read cache has no free slots */ + VDO_READ_CACHE_BUSY, + /* bio_add_page failed */ + VDO_BIO_CREATION_FAILED, + /* bad magic number */ + VDO_BAD_MAGIC, + /* bad nonce */ + VDO_BAD_NONCE, + /* sequence number overflow */ + VDO_JOURNAL_OVERFLOW, + /* the VDO is not in a state to perform an admin operation */ + VDO_INVALID_ADMIN_STATE, + /* failure adding a sysfs node */ + VDO_CANT_ADD_SYSFS_NODE, + /* one more than last error code */ + VDO_STATUS_CODE_LAST, + VDO_STATUS_CODE_BLOCK_END = VDO_BLOCK_END +}; + +extern const struct error_info vdo_status_list[]; + +int vdo_register_status_codes(void); + +int vdo_map_to_system_error(int error); + +#endif /* VDO_STATUS_CODES_H */ diff --git a/drivers/md/dm-vdo/types.h b/drivers/md/dm-vdo/types.h new file mode 100644 index 00000000000..a8ab9a83587 --- /dev/null +++ b/drivers/md/dm-vdo/types.h @@ -0,0 +1,403 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright Red Hat + */ + +#ifndef VDO_TYPES_H +#define VDO_TYPES_H + +#include <linux/bio.h> +#include <linux/blkdev.h> +#include <linux/device-mapper.h> +#include <linux/list.h> +#include <linux/compiler_attributes.h> +#include <linux/types.h> + +#include "funnel-queue.h" + +/* A size type in blocks. */ +typedef u64 block_count_t; + +/* The size of a block. */ +typedef u16 block_size_t; + +/* A counter for data_vios */ +typedef u16 data_vio_count_t; + +/* A height within a tree. */ +typedef u8 height_t; + +/* The logical block number as used by the consumer. */ +typedef u64 logical_block_number_t; + +/* The type of the nonce used to identify instances of VDO. */ +typedef u64 nonce_t; + +/* A size in pages. */ +typedef u32 page_count_t; + +/* A page number. */ +typedef u32 page_number_t; + +/* + * The physical (well, less logical) block number at which the block is found on the underlying + * device. + */ +typedef u64 physical_block_number_t; + +/* + * A release version number. These numbers are used to make the numbering space for component + * versions independent across release branches. + * + * Really an enum, but we have to specify the size for encoding; see release_versions.h for the + * enumeration values. + */ +typedef u32 release_version_number_t; + +/* A count of tree roots. */ +typedef u8 root_count_t; + +/* A number of sectors. */ +typedef u8 sector_count_t; + +/* A sequence number. */ +typedef u64 sequence_number_t; + +/* The offset of a block within a slab. */ +typedef u32 slab_block_number; + +/* A size type in slabs. */ +typedef u16 slab_count_t; + +/* A slot in a bin or block map page. */ +typedef u16 slot_number_t; + +/* typedef thread_count_t - A thread counter. */ +typedef u8 thread_count_t; + +/* typedef thread_id_t - A thread ID, vdo threads are numbered sequentially from 0. */ +typedef u8 thread_id_t; + +/* A zone counter */ +typedef u8 zone_count_t; + +/* The following enums are persisted on storage, so the values must be preserved. */ + +/* The current operating mode of the VDO. */ +enum vdo_state { + VDO_DIRTY = 0, + VDO_NEW = 1, + VDO_CLEAN = 2, + VDO_READ_ONLY_MODE = 3, + VDO_FORCE_REBUILD = 4, + VDO_RECOVERING = 5, + VDO_REPLAYING = 6, /* VDO_REPLAYING is never set anymore, but retained for upgrade */ + VDO_REBUILD_FOR_UPGRADE = 7, + + /* Keep VDO_STATE_COUNT at the bottom. */ + VDO_STATE_COUNT +}; + +/** + * vdo_state_requires_read_only_rebuild() - Check whether a vdo_state indicates + * that a read-only rebuild is required. + * @state: The vdo_state to check. + * + * Return: true if the state indicates a rebuild is required + */ +static inline bool __must_check vdo_state_requires_read_only_rebuild(enum vdo_state state) +{ + return ((state == VDO_FORCE_REBUILD) || (state == VDO_REBUILD_FOR_UPGRADE)); +} + +/** + * vdo_state_requires_recovery() - Check whether a vdo state indicates that recovery is needed. + * @state: The state to check. + * + * Return: true if the state indicates a recovery is required + */ +static inline bool __must_check vdo_state_requires_recovery(enum vdo_state state) +{ + return ((state == VDO_DIRTY) || (state == VDO_REPLAYING) || (state == VDO_RECOVERING)); +} + +/* + * The current operation on a physical block (from the point of view of the recovery journal, slab + * journals, and reference counts. + */ +enum journal_operation { + VDO_JOURNAL_DATA_REMAPPING = 0, + VDO_JOURNAL_BLOCK_MAP_REMAPPING = 1, +} __packed; + +/* Partition IDs encoded in the volume layout in the super block. */ +enum partition_id { + VDO_BLOCK_MAP_PARTITION = 0, + VDO_SLAB_DEPOT_PARTITION = 1, + VDO_RECOVERY_JOURNAL_PARTITION = 2, + VDO_SLAB_SUMMARY_PARTITION = 3, +} __packed; + +/* Metadata types for the vdo. */ +enum vdo_metadata_type { + VDO_METADATA_RECOVERY_JOURNAL = 1, + VDO_METADATA_SLAB_JOURNAL = 2, + VDO_METADATA_RECOVERY_JOURNAL_2 = 3, +} __packed; + +/* A position in the block map where a block map entry is stored. */ +struct block_map_slot { + physical_block_number_t pbn; + slot_number_t slot; +}; + +/* + * Four bits of each five-byte block map entry contain a mapping state value used to distinguish + * unmapped or trimmed logical blocks (which are treated as mapped to the zero block) from entries + * that have been mapped to a physical block, including the zero block. + * + * FIXME: these should maybe be defines. + */ +enum block_mapping_state { + VDO_MAPPING_STATE_UNMAPPED = 0, /* Must be zero to be the default value */ + VDO_MAPPING_STATE_UNCOMPRESSED = 1, /* A normal (uncompressed) block */ + VDO_MAPPING_STATE_COMPRESSED_BASE = 2, /* Compressed in slot 0 */ + VDO_MAPPING_STATE_COMPRESSED_MAX = 15, /* Compressed in slot 13 */ +}; + +enum { + VDO_MAX_COMPRESSION_SLOTS = + (VDO_MAPPING_STATE_COMPRESSED_MAX - VDO_MAPPING_STATE_COMPRESSED_BASE + 1), +}; + + +struct data_location { + physical_block_number_t pbn; + enum block_mapping_state state; +}; + +/* The configuration of a single slab derived from the configured block size and slab size. */ +struct slab_config { + /* total number of blocks in the slab */ + block_count_t slab_blocks; + /* number of blocks available for data */ + block_count_t data_blocks; + /* number of blocks for reference counts */ + block_count_t reference_count_blocks; + /* number of blocks for the slab journal */ + block_count_t slab_journal_blocks; + /* + * Number of blocks after which the slab journal starts pushing out a reference_block for + * each new entry it receives. + */ + block_count_t slab_journal_flushing_threshold; + /* + * Number of blocks after which the slab journal pushes out all reference_blocks and makes + * all vios wait. + */ + block_count_t slab_journal_blocking_threshold; + /* Number of blocks after which the slab must be scrubbed before coming online. */ + block_count_t slab_journal_scrubbing_threshold; +} __packed; + +/* + * This structure is memcmp'd for equality. Keep it packed and don't add any fields that are not + * properly set in both extant and parsed configs. + */ +struct thread_count_config { + unsigned int bio_ack_threads; + unsigned int bio_threads; + unsigned int bio_rotation_interval; + unsigned int cpu_threads; + unsigned int logical_zones; + unsigned int physical_zones; + unsigned int hash_zones; +} __packed; + +struct device_config { + struct dm_target *owning_target; + struct dm_dev *owned_device; + struct vdo *vdo; + /* All configs referencing a layer are kept on a list in the layer */ + struct list_head config_list; + char *original_string; + unsigned int version; + char *parent_device_name; + block_count_t physical_blocks; + /* + * This is the number of logical blocks from VDO's internal point of view. It is the number + * of 4K blocks regardless of the value of the logical_block_size parameter below. + */ + block_count_t logical_blocks; + unsigned int logical_block_size; + unsigned int cache_size; + unsigned int block_map_maximum_age; + bool deduplication; + bool compression; + struct thread_count_config thread_counts; + block_count_t max_discard_blocks; +}; + +enum vdo_completion_type { + /* Keep VDO_UNSET_COMPLETION_TYPE at the top. */ + VDO_UNSET_COMPLETION_TYPE, + VDO_ACTION_COMPLETION, + VDO_ADMIN_COMPLETION, + VDO_BLOCK_ALLOCATOR_COMPLETION, + VDO_DATA_VIO_POOL_COMPLETION, + VDO_DECREMENT_COMPLETION, + VDO_FLUSH_COMPLETION, + VDO_FLUSH_NOTIFICATION_COMPLETION, + VDO_GENERATION_FLUSHED_COMPLETION, + VDO_HASH_ZONE_COMPLETION, + VDO_HASH_ZONES_COMPLETION, + VDO_LOCK_COUNTER_COMPLETION, + VDO_PAGE_COMPLETION, + VDO_READ_ONLY_MODE_COMPLETION, + VDO_REPAIR_COMPLETION, + VDO_SYNC_COMPLETION, + VIO_COMPLETION, +} __packed; + +struct vdo_completion; + +/** + * typedef vdo_action - An asynchronous VDO operation. + * @completion: The completion of the operation. + */ +typedef void vdo_action(struct vdo_completion *completion); + +enum vdo_completion_priority { + BIO_ACK_Q_ACK_PRIORITY = 0, + BIO_ACK_Q_MAX_PRIORITY = 0, + BIO_Q_COMPRESSED_DATA_PRIORITY = 0, + BIO_Q_DATA_PRIORITY = 0, + BIO_Q_FLUSH_PRIORITY = 2, + BIO_Q_HIGH_PRIORITY = 2, + BIO_Q_METADATA_PRIORITY = 1, + BIO_Q_VERIFY_PRIORITY = 1, + BIO_Q_MAX_PRIORITY = 2, + CPU_Q_COMPLETE_VIO_PRIORITY = 0, + CPU_Q_COMPLETE_READ_PRIORITY = 0, + CPU_Q_COMPRESS_BLOCK_PRIORITY = 0, + CPU_Q_EVENT_REPORTER_PRIORITY = 0, + CPU_Q_HASH_BLOCK_PRIORITY = 0, + CPU_Q_MAX_PRIORITY = 0, + UDS_Q_PRIORITY = 0, + UDS_Q_MAX_PRIORITY = 0, + VDO_DEFAULT_Q_COMPLETION_PRIORITY = 1, + VDO_DEFAULT_Q_FLUSH_PRIORITY = 2, + VDO_DEFAULT_Q_MAP_BIO_PRIORITY = 0, + VDO_DEFAULT_Q_SYNC_PRIORITY = 2, + VDO_DEFAULT_Q_VIO_CALLBACK_PRIORITY = 1, + VDO_DEFAULT_Q_MAX_PRIORITY = 2, + /* The maximum allowable priority */ + VDO_WORK_Q_MAX_PRIORITY = 2, + /* A value which must be out of range for a valid priority */ + VDO_WORK_Q_DEFAULT_PRIORITY = VDO_WORK_Q_MAX_PRIORITY + 1, +}; + +struct vdo_completion { + /* The type of completion this is */ + enum vdo_completion_type type; + + /* + * <code>true</code> once the processing of the operation is complete. This flag should not + * be used by waiters external to the VDO base as it is used to gate calling the callback. + */ + bool complete; + + /* + * If true, queue this completion on the next callback invocation, even if it is already + * running on the correct thread. + */ + bool requeue; + + /* The ID of the thread which should run the next callback */ + thread_id_t callback_thread_id; + + /* The result of the operation */ + int result; + + /* The VDO on which this completion operates */ + struct vdo *vdo; + + /* The callback which will be called once the operation is complete */ + vdo_action *callback; + + /* Callback which, if set, will be called if an error result is set */ + vdo_action *error_handler; + + /* The parent object, if any, that spawned this completion */ + void *parent; + + /* Entry link for lock-free work queue */ + struct funnel_queue_entry work_queue_entry_link; + enum vdo_completion_priority priority; + struct vdo_work_queue *my_queue; + u64 enqueue_time; +}; + +struct block_allocator; +struct data_vio; +struct vdo; +struct vdo_config; + +/* vio types for statistics and instrumentation. */ +enum vio_type { + VIO_TYPE_UNINITIALIZED = 0, + VIO_TYPE_DATA, + VIO_TYPE_BLOCK_ALLOCATOR, + VIO_TYPE_BLOCK_MAP, + VIO_TYPE_BLOCK_MAP_INTERIOR, + VIO_TYPE_GEOMETRY, + VIO_TYPE_PARTITION_COPY, + VIO_TYPE_RECOVERY_JOURNAL, + VIO_TYPE_SLAB_JOURNAL, + VIO_TYPE_SLAB_SUMMARY, + VIO_TYPE_SUPER_BLOCK, +} __packed; + +/* Priority levels for asynchronous I/O operations performed on a vio. */ +enum vio_priority { + VIO_PRIORITY_LOW = 0, + VIO_PRIORITY_DATA = VIO_PRIORITY_LOW, + VIO_PRIORITY_COMPRESSED_DATA = VIO_PRIORITY_DATA, + VIO_PRIORITY_METADATA, + VIO_PRIORITY_HIGH, +} __packed; + +/* + * A wrapper for a bio. All I/O to the storage below a vdo is conducted via vios. + */ +struct vio { + /* The completion for this vio */ + struct vdo_completion completion; + + /* The bio zone in which I/O should be processed */ + zone_count_t bio_zone; + + /* The queueing priority of the vio operation */ + enum vio_priority priority; + + /* The vio type is used for statistics and instrumentation. */ + enum vio_type type; + + /* The size of this vio in blocks */ + unsigned int block_count; + + /* The data being read or written. */ + char *data; + + /* The VDO-owned bio to use for all IO for this vio */ + struct bio *bio; + + /* + * A list of enqueued bios with consecutive block numbers, stored by vdo_submit_bio() under + * the first-enqueued vio. The other vios are found via their bio entries in this list, and + * are not added to the work queue as separate completions. + */ + struct bio_list bios_merged; +}; + +#endif /* VDO_TYPES_H */ diff --git a/drivers/md/dm-vdo/wait-queue.c b/drivers/md/dm-vdo/wait-queue.c new file mode 100644 index 00000000000..4048c11c3e5 --- /dev/null +++ b/drivers/md/dm-vdo/wait-queue.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright Red Hat + */ + +#include "wait-queue.h" + +#include <linux/device-mapper.h> + +#include "permassert.h" + +#include "status-codes.h" + +/** + * vdo_enqueue_waiter() - Add a waiter to the tail end of a wait queue. + * @queue: The queue to which to add the waiter. + * @waiter: The waiter to add to the queue. + * + * The waiter must not already be waiting in a queue. + * + * Return: VDO_SUCCESS or an error code. + */ +void vdo_enqueue_waiter(struct wait_queue *queue, struct waiter *waiter) +{ + BUG_ON(waiter->next_waiter != NULL); + + if (queue->last_waiter == NULL) { + /* + * The queue is empty, so form the initial circular list by self-linking the + * initial waiter. + */ + waiter->next_waiter = waiter; + } else { + /* Splice the new waiter in at the end of the queue. */ + waiter->next_waiter = queue->last_waiter->next_waiter; + queue->last_waiter->next_waiter = waiter; + } + + /* In both cases, the waiter we added to the ring becomes the last waiter. */ + queue->last_waiter = waiter; + queue->queue_length += 1; +} + +/** + * vdo_transfer_all_waiters() - Transfer all waiters from one wait queue to a second queue, + * emptying the first queue. + * @from_queue: The queue containing the waiters to move. + * @to_queue: The queue that will receive the waiters from the first queue. + */ +void vdo_transfer_all_waiters(struct wait_queue *from_queue, struct wait_queue *to_queue) +{ + /* If the source queue is empty, there's nothing to do. */ + if (!vdo_has_waiters(from_queue)) + return; + + if (vdo_has_waiters(to_queue)) { + /* + * Both queues are non-empty. Splice the two circular lists together by swapping + * the next (head) pointers in the list tails. + */ + struct waiter *from_head = from_queue->last_waiter->next_waiter; + struct waiter *to_head = to_queue->last_waiter->next_waiter; + + to_queue->last_waiter->next_waiter = from_head; + from_queue->last_waiter->next_waiter = to_head; + } + + to_queue->last_waiter = from_queue->last_waiter; + to_queue->queue_length += from_queue->queue_length; + vdo_initialize_wait_queue(from_queue); +} + +/** + * vdo_notify_all_waiters() - Notify all the entries waiting in a queue. + * @queue: The wait queue containing the waiters to notify. + * @callback: The function to call to notify each waiter, or NULL to invoke the callback field + * registered in each waiter. + * @context: The context to pass to the callback function. + * + * Notifies all the entries waiting in a queue to continue execution by invoking a callback + * function on each of them in turn. The queue is copied and emptied before invoking any callbacks, + * and only the waiters that were in the queue at the start of the call will be notified. + */ +void vdo_notify_all_waiters(struct wait_queue *queue, waiter_callback *callback, void *context) +{ + /* + * Copy and empty the queue first, avoiding the possibility of an infinite loop if entries + * are returned to the queue by the callback function. + */ + struct wait_queue waiters; + + vdo_initialize_wait_queue(&waiters); + vdo_transfer_all_waiters(queue, &waiters); + + /* Drain the copied queue, invoking the callback on every entry. */ + while (vdo_notify_next_waiter(&waiters, callback, context)) + /* All the work is done by the loop condition. */ + ; +} + +/** + * vdo_get_first_waiter() - Return the waiter that is at the head end of a wait queue. + * @queue: The queue from which to get the first waiter. + * + * Return: The first (oldest) waiter in the queue, or NULL if the queue is empty. + */ +struct waiter *vdo_get_first_waiter(const struct wait_queue *queue) +{ + struct waiter *last_waiter = queue->last_waiter; + + if (last_waiter == NULL) + /* There are no waiters, so we're done. */ + return NULL; + + /* The queue is circular, so the last entry links to the head of the queue. */ + return last_waiter->next_waiter; +} + +/** + * vdo_dequeue_matching_waiters() - Remove all waiters that match based on the specified matching + * method and append them to a wait_queue. + * @queue: The wait queue to process. + * @match_method: The method to determine matching. + * @match_context: Contextual info for the match method. + * @matched_queue: A wait_queue to store matches. + */ +void vdo_dequeue_matching_waiters(struct wait_queue *queue, + waiter_match *match_method, + void *match_context, + struct wait_queue *matched_queue) +{ + struct wait_queue matched_waiters, iteration_queue; + + vdo_initialize_wait_queue(&matched_waiters); + + vdo_initialize_wait_queue(&iteration_queue); + vdo_transfer_all_waiters(queue, &iteration_queue); + while (vdo_has_waiters(&iteration_queue)) { + struct waiter *waiter = vdo_dequeue_next_waiter(&iteration_queue); + + vdo_enqueue_waiter((match_method(waiter, match_context) ? + &matched_waiters : + queue), + waiter); + } + + vdo_transfer_all_waiters(&matched_waiters, matched_queue); +} + +/** + * vdo_dequeue_next_waiter() - Remove the first waiter from the head end of a wait queue. + * @queue: The wait queue from which to remove the first entry. + * + * The caller will be responsible for waking the waiter by invoking the correct callback function + * to resume its execution. + * + * Return: The first (oldest) waiter in the queue, or NULL if the queue is empty. + */ +struct waiter *vdo_dequeue_next_waiter(struct wait_queue *queue) +{ + struct waiter *first_waiter = vdo_get_first_waiter(queue); + struct waiter *last_waiter = queue->last_waiter; + + if (first_waiter == NULL) + return NULL; + + if (first_waiter == last_waiter) + /* The queue has a single entry, so just empty it out by nulling the tail. */ + queue->last_waiter = NULL; + else + /* + * The queue has more than one entry, so splice the first waiter out of the + * circular queue. + */ + last_waiter->next_waiter = first_waiter->next_waiter; + + /* The waiter is no longer in a wait queue. */ + first_waiter->next_waiter = NULL; + queue->queue_length -= 1; + return first_waiter; +} + +/** + * vdo_notify_next_waiter() - Notify the next entry waiting in a queue. + * @queue: The wait queue containing the waiter to notify. + * @callback: The function to call to notify the waiter, or NULL to invoke the callback field + * registered in the waiter. + * @context: The context to pass to the callback function. + * + * Notifies the next entry waiting in a queue to continue execution by invoking a callback function + * on it after removing it from the queue. + * + * Return: true if there was a waiter in the queue. + */ +bool vdo_notify_next_waiter(struct wait_queue *queue, waiter_callback *callback, void *context) +{ + struct waiter *waiter = vdo_dequeue_next_waiter(queue); + + if (waiter == NULL) + return false; + + if (callback == NULL) + callback = waiter->callback; + (*callback)(waiter, context); + return true; +} + +/** + * vdo_get_next_waiter() - Get the waiter after this one, for debug iteration. + * @queue: The wait queue. + * @waiter: A waiter. + * + * Return: The next waiter, or NULL. + */ +const struct waiter * +vdo_get_next_waiter(const struct wait_queue *queue, const struct waiter *waiter) +{ + struct waiter *first_waiter = vdo_get_first_waiter(queue); + + if (waiter == NULL) + return first_waiter; + return ((waiter->next_waiter != first_waiter) ? waiter->next_waiter : NULL); +} diff --git a/drivers/md/dm-vdo/wait-queue.h b/drivers/md/dm-vdo/wait-queue.h new file mode 100644 index 00000000000..f73b93bee1f --- /dev/null +++ b/drivers/md/dm-vdo/wait-queue.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright Red Hat + */ + +#ifndef VDO_WAIT_QUEUE_H +#define VDO_WAIT_QUEUE_H + +#include <linux/compiler.h> +#include <linux/types.h> + +/** + * DOC: Wait queues. + * + * A wait queue is a circular list of entries waiting to be notified of a change in a condition. + * Keeping a circular list allows the queue structure to simply be a pointer to the tail (newest) + * entry in the queue, supporting constant-time enqueue and dequeue operations. A null pointer is + * an empty queue. + * + * An empty queue: + * queue0.last_waiter -> NULL + * + * A singleton queue: + * queue1.last_waiter -> entry1 -> entry1 -> [...] + * + * A three-element queue: + * queue2.last_waiter -> entry3 -> entry1 -> entry2 -> entry3 -> [...] + */ + +struct waiter; + +struct wait_queue { + /* The tail of the queue, the last (most recently added) entry */ + struct waiter *last_waiter; + /* The number of waiters currently in the queue */ + size_t queue_length; +}; + +/** + * typedef waiter_callback - Callback type for functions which will be called to resume processing + * of a waiter after it has been removed from its wait queue. + */ +typedef void waiter_callback(struct waiter *waiter, void *context); + +/** + * typedef waiter_match - Method type for waiter matching methods. + * + * A waiter_match method returns false if the waiter does not match. + */ +typedef bool waiter_match(struct waiter *waiter, void *context); + +/* The queue entry structure for entries in a wait_queue. */ +struct waiter { + /* + * The next waiter in the queue. If this entry is the last waiter, then this is actually a + * pointer back to the head of the queue. + */ + struct waiter *next_waiter; + + /* Optional waiter-specific callback to invoke when waking this waiter. */ + waiter_callback *callback; +}; + +/** + * is_waiting() - Check whether a waiter is waiting. + * @waiter: The waiter to check. + * + * Return: true if the waiter is on some wait_queue. + */ +static inline bool vdo_is_waiting(struct waiter *waiter) +{ + return (waiter->next_waiter != NULL); +} + +/** + * initialize_wait_queue() - Initialize a wait queue. + * @queue: The queue to initialize. + */ +static inline void vdo_initialize_wait_queue(struct wait_queue *queue) +{ + *queue = (struct wait_queue) { + .last_waiter = NULL, + .queue_length = 0, + }; +} + +/** + * has_waiters() - Check whether a wait queue has any entries waiting in it. + * @queue: The queue to query. + * + * Return: true if there are any waiters in the queue. + */ +static inline bool __must_check vdo_has_waiters(const struct wait_queue *queue) +{ + return (queue->last_waiter != NULL); +} + +void vdo_enqueue_waiter(struct wait_queue *queue, struct waiter *waiter); + +void vdo_notify_all_waiters(struct wait_queue *queue, waiter_callback *callback, void *context); + +bool vdo_notify_next_waiter(struct wait_queue *queue, waiter_callback *callback, void *context); + +void vdo_transfer_all_waiters(struct wait_queue *from_queue, struct wait_queue *to_queue); + +struct waiter *vdo_get_first_waiter(const struct wait_queue *queue); + +void vdo_dequeue_matching_waiters(struct wait_queue *queue, + waiter_match *match_method, + void *match_context, + struct wait_queue *matched_queue); + +struct waiter *vdo_dequeue_next_waiter(struct wait_queue *queue); + +/** + * count_waiters() - Count the number of waiters in a wait queue. + * @queue: The wait queue to query. + * + * Return: The number of waiters in the queue. + */ +static inline size_t __must_check vdo_count_waiters(const struct wait_queue *queue) +{ + return queue->queue_length; +} + +const struct waiter * __must_check +vdo_get_next_waiter(const struct wait_queue *queue, const struct waiter *waiter); + +#endif /* VDO_WAIT_QUEUE_H */ -- 2.40.1 -- dm-devel mailing list dm-devel@xxxxxxxxxx https://listman.redhat.com/mailman/listinfo/dm-devel