This is similar to existing F_{SET/GET}_RW_HINT but more generic/extensible. F_SET/GET_RW_HINT_EX take a pointer to a struct rw_hint_ex as argument: struct rw_hint_ex { __u8 type; __u8 pad[7]; __u64 val; }; With F_SET_RW_HINT_EX, the user passes the hint type and its value. Hint type can be either lifetime hint (TYPE_RW_LIFETIME_HINT) or placement hint (TYPE_RW_PLACEMENT_HINT). The interface allows more hint types to be added in the future. Valid values for lifetime hints are the same as the values supported by the existing fcntl(F_SET_RW_HINT). Valid values for placement hints are between 0 and 126, both inclusive. The inode retains either the lifetime hint or the placement hint, whichever is set later. The set hint type and its value can be queried by F_GET_RW_HINT_EX. The i_write_hint field of the inode is a 1-byte field. Use the most significant bit as the hint type. This bit is set for placement hint. For lifetime hint, this bit remains zero. Signed-off-by: Kanchan Joshi <joshi.k@xxxxxxxxxxx> Signed-off-by: Nitesh Shetty <nj.shetty@xxxxxxxxxxx> --- fs/fcntl.c | 67 ++++++++++++++++++++++++++++++++++++++ include/linux/rw_hint.h | 13 ++++++++ include/uapi/linux/fcntl.h | 14 ++++++++ 3 files changed, 94 insertions(+) diff --git a/fs/fcntl.c b/fs/fcntl.c index 9df35e7ff754..b35aec56981a 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -291,6 +291,14 @@ static bool rw_lifetime_hint_valid(u64 hint) } } +static inline bool rw_placement_hint_valid(u64 val) +{ + if (val <= MAX_PLACEMENT_HINT_VAL) + return true; + + return false; +} + static long fcntl_get_rw_lifetime_hint(struct file *file, unsigned int cmd, unsigned long arg) { @@ -327,6 +335,59 @@ static long fcntl_set_rw_lifetime_hint(struct file *file, unsigned int cmd, return 0; } +static long fcntl_get_rw_hint_ex(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct rw_hint_ex __user *rw_hint_ex_p = (void __user *)arg; + struct rw_hint_ex rwh = {}; + struct inode *inode = file_inode(file); + u8 hint = 
READ_ONCE(inode->i_write_hint); + + rwh.type = WRITE_HINT_TYPE(hint); + rwh.val = WRITE_HINT_VAL(hint); + + if (copy_to_user(rw_hint_ex_p, &rwh, sizeof(rwh))) + return -EFAULT; + + return 0; +} + +static long fcntl_set_rw_hint_ex(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct rw_hint_ex __user *rw_hint_ex_p = (void __user *)arg; + struct rw_hint_ex rwh; + struct inode *inode = file_inode(file); + u64 hint; + int i; + + if (copy_from_user(&rwh, rw_hint_ex_p, sizeof(rwh))) + return -EFAULT; + for (i = 0; i < ARRAY_SIZE(rwh.pad); i++) + if (rwh.pad[i]) + return -EINVAL; + switch (rwh.type) { + case TYPE_RW_LIFETIME_HINT: + if (!rw_lifetime_hint_valid(rwh.val)) + return -EINVAL; + hint = rwh.val; + break; + case TYPE_RW_PLACEMENT_HINT: + if (!rw_placement_hint_valid(rwh.val)) + return -EINVAL; + hint = PLACEMENT_HINT_TYPE | rwh.val; + break; + default: + return -EINVAL; + } + + WRITE_ONCE(inode->i_write_hint, hint); + if (file->f_mapping->host != inode) + WRITE_ONCE(file->f_mapping->host->i_write_hint, hint); + + return 0; +} + /* Is the file descriptor a dup of the file? */ static long f_dupfd_query(int fd, struct file *filp) { @@ -454,6 +515,12 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, case F_SET_RW_HINT: err = fcntl_set_rw_lifetime_hint(filp, cmd, arg); break; + case F_GET_RW_HINT_EX: + err = fcntl_get_rw_hint_ex(filp, cmd, arg); + break; + case F_SET_RW_HINT_EX: + err = fcntl_set_rw_hint_ex(filp, cmd, arg); + break; default: break; } diff --git a/include/linux/rw_hint.h b/include/linux/rw_hint.h index b9942f5f13d3..ff708a75e2f6 100644 --- a/include/linux/rw_hint.h +++ b/include/linux/rw_hint.h @@ -21,4 +21,17 @@ enum rw_lifetime_hint { static_assert(sizeof(enum rw_lifetime_hint) == 1); #endif +#define WRITE_HINT_TYPE_BIT BIT(7) +#define WRITE_HINT_VAL_MASK (WRITE_HINT_TYPE_BIT - 1) +#define WRITE_HINT_TYPE(h) (((h) & WRITE_HINT_TYPE_BIT) ? 
\ + TYPE_RW_PLACEMENT_HINT : TYPE_RW_LIFETIME_HINT) +#define WRITE_HINT_VAL(h) ((h) & WRITE_HINT_VAL_MASK) + +#define WRITE_PLACEMENT_HINT(h) (((h) & WRITE_HINT_TYPE_BIT) ? \ + WRITE_HINT_VAL(h) : 0) +#define WRITE_LIFETIME_HINT(h) (((h) & WRITE_HINT_TYPE_BIT) ? \ + 0 : WRITE_HINT_VAL(h)) + +#define PLACEMENT_HINT_TYPE WRITE_HINT_TYPE_BIT +#define MAX_PLACEMENT_HINT_VAL (WRITE_HINT_VAL_MASK - 1) #endif /* _LINUX_RW_HINT_H */ diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index c0bcc185fa48..f758a7230419 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -57,6 +57,8 @@ #define F_SET_RW_HINT (F_LINUX_SPECIFIC_BASE + 12) #define F_GET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 13) #define F_SET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 14) +#define F_GET_RW_HINT_EX (F_LINUX_SPECIFIC_BASE + 15) +#define F_SET_RW_HINT_EX (F_LINUX_SPECIFIC_BASE + 16) /* * Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be @@ -76,6 +78,18 @@ */ #define RWF_WRITE_LIFE_NOT_SET RWH_WRITE_LIFE_NOT_SET +enum rw_hint_type { + TYPE_RW_LIFETIME_HINT = 1, + TYPE_RW_PLACEMENT_HINT +}; + +/* Exchange information with F_{GET/SET}_RW_HINT_EX fcntl */ +struct rw_hint_ex { + __u8 type; + __u8 pad[7]; + __u64 val; +}; + /* * Types of directory notifications that may be requested. */ -- 2.25.1