Grades are read from the extended attribute while preallocating the blocks of a single graded file. We assume binary grading of the file blocks: high-graded blocks are placed in the persistent-memory region of the LVM, while lower-graded blocks are placed in the HDD portion of the LVM. To achieve this we alter the block allocation in the functions ext4_ext_map_blocks() and ext4_alloc_file_blocks(). Leveraging the existing goal-block allocation to obtain goals in different tiers according to the grades has not yet been done.

Consider an LVM that is segmented as follows:

  --- Segments ---
  Logical extents 0 to 1219:
    Type                linear
    Physical volume     /dev/sda11
    Physical extents    0 to 1219

  Logical extents 1220 to 1474:
    Type                linear
    Physical volume     /dev/pmem0
    Physical extents    0 to 254

We hard-code LOW_GRADE_STARTING_BLOCK as 0*1024 and HIGH_GRADE_STARTING_BLOCK as 1220*1024, i.e. the first logical block number of the respective tier. FIX_ME comments have been provided in suitable positions. The patch is on top of Linux kernel 4.7.2.
Signed-off-by: Sayan Ghosh <sgdgp.2014@xxxxxxxxx>
---
Review changes:
 - find_grade(): fix the gap length (it underflowed when the search ended
   on an extent below the block), report the remaining run length instead
   of the whole extent length, and accept an empty array.
 - Read the grade array through one helper that checks the allocation and
   frees the array again; it was leaked on every call, and find_grade()
   was passed 'temp' instead of a pointer.
 - Clamp the preallocation length to the remaining request; drop the
   unused go_out label, the unused grade_val and whitespace-only hunks.

 fs/ext4/ext4.h    |   1 +
 fs/ext4/extents.c | 126 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 126 insertions(+), 1 deletion(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b9ec0ca..c7d2eed 100755
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3201,6 +3201,7 @@ struct ext4_extent;
 extern unsigned long long read_count_xattr(struct inode *inode);
 extern void read_grade_xattr(struct inode *inode,struct grade_struct *grade_array);
 extern int is_file_graded(struct inode *inode);
+extern int find_grade(struct grade_struct* grade_array, unsigned long long total, ext4_fsblk_t val, unsigned long long *req_len);
 
 /*
  * Maximum number of logical blocks in a file; ext4_extent's ee_block is
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index de9194f..aaff3a3 100755
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -58,6 +58,13 @@
 #define EXT4_EXT_DATA_VALID2	0x10 /* second half contains valid data */
 
 /*
+ * Starting block numbers for low and high grades: the first logical extent
+ * of each tier times 1024 blocks per extent (hard-coded, see FIXME below).
+ */
+#define LOW_GRADE_STARTING_BLOCK	0
+#define HIGH_GRADE_STARTING_BLOCK	(1220 * 1024)
+
+/*
  * read_grade_xattr() is used to read the grade array from the extended attribute.
  */
 void read_grade_xattr(struct inode *inode,struct grade_struct *grade_array)
@@ -92,6 +99,75 @@ unsigned long long read_count_xattr(struct inode *inode)
 	return total;
 }
 
+/*
+ * find_grade() looks up the grade of logical block @val in @grade_array,
+ * which holds @total high-graded extents sorted by block_num and not
+ * overlapping each other.
+ *
+ * Returns 1 if @val lies inside a high-graded extent and 0 otherwise.
+ * If @req_len is not NULL it receives the number of blocks, starting at
+ * @val, that share this grade: the rest of the graded extent, or the gap
+ * up to the next graded extent.  It is set to 0 when no graded extent
+ * starts after @val, i.e. the ungraded run has no upper bound.
+ */
+int find_grade(struct grade_struct *grade_array, unsigned long long total,
+	       ext4_fsblk_t val, unsigned long long *req_len)
+{
+	unsigned long long lo = 0, hi, mid, run = 0;
+	int grade = 0;
+
+	if (!grade_array)
+		total = 0;
+	hi = total;
+
+	/* Binary search for the first extent that starts after val. */
+	while (lo < hi) {
+		mid = lo + (hi - lo) / 2;
+		if (grade_array[mid].block_num > val)
+			hi = mid;
+		else
+			lo = mid + 1;
+	}
+
+	if (lo > 0 && val < grade_array[lo - 1].block_num +
+			    grade_array[lo - 1].len) {
+		/* The extent just before that one covers val. */
+		grade = 1;
+		run = grade_array[lo - 1].block_num +
+		      grade_array[lo - 1].len - val;
+	} else if (lo < total) {
+		/* val is in a gap: it runs up to the next graded extent. */
+		run = grade_array[lo].block_num - val;
+	}
+
+	if (req_len != NULL)
+		*req_len = run;
+	return grade;
+}
+
+/*
+ * ext4_graded_lookup() reads the grade extents of @inode from its extended
+ * attribute and returns find_grade() for logical block @lblk, or -ENOMEM
+ * if the temporary array cannot be allocated.  The array is freed before
+ * returning, so the caller has nothing to release.
+ */
+static int ext4_graded_lookup(struct inode *inode, ext4_lblk_t lblk,
+			      unsigned long long *req_len)
+{
+	unsigned long long total = read_count_xattr(inode);
+	struct grade_struct *grade_array;
+	int grade;
+
+	/* GFP_NOFS: ext4_ext_map_blocks() calls this under a journal handle. */
+	grade_array = kmalloc_array(total, sizeof(*grade_array), GFP_NOFS);
+	if (!grade_array)
+		return -ENOMEM;
+	read_grade_xattr(inode, grade_array);
+	grade = find_grade(grade_array, total, lblk, req_len);
+	kfree(grade_array);
+	return grade;
+}
+
 static __le32 ext4_extent_block_csum(struct inode *inode,
 				     struct ext4_extent_header *eh)
 {
@@ -4494,8 +4570,34 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 
 	/* allocate new block */
 	ar.inode = inode;
-	ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk);
+	/*
+	 * ** FIX ME **
+	 * Now accessing different goals for different tiers is hard coded.
+	 * Please suggest a method to maintain multiple goal states in different tiers,
+	 * each corresponding to the respective grades for proper goal-block placement.
+	 *
+	 * ** TODO 1 **
+	 * Instead of hard-coding LOW_GRADE_STARTING_BLOCK and HIGH_GRADE_STARTING_BLOCK
+	 * set their values automatically from the LVM (see the description).
+	 *
+	 * ** TODO 2 **
+	 * It is assumed that higher grade storage area will not overflow.
+	 * We need to take care of the case when high grade storage device gets full
+	 * and data has to be stored in the lower tier.
+	 */
+	if (!is_file_graded(inode)) {
+		ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk);
+	} else {
+		int grade = ext4_graded_lookup(inode, map->m_lblk, NULL);
+
+		if (grade < 0) {
+			err = grade;
+			goto out2;
+		}
+		ar.goal = grade ? HIGH_GRADE_STARTING_BLOCK :
+				  LOW_GRADE_STARTING_BLOCK;
+	}
 	ar.logical = map->m_lblk;
 	/*
 	 * We calculate the offset from the beginning of the cluster
 	 * for the logical block number, since when we allocate a
@@ -4519,6 +4621,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		ar.flags |= EXT4_MB_DELALLOC_RESERVED;
 	if (flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
 		ar.flags |= EXT4_MB_USE_RESERVED;
+	/* Graded files ask the allocator not to preallocate. */
+	if (is_file_graded(inode))
+		ar.flags |= EXT4_MB_HINT_NOPREALLOC;
 	newblock = ext4_mb_new_blocks(handle, &ar, &err);
 	if (!newblock)
 		goto out2;
@@ -4739,6 +4844,25 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
 retry:
 	while (ret >= 0 && len) {
 		/*
+		 * Preallocate one run of equally graded blocks per iteration,
+		 * so that every request stays inside a single tier.
+		 */
+		if (is_file_graded(inode)) {
+			unsigned long long req_len = 0;
+			int grade;
+
+			grade = ext4_graded_lookup(inode, map.m_lblk, &req_len);
+			if (grade < 0) {
+				ret = grade;
+				break;
+			}
+			/* req_len == 0: nothing graded at or after m_lblk. */
+			if (req_len == 0 || req_len > len)
+				req_len = len;
+			map.m_len = req_len;
+		}
+
+		/*
 		 * Recalculate credits when extent tree depth changes.
 		 */
 		if (depth >= 0 && depth != ext_depth(inode)) {