On 04/06/2018 04:41 AM, Sayan Ghosh wrote: > The patch is on top of Linux Kernel 4.7.2. > > Signed-off-by: Sayan Ghosh <sgdgp.2014@xxxxxxxxx> > --- > fs/ext4/file.c | 178 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 174 insertions(+), 4 deletions(-) > > diff --git a/fs/ext4/file.c b/fs/ext4/file.c > index df44c87..368cf53 100755 > --- a/fs/ext4/file.c > +++ b/fs/ext4/file.c > @@ -298,6 +298,164 @@ static const struct vm_operations_struct > ext4_file_vm_ops = { > .page_mkwrite = ext4_page_mkwrite, > }; > > +/* > + * This function is the fault function for our case to > + * redirect the high grade blocks through DAX path (since the > + * higher tier we chose is Persistent Memory) and the lower > + * grade blocks via the normal ext4 fault. > + * Some parts of the code are copied from ext4_dax_fault. > + * The parts where the redirection is done is added by additional > + * comments. > + */ > +static int graded_ext4_fault(struct vm_area_struct *vma, struct vm_fault *vmf){ Put '{' on separate line. > + int result; > + sector_t block; > + handle_t *handle = NULL; > + struct file *file = vma->vm_file; > + struct address_space *mapping = file->f_mapping; > + struct inode *inode = file_inode(vma->vm_file); > + struct super_block *sb = inode->i_sb; > + bool write = vmf->flags & FAULT_FLAG_WRITE; Indentation. 
(many) > + > + struct grade_struct *grade_array = NULL; > + unsigned long long total; > + if (is_file_graded(inode)){ > + total = read_count_xattr(inode); > + grade_array = (struct grade_struct > *)kmalloc(total*sizeof(struct grade_struct), GFP_USER); > + read_grade_xattr(inode,grade_array); > + } > + > + block = (sector_t)vmf->pgoff << (PAGE_SHIFT - mapping->host->i_blkbits); > + if (write) { > + sb_start_pagefault(sb); > + file_update_time(vma->vm_file); > + down_read(&EXT4_I(inode)->i_mmap_sem); > + handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, > + EXT4_DATA_TRANS_BLOCKS(sb)); > + } else > + down_read(&EXT4_I(inode)->i_mmap_sem); > + > + if (IS_ERR(handle)) > + result = VM_FAULT_SIGBUS; > + else > + { > + if(write){ if (write) { > + unsigned long long temp; > + if(find_grade(grade_array,total,block,&temp) == 1){ if ( 1) { > + result = __dax_fault(vma, vmf, ext4_dax_get_block); > + } > + else if(find_grade(grade_array,total,block,&temp) == 0){ same. > + result = ext4_filemap_fault(vma,vmf); > + } > + } > + else{ else { > + /* > + * Here the higher graded blocks are redirected via DAX path > + * since we consider Persistent Memory as higher tier. > + * > + * ** TODO ** > + * To take care of the case when the higher tier is not > + * persistent memory (can be HDD-SSD combination), a check > + * of the same needs to be provided before re-direction. > + */ > + unsigned long long temp; > + if(find_grade(grade_array,total,block,&temp) == 1){ > + result = __dax_fault(vma, vmf, ext4_dax_get_block); > + } > + else if(find_grade(grade_array,total,block,&temp) == 0){ > + result = ext4_filemap_fault(vma,vmf); > + } > + } > + } > + out: > + if (write) { > + if (!IS_ERR(handle)) > + ext4_journal_stop(handle); > + up_read(&EXT4_I(inode)->i_mmap_sem); > + sb_end_pagefault(sb); > + } else > + up_read(&EXT4_I(inode)->i_mmap_sem); > + > + return result; > +} > + > +/* > + * This is the new page write function for our scenario. 
> + * This also takes care of the grade and redirects > + * through the correct path, DAX for higher tier > + * (Persistent Memory) and ext4 path for lower tier. > + * To take care of the cases when the higher tier > + * is not Persistent Memory a TODO has been added > + */ > +static int graded_ext4_mkwrite(struct vm_area_struct *vma, struct > vm_fault *vmf){ { on separate line. > + int result; > + sector_t block; > + handle_t *handle = NULL; > + struct file *file = vma->vm_file; > + struct address_space *mapping = file->f_mapping; > + struct inode *inode = file_inode(vma->vm_file); > + struct super_block *sb = inode->i_sb; > + bool write = vmf->flags & FAULT_FLAG_WRITE; > + block = (sector_t)vmf->pgoff << (PAGE_SHIFT - mapping->host->i_blkbits); > + > + struct grade_struct *grade_array = NULL; > + unsigned long long total; > + if (is_file_graded(inode)){ > + total = read_count_xattr(inode); > + grade_array = (struct grade_struct > *)kmalloc(total*sizeof(struct grade_struct), GFP_USER); > + read_grade_xattr(inode,grade_array); > + } > + > + if (write) { > + sb_start_pagefault(sb); > + file_update_time(vma->vm_file); > + down_read(&EXT4_I(inode)->i_mmap_sem); > + handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, > + EXT4_DATA_TRANS_BLOCKS(sb)); > + } else > + down_read(&EXT4_I(inode)->i_mmap_sem); > + > + if (IS_ERR(handle)) > + result = VM_FAULT_SIGBUS; > + else{ else { > + /* > + * Here the higher graded blocks are redirected via DAX path > + * since we consider Persistent Memory as higher tier. > + * > + * ** TODO ** > + * To take care of the case when the higher tier is not > + * persistent memory (can be HDD-SSD combination), a check > + * of the same needs to be provided before re-direction. 
> + */ > + unsigned long long temp; > + if(find_grade(grade_array,total,block,&temp)==1){ > + result = __dax_fault(vma, vmf, ext4_dax_get_block); > + } > + else if(find_grade(grade_array,total,block,&temp)==0){ > + filemap_map_pages(vma,vmf); > + result = ext4_page_mkwrite(vma,vmf); > + } > + } > + if (write) { > + if (!IS_ERR(handle)) > + ext4_journal_stop(handle); > + up_read(&EXT4_I(inode)->i_mmap_sem); > + sb_end_pagefault(sb); > + } else > + up_read(&EXT4_I(inode)->i_mmap_sem); > + > + return result; > +} > + > +/* > + * New function pointers for page fault handling and page writes. > + */ > +static const struct vm_operations_struct graded_ext4_vm_ops = { > + .fault = graded_ext4_fault, > + .page_mkwrite = graded_ext4_mkwrite, > + > +}; > + > static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) > { > struct inode *inode = file->f_mapping->host; > @@ -310,11 +468,23 @@ static int ext4_file_mmap(struct file *file, > struct vm_area_struct *vma) > return -ENOKEY; > } > file_accessed(file); > - if (IS_DAX(file_inode(file))) { > - vma->vm_ops = &ext4_dax_vm_ops; > + > + /* > + * For graded file new function pointers for > + * fault and page write are assigned. > + */ > + if(is_file_graded(file_inode(file))){ if ( ))) { > + vma->vm_ops = &graded_ext4_vm_ops; > vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; > - } else { > - vma->vm_ops = &ext4_file_vm_ops; > + } > + else{ else { > + if (IS_DAX(file_inode(file))) { > + vma->vm_ops = &ext4_dax_vm_ops; > + vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; > + } > + else { > + vma->vm_ops = &ext4_file_vm_ops; > + } > } > return 0; > } > > -- ~Randy