Hello,

On Fri, Jan 06, 2017 at 04:42:25PM +0800, zhouxianrong@xxxxxxxxxx wrote:
> From: zhouxianrong <zhouxianrong@xxxxxxxxxx>
>
> the idea is that without doing more calculations we extend zero pages
> to same element pages for zram. zero page is special case of
> same element page with zero element.
>
> 1. the test is done under android 7.0
> 2. startup too many applications circularly
> 3. sample the zero pages, same pages (none-zero element)
>    and total pages in function page_zero_filled
>
> the result is listed as below:
>
> ZERO	SAME	TOTAL
> 36214	17842	598196
>
>         ZERO/TOTAL	SAME/TOTAL	(ZERO+SAME)/TOTAL	ZERO/SAME
> AVERAGE	0.060631909	0.024990816	0.085622726		2.663825038
> STDEV	0.00674612	0.005887625	0.009707034		2.115881328
> MAX	0.069698422	0.030046087	0.094975336		7.56043956
> MIN	0.03959586	0.007332205	0.056055193		1.928985507
>
> from above data, the benefit is about 2.5% and up to 3% of total
> swapout pages.

It looks great considering the amount of diff. Thanks!

> the defect of the patch is that when we recovery a page from
> non-zero element the operations are low efficient for partial
> read.

Nice description. I love to see pros/cons. :)
If it hurts partial read a little bit, that would not be a critical problem.

> Signed-off-by: zhouxianrong <zhouxianrong@xxxxxxxxxx>
> ---
>  drivers/block/zram/zram_drv.c | 102 +++++++++++++++++++++++++++++++----------
>  drivers/block/zram/zram_drv.h |  11 +++--
>  2 files changed, 84 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
> index 15f58ab..3250a8b 100644
> --- a/drivers/block/zram/zram_drv.c
> +++ b/drivers/block/zram/zram_drv.c
> @@ -94,6 +94,17 @@ static void zram_clear_flag(struct zram_meta *meta, u32 index,
>  	meta->table[index].value &= ~BIT(flag);
>  }
>
> +static inline void zram_set_element(struct zram_meta *meta, u32 index,
> +			unsigned long element)
> +{
> +	meta->table[index].element = element;
> +}
> +
> +static inline void zram_clear_element(struct zram_meta *meta, u32 index)
> +{
> +	meta->table[index].element = 0;
> +}
> +
>  static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
>  {
>  	return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
> @@ -158,31 +169,68 @@ static inline void update_used_max(struct zram *zram,
>  	} while (old_max != cur_max);
>  }
>
> -static bool page_zero_filled(void *ptr)
> +static inline void zram_set_page(char *ptr, unsigned long value)

Nit: I prefer fill to set in this case. How about zram_fill_page?
It is just a nit, so if you want to keep set, I'm not against it.

> +{
> +	int i;
> +	unsigned long *page = (unsigned long *)ptr;
> +
> +	for (i = PAGE_SIZE / sizeof(unsigned long) - 1; i >= 0; i--)
> +		page[i] = value;
> +}
> +
> +static inline void zram_set_page_partial(char *ptr, unsigned int size,
> +			unsigned long value)
> +{
> +	int i;
> +
> +	i = ((unsigned long)ptr) % sizeof(unsigned long);
> +	if (i) {

Hmm, does this case really happen? I guess the block layer works with an
IO size unit which is aligned to at least sizeof(unsigned long), maybe
SECTOR or PAGE size.

Then I think we can merge both zram_set_page and zram_set_page_partial,
like this:

static inline void zram_fill_page(char *ptr, unsigned int size,
			unsigned long element)
{
	int i;
	unsigned long *page = (unsigned long *)ptr;

	WARN_ON_ONCE((unsigned long)ptr % sizeof(unsigned long));

	for (i = size / sizeof(unsigned long) - 1; i >= 0; i--)
		page[i] = element;
}
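With that, both call sites should collapse to something like this
(untested, names as in this patch):

	/* full-page path in zram_decompress_page */
	zram_fill_page(mem, PAGE_SIZE, meta->table[index].element);

	/* partial path in handle_same_page */
	zram_fill_page(user_mem + bvec->bv_offset, bvec->bv_len, element);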
> +		while (i < sizeof(unsigned long)) {
> +			*ptr++ = (value >> (i * 8)) & 0xff;
> +			--size;
> +			++i;
> +		}
> +	}
> +
> +	for (i = size / sizeof(unsigned long); i > 0; --i) {
> +		*(unsigned long *)ptr = value;
> +		ptr += sizeof(unsigned long);
> +		size -= sizeof(unsigned long);
> +	}
> +
> +	for (i = 0; i < size; ++i)
> +		*ptr++ = (value >> (i * 8)) & 0xff;
> +}
> +
> +static bool page_same_filled(void *ptr, unsigned long *element)
>  {
>  	unsigned int pos;
>  	unsigned long *page;
>
>  	page = (unsigned long *)ptr;
>
> -	for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
> -		if (page[pos])
> +	for (pos = PAGE_SIZE / sizeof(unsigned long) - 1; pos > 0; pos--) {
> +		if (page[pos] != page[pos - 1])
>  			return false;
>  	}
>
> +	if (element)

Maybe we don't need this condition check? The only caller always passes
a valid element pointer.

> +		element[0] = page[pos];
> +
>  	return true;
>  }
>
> -static void handle_zero_page(struct bio_vec *bvec)
> +static void handle_same_page(struct bio_vec *bvec, unsigned long element)
>  {
>  	struct page *page = bvec->bv_page;
>  	void *user_mem;
>
>  	user_mem = kmap_atomic(page);
>  	if (is_partial_io(bvec))
> -		memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
> +		zram_set_page_partial(user_mem + bvec->bv_offset, bvec->bv_len,
> +			element);
>  	else
> -		clear_page(user_mem);
> +		zram_set_page(user_mem, element);
>  	kunmap_atomic(user_mem);
>
>  	flush_dcache_page(page);
> @@ -431,7 +479,7 @@ static ssize_t mm_stat_show(struct device *dev,
>  			mem_used << PAGE_SHIFT,
>  			zram->limit_pages << PAGE_SHIFT,
>  			max_used << PAGE_SHIFT,
> -			(u64)atomic64_read(&zram->stats.zero_pages),
> +			(u64)atomic64_read(&zram->stats.same_pages),

Unfortunately, we cannot replace the zero_pages stat with the same_pages
one right now due to a compatibility problem. Please add same_pages to
the tail of the stat, and we should warn that zero_pages is deprecated so
we can finally remove it two years later.
Please reference Documentation/ABI/obsolete/sysfs-block-zram and add
zero_pages to the document.

For example:

mm_stat_show()
{
	pr_warn_once("zero_pages is deprecated and will be removed in Jan 2019\n");
	...
}

Sergey, what's your opinion?
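The obsolete entry could look something like this (just a sketch; the
Date and Contact fields are placeholders to fill in):

What:		/sys/block/zram<id>/zero_pages
Date:		<date of this change>
Contact:	<zram maintainers>
Description:
		The zero_pages file is read-only and specifies the number
		of zero-filled pages stored on this disk. Deprecated in
		favor of the same_pages counter in mm_stat; planned for
		removal in Jan 2019.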
>  			pool_stats.pages_compacted);
>  	up_read(&zram->init_lock);
>
> @@ -464,7 +512,7 @@ static ssize_t debug_stat_show(struct device *dev,
>  ZRAM_ATTR_RO(failed_writes);
>  ZRAM_ATTR_RO(invalid_io);
>  ZRAM_ATTR_RO(notify_free);
> -ZRAM_ATTR_RO(zero_pages);
> +ZRAM_ATTR_RO(same_pages);
>  ZRAM_ATTR_RO(compr_data_size);
>
>  static inline bool zram_meta_get(struct zram *zram)
> @@ -538,18 +586,20 @@ static void zram_free_page(struct zram *zram, size_t index)
>  	struct zram_meta *meta = zram->meta;
>  	unsigned long handle = meta->table[index].handle;
>
> -	if (unlikely(!handle)) {
> -		/*
> -		 * No memory is allocated for zero filled pages.
> -		 * Simply clear zero page flag.
> -		 */
> -		if (zram_test_flag(meta, index, ZRAM_ZERO)) {
> -			zram_clear_flag(meta, index, ZRAM_ZERO);
> -			atomic64_dec(&zram->stats.zero_pages);
> -		}
> +	/*
> +	 * No memory is allocated for same element filled pages.
> +	 * Simply clear same page flag.
> +	 */
> +	if (zram_test_flag(meta, index, ZRAM_SAME)) {
> +		zram_clear_flag(meta, index, ZRAM_SAME);
> +		zram_clear_element(meta, index);
> +		atomic64_dec(&zram->stats.same_pages);
>  		return;
>  	}
>
> +	if (!handle)
> +		return;
> +
>  	zs_free(meta->mem_pool, handle);
>
>  	atomic64_sub(zram_get_obj_size(meta, index),
> @@ -572,9 +622,9 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
>  	handle = meta->table[index].handle;
>  	size = zram_get_obj_size(meta, index);
>
> -	if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
> +	if (!handle || zram_test_flag(meta, index, ZRAM_SAME)) {
>  		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
> -		clear_page(mem);
> +		zram_set_page(mem, meta->table[index].element);
>  		return 0;
>  	}
>
> @@ -610,9 +660,9 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
>
>  	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
>  	if (unlikely(!meta->table[index].handle) ||
> -			zram_test_flag(meta, index, ZRAM_ZERO)) {
> +			zram_test_flag(meta, index, ZRAM_SAME)) {
>  		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
> -		handle_zero_page(bvec);
> +		handle_same_page(bvec, meta->table[index].element);
>  		return 0;
>  	}
>  	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
> @@ -660,6 +710,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
>  	struct zram_meta *meta = zram->meta;
>  	struct zcomp_strm *zstrm = NULL;
>  	unsigned long alloced_pages;
> +	unsigned long element;
>
>  	page = bvec->bv_page;
>  	if (is_partial_io(bvec)) {
> @@ -688,16 +739,17 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
>  		uncmem = user_mem;
>  	}
>
> -	if (page_zero_filled(uncmem)) {
> +	if (page_same_filled(uncmem, &element)) {
>  		if (user_mem)
>  			kunmap_atomic(user_mem);
>  		/* Free memory associated with this sector now. */
>  		bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
>  		zram_free_page(zram, index);
> -		zram_set_flag(meta, index, ZRAM_ZERO);
> +		zram_set_flag(meta, index, ZRAM_SAME);
> +		zram_set_element(meta, index, element);
>  		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
>
> -		atomic64_inc(&zram->stats.zero_pages);
> +		atomic64_inc(&zram->stats.same_pages);
>  		ret = 0;
>  		goto out;
>  	}
> @@ -1203,7 +1255,7 @@ static int zram_open(struct block_device *bdev, fmode_t mode)
>  	&dev_attr_compact.attr,
>  	&dev_attr_invalid_io.attr,
>  	&dev_attr_notify_free.attr,
> -	&dev_attr_zero_pages.attr,
> +	&dev_attr_same_pages.attr,
>  	&dev_attr_orig_data_size.attr,
>  	&dev_attr_compr_data_size.attr,
>  	&dev_attr_mem_used_total.attr,
> diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
> index 74fcf10..4bb92e1 100644
> --- a/drivers/block/zram/zram_drv.h
> +++ b/drivers/block/zram/zram_drv.h
> @@ -60,8 +60,8 @@
>
>  /* Flags for zram pages (table[page_no].value) */
>  enum zram_pageflags {
> -	/* Page consists entirely of zeros */
> -	ZRAM_ZERO = ZRAM_FLAG_SHIFT,
> +	/* Page consists entirely of same elements */
> +	ZRAM_SAME = ZRAM_FLAG_SHIFT,
>  	ZRAM_ACCESS,	/* page is now accessed */
>
>  	__NR_ZRAM_PAGEFLAGS,
> @@ -71,7 +71,10 @@ enum zram_pageflags {
>
>  /* Allocated for each disk page */
>  struct zram_table_entry {
> -	unsigned long handle;
> +	union {
> +		unsigned long handle;
> +		unsigned long element;
> +	};
>  	unsigned long value;
>  };
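The union itself looks fine since ZRAM_SAME and a live handle are mutually
exclusive, but maybe it deserves a short comment so readers know which
member is valid when, something like (just a sketch):

	union {
		unsigned long handle;	/* valid when ZRAM_SAME is clear */
		unsigned long element;	/* valid when ZRAM_SAME is set */
	};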
> @@ -83,7 +86,7 @@ struct zram_stats {
>  	atomic64_t failed_writes;	/* can happen when memory is too low */
>  	atomic64_t invalid_io;	/* non-page-aligned I/O requests */
>  	atomic64_t notify_free;	/* no. of swap slot free notifications */
> -	atomic64_t zero_pages;		/* no. of zero filled pages */
> +	atomic64_t same_pages;		/* no. of same element filled pages */
>  	atomic64_t pages_stored;	/* no. of pages currently stored */
>  	atomic_long_t max_used_pages;	/* no. of maximum pages stored */
>  	atomic64_t writestall;	/* no. of write slow paths */
> --
> 1.7.9.5
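By the way, for a quick functional check of the write path, a userspace
sketch like this (untested; assumes an initialized /dev/zram0, a 4K
PAGE_SIZE, and the same_pages stat from this patch) should bump
same_pages by one, where the page previously would have gone to zsmalloc:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	void *buf;
	/* O_DIRECT so the write reaches zram instead of the page cache */
	int fd = open("/dev/zram0", O_WRONLY | O_DIRECT);

	if (fd < 0)
		return 1;
	if (posix_memalign(&buf, 4096, 4096))
		return 1;
	/* every byte 0x5a: a same-filled but non-zero page */
	memset(buf, 0x5a, 4096);
	if (pwrite(fd, buf, 4096, 0) != 4096)
		return 1;
	close(fd);
	return 0;	/* then compare same_pages before and after */
}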