Re: [PATCH 1/3] zram: allow user to set QUEUE_FLAG_NOWAIT

On 5/16/23 13:43, Jens Axboe wrote:
> On 5/16/23 2:41 PM, Chaitanya Kulkarni wrote:
>> On 5/16/23 06:08, Sergey Senozhatsky wrote:
>>> On (23/05/16 05:51), Chaitanya Kulkarni wrote:
>>>> The v2 with the modparam removed is ready to send, but I have a few
>>>> concerns about enabling nowait unconditionally for zram :-
>>>>
>>>>    From the brd data [1] and the zram data [2] on my setup :-
>>>>
>>>>      | IOPs (old -> new)     | sys cpu% (old -> new)
>>>> -----+-----------------------+----------------------
>>>> brd  | 1.5x  (3919 -> 5874)  | 3x (29 -> 87)
>>>> zram | 1.09x (29 -> 87)      | 9x (11 -> 97)
>>>>
>>>> brd:-
>>>> IOPs increased by               ~1.5  times (50% up)
>>>> sys CPU percentage increased by ~3.0  times (200% up)
>>>>
>>>> zram:-
>>>> IOPs increased by               ~1.09 times (  9% up)
>>>> sys CPU percentage increased by ~8.81 times (781% up)
>>>>
>>>> This comparison clearly demonstrates that zram experiences a much more
>>>> substantial CPU load relative to its increase in IOPs than brd does.
>>>> Might such a significant difference suggest a potential CPU regression
>>>> in zram?
>>>>
>>>> Especially for zram, if applications are not expecting this high CPU
>>>> usage then we'll get regression reports with the default nowait
>>>> approach. How about we avoid something like this with one of the
>>>> following options?
>>> Well, zram performs decompression/compression on the CPU (per-CPU
>>> crypto streams) for each IO operation, so zram IO is CPU intensive.
>> and that is exactly why I've raised this issue; are you okay with that?
>> I'll send V2 with nowait enabled by default ..
> Did you check that it's actually nowait sane to begin with? I spent
> literally 30 seconds on that when you sent the first patch, and the
> partial/sync path does not look kosher.
>

I did check for it, and it needs nowait-safe preparation in V2: something
like [1] below, plus returning an error via bio_wouldblock_error() when we
end up in read_from_bdev_sync() and it is not called from
writeback_store() (rough changes in [1]; a sketch of the
bio_wouldblock_error() side follows the diff).
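
To make the read_from_bdev_sync() part concrete, the check I have in mind
on top of [1] is roughly the sketch below. It is uncompiled and only
illustrative: parent == NULL means either writeback_store() (nowait ==
false, blocking is fine) or the partial-IO path of a REQ_NOWAIT bio, which
must not block:

/*
 * Rough sketch on top of [1], not compiled: never fall into the
 * blocking sync-read path for a nowait request. Only writeback_store()
 * reaches here with a request that is allowed to block.
 */
static int read_from_bdev(struct zram *zram, struct page *page,
            unsigned long entry, struct bio *parent, bool nowait)
{
    atomic64_inc(&zram->stats.bd_reads);
    if (!parent) {
        if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO)))
            return -EIO;
        if (nowait)
            return -EAGAIN; /* caller completes with bio_wouldblock_error() */
        return read_from_bdev_sync(zram, page, entry, nowait);
    }
    read_from_bdev_async(zram, page, entry, parent, nowait);
    return 0;
}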

But even after taking performance numbers repeatedly, the main concern I
have failed to resolve is the numbers above versus enabling nowait by
default for zram ...

-ck

[1]
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 08d198431a88..c2f154911cc0 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -55,7 +55,7 @@ static const struct block_device_operations zram_devops;

  static void zram_free_page(struct zram *zram, size_t index);
  static int zram_read_page(struct zram *zram, struct page *page, u32 index,
-              struct bio *parent, blk_opf_t opf);
+              struct bio *parent, bool nowait);

  static int zram_slot_trylock(struct zram *zram, u32 index)
  {
@@ -690,7 +690,7 @@ static ssize_t writeback_store(struct device *dev,
          /* Need for hugepage writeback racing */
          zram_set_flag(zram, index, ZRAM_IDLE);
          zram_slot_unlock(zram, index);
-        if (zram_read_page(zram, page, index, NULL, 0)) {
+        if (zram_read_page(zram, page, index, NULL, false)) {
              zram_slot_lock(zram, index);
              zram_clear_flag(zram, index, ZRAM_UNDER_WB);
              zram_clear_flag(zram, index, ZRAM_IDLE);
@@ -772,6 +772,7 @@ struct zram_work {
      unsigned long entry;
      struct page *page;
      int error;
+    bool nowait;
  };

  static void zram_sync_read(struct work_struct *work)
@@ -779,8 +780,9 @@ static void zram_sync_read(struct work_struct *work)
      struct zram_work *zw = container_of(work, struct zram_work, work);
      struct bio_vec bv;
      struct bio bio;
+    blk_opf_t nowait = zw->nowait ? REQ_NOWAIT : 0;

-    bio_init(&bio, zw->zram->bdev, &bv, 1, REQ_OP_READ);
+    bio_init(&bio, zw->zram->bdev, &bv, 1, REQ_OP_READ | nowait);
      bio.bi_iter.bi_sector = zw->entry * (PAGE_SIZE >> 9);
      __bio_add_page(&bio, zw->page, PAGE_SIZE, 0);
      zw->error = submit_bio_wait(&bio);
@@ -792,18 +794,14 @@ static void zram_sync_read(struct work_struct *work)
   * use a worker thread context.
   */
  static int read_from_bdev_sync(struct zram *zram, struct page *page,
-                unsigned long entry, blk_opf_t opf)
+                unsigned long entry, bool nowait)
  {
      struct zram_work work;

      work.page = page;
      work.zram = zram;
      work.entry = entry;
-
-    if (opf & REQ_NOWAIT) {
-        zram_sync_read(&work);
-        return work.error;
-    }
+    work.nowait = nowait;

      INIT_WORK_ONSTACK(&work.work, zram_sync_read);
      queue_work(system_unbound_wq, &work.work);
@@ -814,21 +812,21 @@ static int read_from_bdev_sync(struct zram *zram, struct page *page,
  }

  static int read_from_bdev(struct zram *zram, struct page *page,
-            unsigned long entry, struct bio *parent, blk_opf_t opf)
+            unsigned long entry, struct bio *parent, bool nowait)
  {
      atomic64_inc(&zram->stats.bd_reads);
      if (!parent) {
          if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO)))
              return -EIO;
-        return read_from_bdev_sync(zram, page, entry, opf);
+        return read_from_bdev_sync(zram, page, entry, nowait);
      }
-    read_from_bdev_async(zram, page, entry, parent, opf);
+    read_from_bdev_async(zram, page, entry, parent, nowait);
      return 0;
  }
  #else
  static inline void reset_bdev(struct zram *zram) {};
  static int read_from_bdev(struct zram *zram, struct page *page,
-            unsigned long entry, struct bio *parent, blk_opf_t opf)
+            unsigned long entry, struct bio *parent, bool nowait)
  {
      return -EIO;
  }
@@ -1361,7 +1359,7 @@ static int zram_read_from_zspool(struct zram *zram, struct page *page,
  }

  static int zram_read_page(struct zram *zram, struct page *page, u32 index,
-              struct bio *parent, blk_opf_t bi_opf)
+              struct bio *parent, bool nowait)
  {
      int ret;

@@ -1378,7 +1376,7 @@ static int zram_read_page(struct zram *zram, struct page *page, u32 index,
          zram_slot_unlock(zram, index);

          ret = read_from_bdev(zram, page, zram_get_element(zram, index),
-                     parent, bi_opf);
+                     parent, nowait);
      }

      /* Should NEVER happen. Return bio error if it does. */
@@ -1395,13 +1393,14 @@ static int zram_read_page(struct zram *zram, struct page *page, u32 index,
  static int zram_bvec_read_partial(struct zram *zram, struct bio_vec *bvec,
                    u32 index, int offset, struct bio *bio)
  {
+    bool nowait = bio->bi_opf & REQ_NOWAIT;
      struct page *page;
      int ret;

-    page = alloc_page(bio->bi_opf & REQ_NOWAIT ? GFP_NOWAIT : GFP_NOIO);
+    page = alloc_page(nowait ? GFP_NOWAIT : GFP_NOIO);
      if (!page)
          return -ENOMEM;
-    ret = zram_read_page(zram, page, index, NULL, bio->bi_opf);
+    ret = zram_read_page(zram, page, index, NULL, nowait);
      if (likely(!ret))
          memcpy_to_bvec(bvec, page_address(page) + offset);
      __free_page(page);
@@ -1413,7 +1412,8 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
  {
      if (is_partial_io(bvec))
          return zram_bvec_read_partial(zram, bvec, index, offset, bio);
-    return zram_read_page(zram, bvec->bv_page, index, bio, bio->bi_opf);
+    return zram_read_page(zram, bvec->bv_page, index, bio,
+                  bio->bi_opf & REQ_NOWAIT);
  }

  static int zram_write_page(struct zram *zram, struct page *page, u32 index)
@@ -1547,14 +1547,15 @@ static int zram_write_page(struct zram *zram, struct page *page, u32 index)
  static int zram_bvec_write_partial(struct zram *zram, struct bio_vec *bvec,
                     u32 index, int offset, struct bio *bio)
  {
+    bool nowait = bio->bi_opf & REQ_NOWAIT;
      struct page *page;
      int ret;

-    page = alloc_page(bio->bi_opf & REQ_NOWAIT ? GFP_NOWAIT : GFP_NOIO);
+    page = alloc_page(nowait ? GFP_NOWAIT : GFP_NOIO);
      if (!page)
          return -ENOMEM;

-    ret = zram_read_page(zram, page, index, bio, bio->bi_opf);
+    ret = zram_read_page(zram, page, index, bio, nowait);
      if (!ret) {
          memcpy_from_bvec(page_address(page) + offset, bvec);
          ret = zram_write_page(zram, page, index);
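
For completeness, the caller side that turns the -EAGAIN above into a
proper nowait completion could be a small helper like the one below. This
is illustrative only and not part of [1]; the helper name is made up, and
the bio read/write loops would call it instead of bio_io_error() on
failure:

/*
 * Illustrative helper, not part of [1]: under REQ_NOWAIT, -EAGAIN
 * becomes BLK_STS_AGAIN via bio_wouldblock_error() so the submitter
 * (e.g. io_uring) can retry from a context that may block; any other
 * failure stays a hard I/O error.
 */
static void zram_bio_end_error(struct bio *bio, int err)
{
    if (err == -EAGAIN && (bio->bi_opf & REQ_NOWAIT))
        bio_wouldblock_error(bio);
    else
        bio_io_error(bio);
}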




