[PATCH 1/5] Osd: add three fields to pg_pool_t

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: MingXin Liu <mingxinliu@xxxxxxxxxxxxxxx>

Signed-off-by: MingXin Liu <mingxinliu@xxxxxxxxxxxxxxx>
Reviewed-by: Li Wang <liwang@xxxxxxxxxxxxxxx>
---
 src/osd/osd_types.cc | 32 ++++++++++++++++++++++++++++++--
 src/osd/osd_types.h  | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc
index a73b46f..ba81889 100644
--- a/src/osd/osd_types.cc
+++ b/src/osd/osd_types.cc
@@ -871,7 +871,6 @@ void pool_snap_info_t::generate_test_instances(list<pool_snap_info_t*>& o)
   o.back()->name = "foo";
 }
 
-
 // -- pg_pool_t --
 
 void pg_pool_t::dump(Formatter *f) const
@@ -910,6 +909,7 @@ void pg_pool_t::dump(Formatter *f) const
   f->dump_int("read_tier", read_tier);
   f->dump_int("write_tier", write_tier);
   f->dump_string("cache_mode", get_cache_mode_name());
+  f->dump_string("cache_measure", get_cache_measure_name());
   f->dump_unsigned("target_max_bytes", target_max_bytes);
   f->dump_unsigned("target_max_objects", target_max_objects);
   f->dump_unsigned("cache_target_dirty_ratio_micro",
@@ -925,6 +925,11 @@ void pg_pool_t::dump(Formatter *f) const
   f->dump_unsigned("hit_set_period", hit_set_period);
   f->dump_unsigned("hit_set_count", hit_set_count);
   f->dump_unsigned("min_read_recency_for_promote", min_read_recency_for_promote);
+  f->dump_unsigned("hit_set_grade_decay_rate",hit_set_grade_decay_rate);
+  f->open_array_section("grade_table");
+  for (vector<uint32_t>::const_iterator p = grade_table.begin(); p != grade_table.end(); ++p)
+    f->dump_unsigned("grade", *p);
+  f->close_section();
   f->dump_unsigned("stripe_width", get_stripe_width());
   f->dump_unsigned("expected_num_objects", expected_num_objects);
 }
@@ -1226,7 +1231,7 @@ void pg_pool_t::encode(bufferlist& bl, uint64_t features) const
     return;
   }
 
-  ENCODE_START(17, 5, bl);
+  ENCODE_START(18, 5, bl);
   ::encode(type, bl);
   ::encode(size, bl);
   ::encode(crush_ruleset, bl);
@@ -1268,6 +1273,9 @@ void pg_pool_t::encode(bufferlist& bl, uint64_t features) const
   ::encode(last_force_op_resend, bl);
   ::encode(min_read_recency_for_promote, bl);
   ::encode(expected_num_objects, bl);
+  __u8 m = cache_measure;
+  ::encode(m, bl);
+  ::encode(hit_set_grade_decay_rate, bl);
   ENCODE_FINISH(bl);
 }
 
@@ -1385,6 +1393,16 @@ void pg_pool_t::decode(bufferlist::iterator& bl)
   } else {
     expected_num_objects = 0;
   }
+  if (struct_v >= 18) {
+    __u8 v;
+    ::decode(v, bl);
+    cache_measure = (cache_measure_t)v;
+    ::decode(hit_set_grade_decay_rate, bl);
+    set_grade(hit_set_grade_decay_rate, hit_set_count);
+  } else {
+    cache_measure = CACHEMEASURE_ATIME;
+    hit_set_grade_decay_rate = 0;
+  }
   DECODE_FINISH(bl);
   calc_pg_masks();
 }
@@ -1425,12 +1443,16 @@ void pg_pool_t::generate_test_instances(list<pg_pool_t*>& o)
   a.tiers.insert(1);
   a.tier_of = 2;
   a.cache_mode = CACHEMODE_WRITEBACK;
+  a.cache_measure = CACHEMEASURE_ATIME;
   a.read_tier = 1;
   a.write_tier = 1;
   a.hit_set_params = HitSet::Params(new BloomHitSet::Params);
   a.hit_set_period = 3600;
   a.hit_set_count = 8;
   a.min_read_recency_for_promote = 1;
+  a.hit_set_grade_decay_rate = 50;
+  a.grade_table.push_back(1000000);
+  a.grade_table.push_back(500000);
   a.set_stripe_width(12345);
   a.target_max_bytes = 1238132132;
   a.target_max_objects = 1232132;
@@ -1475,6 +1497,8 @@ ostream& operator<<(ostream& out, const pg_pool_t& p)
     out << " write_tier " << p.write_tier;
   if (p.cache_mode)
     out << " cache_mode " << p.get_cache_mode_name();
+  if (p.cache_mode)
+    out << " cache_measure " << p.get_cache_measure_name();
   if (p.target_max_bytes)
     out << " target_bytes " << p.target_max_bytes;
   if (p.target_max_objects)
@@ -1483,6 +1507,10 @@ ostream& operator<<(ostream& out, const pg_pool_t& p)
     out << " hit_set " << p.hit_set_params
 	<< " " << p.hit_set_period << "s"
 	<< " x" << p.hit_set_count;
+    if (p.cache_measure == pg_pool_t::CACHEMEASURE_TEMP) {
+      out << " decay_rate " << p.hit_set_grade_decay_rate
+      << " grade_table" << p.grade_table;
+    }
   }
   if (p.min_read_recency_for_promote)
     out << " min_read_recency_for_promote " << p.min_read_recency_for_promote;
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index 18f5402..7bea017 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -888,6 +888,12 @@ struct pg_pool_t {
     CACHEMODE_READFORWARD = 4,           ///< forward reads, write to cache flush later
     CACHEMODE_READPROXY = 5              ///< proxy reads, write to cache flush later
   } cache_mode_t;
+
+  typedef enum {
+    CACHEMEASURE_ATIME = 0,              ///< judge hotness by atime
+    CACHEMEASURE_TEMP = 1                ///< judge hotness by temperature
+  } cache_measure_t;
+
   static const char *get_cache_mode_name(cache_mode_t m) {
     switch (m) {
     case CACHEMODE_NONE: return "none";
@@ -932,6 +938,24 @@ struct pg_pool_t {
     }
   }
 
+  /// Printable name for a cache_measure_t value.
+  /// Values outside the enum map to "unknown".
+  static const char *get_cache_measure_name(cache_measure_t m) {
+    if (m == CACHEMEASURE_ATIME)
+      return "atime";
+    return m == CACHEMEASURE_TEMP ? "temperature" : "unknown";
+  }
+  /// Parse "atime" / "temperature" into the matching cache_measure_t.
+  /// Any other string yields (cache_measure_t)-1.
+  static cache_measure_t get_cache_measure_from_str(const string& s) {
+    if (s == "temperature")
+      return CACHEMEASURE_TEMP;
+    return s == "atime" ? CACHEMEASURE_ATIME : (cache_measure_t)-1;
+  }
+  /// Printable name of this pool's current cache_measure.
+  const char *get_cache_measure_name() const
+  { return get_cache_measure_name(cache_measure); }
+
   uint64_t flags;           ///< FLAG_*
   __u8 type;                ///< TYPE_*
   __u8 size, min_size;      ///< number of osds in each pg
@@ -976,6 +1000,7 @@ public:
   int64_t read_tier;       ///< pool/tier for objecter to direct reads to
   int64_t write_tier;      ///< pool/tier for objecter to direct writes to
   cache_mode_t cache_mode;  ///< cache pool mode
+  cache_measure_t cache_measure;  ///< cache measure dimension: atime or temperature
 
   bool is_tier() const { return tier_of >= 0; }
   bool has_tiers() const { return !tiers.empty(); }
@@ -993,6 +1018,7 @@ public:
     if (cache_mode != CACHEMODE_NONE)
       flags |= FLAG_INCOMPLETE_CLONES;
     cache_mode = CACHEMODE_NONE;
+    cache_measure = CACHEMEASURE_ATIME;
 
     target_max_bytes = 0;
     target_max_objects = 0;
@@ -1001,6 +1027,8 @@ public:
     hit_set_params = HitSet::Params();
     hit_set_period = 0;
     hit_set_count = 0;
+    hit_set_grade_decay_rate = 0;
+    grade_table.resize(0);
   }
 
   uint64_t target_max_bytes;   ///< tiering: target max pool size
@@ -1016,6 +1044,25 @@ public:
   uint32_t hit_set_period;      ///< periodicity of HitSet segments (seconds)
   uint32_t hit_set_count;       ///< number of periods to retain
   uint32_t min_read_recency_for_promote;   ///< minimum number of HitSet to check before promote
+  uint32_t hit_set_grade_decay_rate;   ///< percentage by which each following hit_set's
+                                       ///< weight in the temperature count decays
+                                       ///< relative to the hit_set before it
+  vector<uint32_t> grade_table;        ///< per-hit_set weights, filled by set_grade()
+  /// Fill grade_table with `size` weights: 1000000 first, each later entry
+  /// (100 - decay)% of the previous.  decay > 100 is clamped (would be UB).
+  void set_grade(uint32_t decay, unsigned size) {
+    const double keep = 1 - ((decay > 100 ? 100u : decay) / 100.0);
+    unsigned v = 1000000;
+    grade_table.resize(size);
+    for (unsigned i = 0; i < size; i++, v = v * keep)
+      grade_table[i] = v;
+  }
+  /// Weight of the i-th entry of grade_table; 0 when i is past the end.
+  /// const so it can be called through a const pg_pool_t.
+  uint32_t get_grade(unsigned i) const
+  {
+    return i < grade_table.size() ? grade_table[i] : 0;
+  }
 
   uint32_t stripe_width;        ///< erasure coded stripe size in bytes
 
@@ -1035,6 +1082,7 @@ public:
       pg_num_mask(0), pgp_num_mask(0),
       tier_of(-1), read_tier(-1), write_tier(-1),
       cache_mode(CACHEMODE_NONE),
+      cache_measure(CACHEMEASURE_ATIME),
       target_max_bytes(0), target_max_objects(0),
       cache_target_dirty_ratio_micro(0),
       cache_target_full_ratio_micro(0),
@@ -1044,6 +1092,7 @@ public:
       hit_set_period(0),
       hit_set_count(0),
       min_read_recency_for_promote(0),
+      hit_set_grade_decay_rate(0),
       stripe_width(0),
       expected_num_objects(0)
   { }
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [CEPH Users]     [Ceph Large]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux