Re: Weird issues related to (large/small) weights in mixed nvme/hdd pool

Peter Linder <peter.linder@xxxxxxxxxxxxxx> · Fri, 26 Jan 2018 12:59:15 +0100



    Well, we do, but our problem is with our hybrid setup (1 nvme and
      2 hdds). The other two (that we rarely use) are nvme-only and
      hdd-only; as far as I can tell they work, and the "take" step uses
      the class to select only the relevant OSDs.

    
    I'll just paste our entire crushmap dump here. This one starts
      working when changing the 1.7 weight to 1.0... crushtool --test
      doesn't show any errors in any case, all PGs seem to be properly
      assigned to osds. 

    
    # begin crush map

        tunable choose_local_tries 0

        tunable choose_local_fallback_tries 0

        tunable choose_total_tries 50

        tunable chooseleaf_descend_once 1

        tunable chooseleaf_vary_r 1

        tunable chooseleaf_stable 1

        tunable straw_calc_version 1

        tunable allowed_bucket_algs 54

        
        # devices

        device 0 osd.0 class nvme

        device 1 osd.1 class nvme

        device 2 osd.2 class nvme

        device 3 osd.3 class nvme

        device 4 osd.4 class nvme

        device 5 osd.5 class nvme

        device 6 osd.6 class nvme

        device 7 osd.7 class nvme

        device 8 osd.8 class nvme

        device 9 osd.9 class nvme

        device 10 osd.10 class nvme

        device 12 osd.12 class hdd

        device 13 osd.13 class hdd

        device 14 osd.14 class hdd

        device 15 osd.15 class hdd

        device 16 osd.16 class hdd

        device 17 osd.17 class hdd

        device 18 osd.18 class hdd

        device 19 osd.19 class hdd

        device 20 osd.20 class hdd

        device 21 osd.21 class hdd

        device 22 osd.22 class hdd

        device 23 osd.23 class hdd

        device 24 osd.24 class nvme

        device 25 osd.25 class nvme

        device 26 osd.26 class nvme

        device 27 osd.27 class nvme

        device 36 osd.36 class hdd

        device 37 osd.37 class hdd

        device 38 osd.38 class hdd

        device 39 osd.39 class hdd

        device 40 osd.40 class hdd

        device 41 osd.41 class hdd

        device 42 osd.42 class hdd

        device 43 osd.43 class hdd

        device 44 osd.44 class hdd

        device 45 osd.45 class hdd

        device 46 osd.46 class hdd

        device 47 osd.47 class hdd

        device 48 osd.48 class hdd

        device 49 osd.49 class hdd

        device 50 osd.50 class hdd

        device 51 osd.51 class hdd

        device 52 osd.52 class hdd

        device 53 osd.53 class hdd

        device 54 osd.54 class hdd

        device 55 osd.55 class hdd

        device 56 osd.56 class hdd

        device 57 osd.57 class hdd

        device 58 osd.58 class hdd

        device 59 osd.59 class hdd

        
        # types

        type 0 osd

        type 1 host

        type 2 hostgroup

        type 3 rack

        type 4 row

        type 5 pdu

        type 6 pod

        type 7 room

        type 8 datacenter

        type 9 region

        type 10 root

        
        # buckets

        host storage11 {

                id -5           # do not change unnecessarily

                id -6 class nvme                # do not change
        unnecessarily

                id -10 class hdd                # do not change
        unnecessarily

                # weight 4.612

                alg straw2

                hash 0  # rjenkins1

                item osd.0 weight 0.728

                item osd.3 weight 0.728

                item osd.6 weight 0.728

                item osd.7 weight 0.728

                item osd.10 weight 1.700

        }

        host storage21 {

                id -13          # do not change unnecessarily

                id -14 class nvme               # do not change
        unnecessarily

                id -15 class hdd                # do not change
        unnecessarily

                # weight 65.496

                alg straw2

                hash 0  # rjenkins1

                item osd.12 weight 5.458

                item osd.13 weight 5.458

                item osd.14 weight 5.458

                item osd.15 weight 5.458

                item osd.16 weight 5.458

                item osd.17 weight 5.458

                item osd.18 weight 5.458

                item osd.19 weight 5.458

                item osd.20 weight 5.458

                item osd.21 weight 5.458

                item osd.22 weight 5.458

                item osd.23 weight 5.458

        }

        datacenter HORN79 {

                id -19          # do not change unnecessarily

                id -26 class nvme               # do not change
        unnecessarily

                id -27 class hdd                # do not change
        unnecessarily

                # weight 70.108

                alg straw2

                hash 0  # rjenkins1

                item storage11 weight 4.612

                item storage21 weight 65.496

        }

        host storage13 {

                id -7           # do not change unnecessarily

                id -8 class nvme                # do not change
        unnecessarily

                id -11 class hdd                # do not change
        unnecessarily

                # weight 4.612

                alg straw2

                hash 0  # rjenkins1

                item osd.24 weight 0.728

                item osd.25 weight 0.728

                item osd.26 weight 0.728

                item osd.27 weight 0.728

                item osd.8 weight 1.700

        }

        host storage23 {

                id -16          # do not change unnecessarily

                id -17 class nvme               # do not change
        unnecessarily

                id -18 class hdd                # do not change
        unnecessarily

                # weight 65.784

                alg straw2

                hash 0  # rjenkins1

                item osd.36 weight 5.482

                item osd.37 weight 5.482

                item osd.38 weight 5.482

                item osd.39 weight 5.482

                item osd.40 weight 5.482

                item osd.41 weight 5.482

                item osd.42 weight 5.482

                item osd.43 weight 5.482

                item osd.44 weight 5.482

                item osd.45 weight 5.482

                item osd.58 weight 5.482

                item osd.59 weight 5.482

        }

        datacenter WAR {

                id -20          # do not change unnecessarily

                id -24 class nvme               # do not change
        unnecessarily

                id -25 class hdd                # do not change
        unnecessarily

                # weight 70.401

                alg straw2

                hash 0  # rjenkins1

                item storage13 weight 4.612

                item storage23 weight 65.789

        }

        host storage12 {

                id -3           # do not change unnecessarily

                id -4 class nvme                # do not change
        unnecessarily

                id -9 class hdd         # do not change unnecessarily

                # weight 4.612

                alg straw2

                hash 0  # rjenkins1

                item osd.1 weight 0.728

                item osd.2 weight 0.728

                item osd.4 weight 0.728

                item osd.5 weight 0.728

                item osd.9 weight 1.700

        }

        host storage22 {

                id -67          # do not change unnecessarily

                id -68 class nvme               # do not change
        unnecessarily

                id -69 class hdd                # do not change
        unnecessarily

                # weight 65.736

                alg straw2

                hash 0  # rjenkins1

                item osd.46 weight 5.458

                item osd.47 weight 5.458

                item osd.48 weight 5.482

                item osd.49 weight 5.482

                item osd.50 weight 5.482

                item osd.51 weight 5.482

                item osd.52 weight 5.482

                item osd.53 weight 5.482

                item osd.54 weight 5.482

                item osd.55 weight 5.482

                item osd.56 weight 5.482

                item osd.57 weight 5.482

        }

        datacenter TEG4 {

                id -21          # do not change unnecessarily

                id -22 class nvme               # do not change
        unnecessarily

                id -23 class hdd                # do not change
        unnecessarily

                # weight 70.352

                alg straw2

                hash 0  # rjenkins1

                item storage12 weight 4.612

                item storage22 weight 65.740

        }

        root default {

                id -1           # do not change unnecessarily

                id -2 class nvme                # do not change
        unnecessarily

                id -12 class hdd                # do not change
        unnecessarily

                # weight 210.861

                alg straw2

                hash 0  # rjenkins1

                item HORN79 weight 70.108

                item WAR weight 70.401

                item TEG4 weight 70.352

        }

        hostgroup hg1-1 {

                id -30          # do not change unnecessarily

        #       id -28 class nvme               # do not change
        unnecessarily

        #       id -54 class hdd                # do not change
        unnecessarily

                # weight 1.700

                alg straw2

                hash 0  # rjenkins1

                item storage11 weight 100.000

        }

        hostgroup hg1-2 {

                id -31          # do not change unnecessarily

        #       id -29 class nvme               # do not change
        unnecessarily

        #       id -55 class hdd                # do not change
        unnecessarily

                # weight 10.000

                alg straw2

                hash 0  # rjenkins1

                item storage22 weight 100.000

        }

        hostgroup hg1-3 {

                id -32          # do not change unnecessarily

        #       id -43 class nvme               # do not change
        unnecessarily

        #       id -56 class hdd                # do not change
        unnecessarily

                # weight 10.000

                alg straw2

                hash 0  # rjenkins1

                item storage23 weight 100.000

        }

        hostgroup hg2-1 {

                id -33          # do not change unnecessarily

        #       id -45 class nvme               # do not change
        unnecessarily

        #       id -58 class hdd                # do not change
        unnecessarily

                # weight 10.000

                alg straw2

                hash 0  # rjenkins1

                item storage12 weight 100.000

        }

        hostgroup hg2-2 {

                id -34          # do not change unnecessarily

        #       id -46 class nvme               # do not change
        unnecessarily

        #       id -59 class hdd                # do not change
        unnecessarily

                # weight 10.000

                alg straw2

                hash 0  # rjenkins1

                item storage21 weight 100.000

        }

        hostgroup hg2-3 {

                id -35          # do not change unnecessarily

        #       id -47 class nvme               # do not change
        unnecessarily

        #       id -60 class hdd                # do not change
        unnecessarily

                # weight 10.000

                alg straw2

                hash 0  # rjenkins1

                item storage23 weight 100.000

        }

        hostgroup hg3-1 {

                id -36          # do not change unnecessarily

        #       id -49 class nvme               # do not change
        unnecessarily

        #       id -62 class hdd                # do not change
        unnecessarily

                # weight 10.000

                alg straw2

                hash 0  # rjenkins1

                item storage13 weight 100.000

        }

        hostgroup hg3-2 {

                id -37          # do not change unnecessarily

        #       id -50 class nvme               # do not change
        unnecessarily

        #       id -63 class hdd                # do not change
        unnecessarily

                # weight 10.000

                alg straw2

                hash 0  # rjenkins1

                item storage21 weight 100.000

        }

        hostgroup hg3-3 {

                id -38          # do not change unnecessarily

        #       id -51 class nvme               # do not change
        unnecessarily

        #       id -64 class hdd                # do not change
        unnecessarily

                # weight 10.000

                alg straw2

                hash 0  # rjenkins1

                item storage22 weight 100.000

        }

        datacenter ldc1 {

                id -39          # do not change unnecessarily

        #       id -44 class nvme               # do not change
        unnecessarily

        #       id -57 class hdd                # do not change
        unnecessarily

                # weight 30.000

                alg straw2

                hash 0  # rjenkins1

                item hg1-1 weight 100.000

                item hg1-2 weight 100.000

                item hg1-3 weight 100.000

        }

        datacenter ldc2 {

                id -40          # do not change unnecessarily

        #       id -48 class nvme               # do not change
        unnecessarily

        #       id -61 class hdd                # do not change
        unnecessarily

                # weight 30.000

                alg straw2

                hash 0  # rjenkins1

                item hg2-1 weight 100.000

                item hg2-2 weight 100.000

                item hg2-3 weight 100.000

        }

        datacenter ldc3 {

                id -41          # do not change unnecessarily

        #       id -52 class nvme               # do not change
        unnecessarily

        #       id -65 class hdd                # do not change
        unnecessarily

                # weight 30.000

                alg straw2

                hash 0  # rjenkins1

                item hg3-1 weight 100.000

                item hg3-2 weight 100.000

                item hg3-3 weight 100.000

        }

        root ldc {

                id -42          # do not change unnecessarily

        #       id -53 class nvme               # do not change
        unnecessarily

        #       id -66 class hdd                # do not change
        unnecessarily

                # weight 90.000

                alg straw2

                hash 0  # rjenkins1

                item ldc1 weight 300.000

                item ldc2 weight 300.000

                item ldc3 weight 300.000

        }

        
        # rules

        rule hybrid {

                id 1

                type replicated

                min_size 1

                max_size 10

                step take ldc

                step choose indep 1 type datacenter

                step chooseleaf indep 0 type hostgroup

                step emit

        }

        rule hdd {

                id 2

                type replicated

                min_size 1

                max_size 3

                step take default class hdd

                step chooseleaf firstn 0 type datacenter

                step emit

        }

        rule nvme {

                id 3

                type replicated

                min_size 1

                max_size 3

                step take default class nvme

                step chooseleaf firstn 0 type datacenter

                step emit

        }

        
        # end crush map

      
    Den 2018-01-26 kl. 11:22, skrev Thomas
      Bennett:

    
      Hi Peter,
        

        Just to check if your problem is similar to mine:
        
          
            Do you have any pools that follow a crush rule to only
              use osds that are backed by hdds (i.e not nvmes)?
            Do these pools obey that rule? i.e do they maybe have
              pgs that are on nvmes?
          
        
        Regards,
        Tom
      
      
        On Fri, Jan 26, 2018 at 11:48 AM, Peter
          Linder <peter.linder@xxxxxxxxxxxxxx>
          wrote:

          
              Hi Thomas, 

              
              No, we haven't gotten any closer to resolving this, in
                fact we had another issue again when we added a new nvme
                drive to our nvme servers (storage11, storage12 and
                storage13) that had weight 1.7 instead of the usual
                0.728 size. This (see below) is what a nvme and hdd
                server pair at a site looks like, and it broke when
                adding osd.10 (adding the nvme drive to storage12 and
                storage13 worked, it failed when adding the last one to
                storage11). Changing osd.10's weight to 1.0 instead and
                recompiling crushmap allowed all PGs to activate. 

              
              Unfortunately this is a production cluster that we were
                hoping to expand as needed, so if there is a problem we
                quickly have to revert to the last working crushmap, so
                no time to debug :(
              We are currently building a copy of the environment
                though virtualized and I hope that we will be able to
                re-create the issue there as we will be able to break it
                at will :)

              
              host storage11 {

                          id -5           # do not change unnecessarily

                          id -6 class nvme                # do not
                  change unnecessarily

                          id -10 class hdd                # do not
                  change unnecessarily

                          # weight 4.612

                            alg straw2

                            hash 0  # rjenkins1

                           item osd.0 weight 0.728

                          item osd.3 weight 0.728

                          item osd.6 weight 0.728

                          item osd.7 weight 0.728

                          item osd.10 weight 1.700

                  }

                  host storage21 {

                          id -13          # do not change unnecessarily

                          id -14 class nvme               # do not
                  change unnecessarily

                          id -15 class hdd                # do not
                  change unnecessarily

                            # weight 65.496

                            alg straw2

                            hash 0  # rjenkins1

                           item osd.12 weight 5.458

                          item osd.13 weight 5.458

                          item osd.14 weight 5.458

                          item osd.15 weight 5.458

                          item osd.16 weight 5.458

                          item osd.17 weight 5.458

                          item osd.18 weight 5.458

                          item osd.19 weight 5.458

                          item osd.20 weight 5.458

                          item osd.21 weight 5.458

                          item osd.22 weight 5.458

                          item osd.23 weight 5.458

                  }

                
                  Den
                    2018-01-26 kl. 08:45, skrev Thomas Bennett:

                  
                    Hi Peter,
                      

                      Not sure if you have got to the bottom of
                        your problem,  but I seem to have found what
                        might be a similar problem. I recommend reading
                        below,  as there could be a potential hidden
                        problem.
                      

                        Yesterday our cluster went into HEALTH_WARN
                          state and I noticed that one of my pg's
                          was listed as 'activating' and marked
                          as 'inactive' and 'unclean'.
                        

                        We also have a mixed OSD system - 768 HDDs
                          and 16 NVMEs with three crush rules for object
                          placement: the default replicated_rule (I
                          never deleted it) and then two new ones for replicate_rule_hdd
                          and replicate_rule_nvme.
                      
                      
                      Running a query on the pg (in my case pg
                        15.792) did not yield anything out of place,
                        except for it telling me that its state
                        was 'activating' (that's not even a listed pg
                        state; see the "pg states" documentation),
                          which made me slightly alarmed.
                      

                      The bits of information that alerted me to
                        the issue were:
                      

                      1. Running 'ceph
                          pg dump' and finding the 'activating'
                        pg showed the following information:
                      

                        15.792
                            activating [4,724,242] #for pool 15 pg there
                            are osds 4,724,242
                      
                      
                      2. Running "ceph
                          osd tree | grep 'osd.4 '" and
                          getting the following information:

                      
                        4 nvme
                            osd.4

                        
                      3. Now checking what pool 15 is by running 'ceph osd pool ls
                          detail':
                      

                          pool 15
                              'default.rgw.data' replicated size 3
                              min_size 2 crush_rule 1
                        
                      
                      These three bits of information made me
                        realise what was going on:
                      
                        
                          OSD 4,724,242 are all nvmes
                          Pool 15 should obey crush_rule 1 (replicated_rule_hdd)
                          Pool 15 has pgs that use nvmes!
                        
                      
                      I found the following really useful tool
                        online which showed me the depth of the
                        problem: Get the
                          Number of Placement Groups Per Osd
                      

                      So it turns out that, in my case, pool 15 has pgs
                        on all the nvmes!

                      
                      To test a fix to mimic the problem again - I
                        executed the following command: 'ceph
                          osd pg-upmap-items 15.792 4 22 724 67 76 242'
                      

                      It
                          remapped the osds used by the 'activating' pg, and
                          my cluster status went back to HEALTH_OK and
                          the pg went back to normal, making the cluster
                          appear healthy.
                      

                      Luckily for me we've not put the cluster into
                        production so I'll just blow away the pool and
                        recreate it.
                      

                      What
                          I've not yet figured out is how this happened.
                      

                      The
                          steps (I think) I took were: 
                      
                        
                          Run ceph-ansible
                              and  'default.rgw.data' pool
                              was created automatically.
                          I
                              think I then increased the pg count.
                          Create a new rule: ceph osd crush rule
                              create-replicated replicated_rule_hdd
                              default host hdd
                          Move
                              pool to new rule: ceph osd pool
                              set default.rgw.data crush_rule
                              replicated_rule_hdd
                        
                      
                      I don't know what the expected behaviour of
                        the set command is, so I'm planning to see if I
                        can recreate the problem on a test cluster to
                        see which part of the process created the
                        problem. Perhaps I should have first migrated to
                        the new rule before increasing the pgs.
                      

                      Regards,
                      Tom
                      

                        On Sat, Jan 20, 2018 at
                          10:30 PM, <peter.linder@xxxxxxxxxxxxxx>
                          wrote:

                          Hi all,

                            
                            I'm getting such weird problems when we for
                            instance re-add a server, add disks etc!
                            Most of the time some PGs end up in
                            "active+clean+remapped" mode, but today some
                            of them got stuck "activating" which meant
                            that some PGs were offline for a while. I'm
                            able to fix things, but the fix is so weird
                            that I'm wondering what's going on...

                            
                            Background is we have a pool (rep=3,min=2)
                            where for each pg we select 1 osd from a
                            server with only nvme-osds, and 2 osds from
                            servers with only hdd's. There are a total
                            of 9 servers, with 3 (1 nvme + 2 hdd) in 3
                            separate data centers. We always select
                            servers from different data centers (latency
                            is not an issue), so we would select for
                            instance dc2:nvme, dc1:hdd, dc3:hdd, in 3
                            separate permutations.

                            
                            Here is the relevant part of our crushmap. I
                            will explain layout and my fix (that I have
                            no idea why I'm doing) below it:

                            
                            hostgroup hg1-1 {

                                    id -30          # do not change
                            unnecessarily

                                    id -28 class nvme               # do
                            not change unnecessarily

                                    id -54 class hdd                # do
                            not change unnecessarily

                                    id -71 class ssd                # do
                            not change unnecessarily

                                    # weight 2.911

                                    alg straw2

                                    hash 0  # rjenkins1

                                    item storage11 weight 2.911

                            }

                            hostgroup hg1-2 {

                                    id -31          # do not change
                            unnecessarily

                                    id -29 class nvme               # do
                            not change unnecessarily

                                    id -55 class hdd                # do
                            not change unnecessarily

                                    id -73 class ssd                # do
                            not change unnecessarily

                                    # weight 65.789

                                    alg straw2

                                    hash 0  # rjenkins1

                                    item storage22 weight 65.789

                            }

                            hostgroup hg1-3 {

                                    id -32          # do not change
                            unnecessarily

                                    id -43 class nvme               # do
                            not change unnecessarily

                                    id -56 class hdd                # do
                            not change unnecessarily

                                    id -75 class ssd                # do
                            not change unnecessarily

                                    # weight 65.789

                                    alg straw2

                                    hash 0  # rjenkins1

                                    item storage23 weight 65.789

                            }

                            hostgroup hg2-1 {

                                    id -33          # do not change
                            unnecessarily

                                    id -45 class nvme               # do
                            not change unnecessarily

                                    id -58 class hdd                # do
                            not change unnecessarily

                                    id -78 class ssd                # do
                            not change unnecessarily

                                    # weight 2.911

                                    alg straw2

                                    hash 0  # rjenkins1

                                    item storage12 weight 2.911

                            }

                            hostgroup hg2-2 {

                                    id -34          # do not change
                            unnecessarily

                                    id -46 class nvme               # do
                            not change unnecessarily

                                    id -59 class hdd                # do
                            not change unnecessarily

                                    id -80 class ssd                # do
                            not change unnecessarily

                                    # weight 65.496

                                    alg straw2

                                    hash 0  # rjenkins1

                                    item storage21 weight 65.496

                            }

                            hostgroup hg2-3 {

                                    id -35          # do not change
                            unnecessarily

                                    id -47 class nvme               # do
                            not change unnecessarily

                                    id -60 class hdd                # do
                            not change unnecessarily

                                    id -81 class ssd                # do
                            not change unnecessarily

                                    # weight 65.789

                                    alg straw2

                                    hash 0  # rjenkins1

                                    item storage23 weight 65.789

                            }

                            hostgroup hg3-1 {

                                    id -36          # do not change
                            unnecessarily

                                    id -49 class nvme               # do
                            not change unnecessarily

                                    id -62 class hdd                # do
                            not change unnecessarily

                                    id -84 class ssd                # do
                            not change unnecessarily

                                    # weight 2.911

                                    alg straw2

                                    hash 0  # rjenkins1

                                    item storage13 weight 2.911

                            }

                            hostgroup hg3-2 {

                                    id -37          # do not change
                            unnecessarily

                                    id -50 class nvme               # do
                            not change unnecessarily

                                    id -63 class hdd                # do
                            not change unnecessarily

                                    id -85 class ssd                # do
                            not change unnecessarily

                                    # weight 65.496

                                    alg straw2

                                    hash 0  # rjenkins1

                                    item storage21 weight 65.496

                            }

                            hostgroup hg3-3 {

                                    id -38          # do not change
                            unnecessarily

                                    id -51 class nvme               # do
                            not change unnecessarily

                                    id -64 class hdd                # do
                            not change unnecessarily

                                    id -86 class ssd                # do
                            not change unnecessarily

                                    # weight 65.789

                                    alg straw2

                                    hash 0  # rjenkins1

                                    item storage22 weight 65.789

                            }

                            datacenter ldc1 {

                                    id -39          # do not change
                            unnecessarily

                                    id -44 class nvme               # do
                            not change unnecessarily

                                    id -57 class hdd                # do
                            not change unnecessarily

                                    id -76 class ssd                # do
                            not change unnecessarily

                                    # weight 134.489

                                    alg straw2

                                    hash 0  # rjenkins1

                                    item hg1-1 weight 65.496

                                    item hg1-2 weight 65.789

                                    item hg1-3 weight 65.789

                            }

                            datacenter ldc2 {

                                    id -40          # do not change
                            unnecessarily

                                    id -48 class nvme               # do
                            not change unnecessarily

                                    id -61 class hdd                # do
                            not change unnecessarily

                                    id -82 class ssd                # do
                            not change unnecessarily

                                    # weight 196.781

                                    alg straw2

                                    hash 0  # rjenkins1

                                    item hg2-1 weight 65.496

                                    item hg2-2 weight 65.496

                                    item hg2-3 weight 65.789

                            }

                            datacenter ldc3 {

                                    id -41          # do not change
                            unnecessarily

                                    id -52 class nvme               # do
                            not change unnecessarily

                                    id -65 class hdd                # do
                            not change unnecessarily

                                    id -87 class ssd                # do
                            not change unnecessarily

                                    # weight 197.197

                                    alg straw2

                                    hash 0  # rjenkins1

                                    item hg3-1 weight 65.912

                                    item hg3-2 weight 65.496

                                    item hg3-3 weight 65.789

                            }

                            root ldc {

                                    id -42          # do not change
                            unnecessarily

                                    id -53 class nvme               # do
                            not change unnecessarily

                                    id -66 class hdd                # do
                            not change unnecessarily

                                    id -88 class ssd                # do
                            not change unnecessarily

                            
                                    # weight 528.881

                                    alg straw2

                                    hash 0  # rjenkins1

                                    item ldc1 weight 97.489

                                    item ldc2 weight 97.196

                                    item ldc3 weight 97.196

                            }

                            
                            # rules

                            rule hybrid {

                                    id 1

                                    type replicated

                                    min_size 1

                                    max_size 10

                                    step take ldc

                                    step choose firstn 1 type datacenter

                                    step chooseleaf firstn 0 type
                            hostgroup

                                    step emit

                            }

                            
                            Ok, so there are 9 hostgroups (I changed
                            "type 2"). Each hostgroup currently holds 1
                            server, but may in the future hold more.
                            These are grouped into sets of 3, each
                            called a "datacenter" even though each set
                            is spread out across 3 physical data
                            centers. These are then put in a separate
                            root called "ldc".

                            
                            The "hybrid" rule then proceeds to select 1
                            datacenter, and then 3 osds from that
                            datacenter. The end result is that 3 OSDs
                            from different physical datacenters are
                            selected, with 1 nvme and 2 hdd (hdds have
                            reduced primary affinity to 0.00099, and yes
                            this might be a problem?). If one datacenter
                            is lost, only 1/3 of the nvmes are in
                            fact offline so capacity loss is manageable
                            compared to having all nvme's in one
                            datacenter.

                            
                            Because nvmes are much smaller, after adding
                            one the "datacenter" looks like this:

                            
                                    item hg1-1 weight 2.911

                                    item hg1-2 weight 65.789

                                    item hg1-3 weight 65.789

                            
                            This causes PGs to go into
                            "active+clean+remapped" state forever. If I
                            manually change the weights so that they are
                            all almost the same, the problem goes away!
                            I would have thought that the weights do
                            not matter, since we have to choose 3 of
                            these anyway. So I'm really confused by
                            this.

                            
                            Today I also had to change

                            
                                    item ldc1 weight 197.489

                                    item ldc2 weight 197.196

                                    item ldc3 weight 197.196

                            to

                                    item ldc1 weight 97.489

                                    item ldc2 weight 97.196

                                    item ldc3 weight 97.196

                            
                            or some PGs wouldn't activate at all! I'm
                            not really aware of how the hashing/selection
                            process works, though; it does somehow seem
                            that if the values are too far apart, things
                            break. crushtool --test seems to
                            correctly calculate my PGs.

                            
                            Basically when this happens I just randomly
                            change some weights and most of the time it
                            starts working. Why?

                            
                            Regards,

                            Peter

                            
                            _______________________________________________

                            ceph-users mailing list

                            ceph-users@xxxxxxxxxxxxxx

                            http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com

                          
                        -- 

                        
                            Thomas Bennett

                            
                            SKA South Africa
                            Science Processing Team
                            

                            Office: +27 21 5067341
                            Mobile: +27 79 5237105
                          
                        
        -- 

        
            Thomas Bennett

            
            SKA South Africa
            Science Processing Team
            

            Office: +27 21 5067341
            Mobile: +27 79 5237105
          
        
_______________________________________________
ceph-users mailing list
ceph-users@xxxxxxxxxxxxxx
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com