Below are the performance counters. Some of our scientific workflows trigger
this; parts of them are quite data intensive, processing thousands of files
over many hours to days. The 200GB ceph-fuse instance got there in about
3 days. I'm keeping the node alive for now in case we can extract some more
definitive info on what is happening there.
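In the meantime, something along these lines could be left running on the node
to line up the RSS growth with the buffer_anon pool and whatever jobs are
active at the time (just a rough sketch; it assumes jq is installed and uses
the same admin socket path as in the dumps below):

# Sample ceph-fuse RSS and the buffer_anon mempool every 10 minutes.
# Sketch only: jq is assumed to be available; asok path matches the dumps below.
while true; do
    date
    ps -C ceph-fuse -o rss=        # resident set size in KB
    ceph daemon /var/run/ceph/ceph-client.admin.asok dump_mempools \
        | jq '.buffer_anon'
    sleep 600
done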
Andras
# ceph daemon /var/run/ceph/ceph-client.admin.asok perf dump
{
    "AsyncMessenger::Worker-0": {
        "msgr_recv_messages": 37730,
        "msgr_send_messages": 37731,
        "msgr_recv_bytes": 1121379127,
        "msgr_send_bytes": 11913693154,
        "msgr_created_connections": 75333,
        "msgr_active_connections": 730,
        "msgr_running_total_time": 642.152166956,
        "msgr_running_send_time": 536.723862752,
        "msgr_running_recv_time": 25.429112242,
        "msgr_running_fast_dispatch_time": 63.814291954
    },
    "AsyncMessenger::Worker-1": {
        "msgr_recv_messages": 38507,
        "msgr_send_messages": 38467,
        "msgr_recv_bytes": 1240174043,
        "msgr_send_bytes": 11673685736,
        "msgr_created_connections": 75479,
        "msgr_active_connections": 729,
        "msgr_running_total_time": 628.670562086,
        "msgr_running_send_time": 523.772820969,
        "msgr_running_recv_time": 25.902871268,
        "msgr_running_fast_dispatch_time": 62.375965165
    },
    "AsyncMessenger::Worker-2": {
        "msgr_recv_messages": 597697,
        "msgr_send_messages": 504640,
        "msgr_recv_bytes": 1314713236,
        "msgr_send_bytes": 11880445442,
        "msgr_created_connections": 75338,
        "msgr_active_connections": 728,
        "msgr_running_total_time": 711.909282325,
        "msgr_running_send_time": 556.195748166,
        "msgr_running_recv_time": 127.267332682,
        "msgr_running_fast_dispatch_time": 62.209721085
    },
    "client": {
        "reply": {
            "avgcount": 236795,
            "sum": 6177.205536940,
            "avgtime": 0.026086722
        },
        "lat": {
            "avgcount": 236795,
            "sum": 6177.205536940,
            "avgtime": 0.026086722
        },
        "wrlat": {
            "avgcount": 857828153,
            "sum": 8413.835066735,
            "avgtime": 0.000009808
        }
    },
    "objectcacher-libcephfs": {
        "cache_ops_hit": 4160412,
        "cache_ops_miss": 4887,
        "cache_bytes_hit": 3247294145494,
        "cache_bytes_miss": 12914144260,
        "data_read": 48923557765,
        "data_written": 35292875783,
        "data_flushed": 35292681606,
        "data_overwritten_while_flushing": 0,
        "write_ops_blocked": 0,
        "write_bytes_blocked": 0,
        "write_time_blocked": 0.000000000
    },
    "objecter": {
        "op_active": 0,
        "op_laggy": 0,
        "op_send": 111268,
        "op_send_bytes": 35292681606,
        "op_resend": 0,
        "op_reply": 111268,
        "op": 111268,
        "op_r": 2193,
        "op_w": 109075,
        "op_rmw": 0,
        "op_pg": 0,
        "osdop_stat": 2,
        "osdop_create": 2,
        "osdop_read": 2193,
        "osdop_write": 109071,
        "osdop_writefull": 0,
        "osdop_writesame": 0,
        "osdop_append": 0,
        "osdop_zero": 0,
        "osdop_truncate": 0,
        "osdop_delete": 0,
        "osdop_mapext": 0,
        "osdop_sparse_read": 0,
        "osdop_clonerange": 0,
        "osdop_getxattr": 0,
        "osdop_setxattr": 0,
        "osdop_cmpxattr": 0,
        "osdop_rmxattr": 0,
        "osdop_resetxattrs": 0,
        "osdop_tmap_up": 0,
        "osdop_tmap_put": 0,
        "osdop_tmap_get": 0,
        "osdop_call": 0,
        "osdop_watch": 0,
        "osdop_notify": 0,
        "osdop_src_cmpxattr": 0,
        "osdop_pgls": 0,
        "osdop_pgls_filter": 0,
        "osdop_other": 0,
        "linger_active": 0,
        "linger_send": 0,
        "linger_resend": 0,
        "linger_ping": 0,
        "poolop_active": 0,
        "poolop_send": 0,
        "poolop_resend": 0,
        "poolstat_active": 0,
        "poolstat_send": 0,
        "poolstat_resend": 0,
        "statfs_active": 0,
        "statfs_send": 1348,
        "statfs_resend": 0,
        "command_active": 0,
        "command_send": 0,
        "command_resend": 0,
        "map_epoch": 1079783,
        "map_full": 0,
        "map_inc": 632,
        "osd_sessions": 2160,
        "osd_session_open": 226144,
        "osd_session_close": 223984,
        "osd_laggy": 0,
        "omap_wr": 0,
        "omap_rd": 0,
        "omap_del": 0
    },
    "throttle-msgr_dispatch_throttler-client": {
        "val": 0,
        "max": 104857600,
        "get_started": 0,
        "get": 673934,
        "get_sum": 3626395290,
        "get_or_fail_fail": 0,
        "get_or_fail_success": 673934,
        "take": 0,
        "take_sum": 0,
        "put": 673934,
        "put_sum": 3626395290,
        "wait": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        }
    },
    "throttle-objecter_bytes": {
        "val": 0,
        "max": 104857600,
        "get_started": 0,
        "get": 0,
        "get_sum": 0,
        "get_or_fail_fail": 0,
        "get_or_fail_success": 0,
        "take": 111268,
        "take_sum": 38456168409,
        "put": 111264,
        "put_sum": 38456168409,
        "wait": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        }
    },
    "throttle-objecter_ops": {
        "val": 0,
        "max": 1024,
        "get_started": 0,
        "get": 0,
        "get_sum": 0,
        "get_or_fail_fail": 0,
        "get_or_fail_success": 0,
        "take": 111268,
        "take_sum": 111268,
        "put": 111268,
        "put_sum": 111268,
        "wait": {
            "avgcount": 0,
            "sum": 0.000000000,
            "avgtime": 0.000000000
        }
    }
}
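For convenience, the sections that look most relevant to the caching behaviour
can be pulled out of that dump with something like the following (again
assuming jq is available; the key names are exactly as in the JSON above):

# Extract just the ObjectCacher and client latency sections from the perf dump.
ceph daemon /var/run/ceph/ceph-client.admin.asok perf dump \
    | jq '{objectcacher: ."objectcacher-libcephfs", client: .client}'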
On 09/05/2018 10:00 AM, Sage Weil wrote:
On Wed, 5 Sep 2018, Andras Pataki wrote:
Hi cephers,
Every so often we have a ceph-fuse process that grows to a rather large size
(up to the point of eating up the whole memory of the machine). Here is an
example of a ceph-fuse instance with a 200GB RSS:
# ceph daemon /var/run/ceph/ceph-client.admin.asok dump_mempools
{
    "bloom_filter": {
        "items": 0,
        "bytes": 0
    },
    "bluestore_alloc": {
        "items": 0,
        "bytes": 0
    },
    "bluestore_cache_data": {
        "items": 0,
        "bytes": 0
    },
    "bluestore_cache_onode": {
        "items": 0,
        "bytes": 0
    },
    "bluestore_cache_other": {
        "items": 0,
        "bytes": 0
    },
    "bluestore_fsck": {
        "items": 0,
        "bytes": 0
    },
    "bluestore_txc": {
        "items": 0,
        "bytes": 0
    },
    "bluestore_writing_deferred": {
        "items": 0,
        "bytes": 0
    },
    "bluestore_writing": {
        "items": 0,
        "bytes": 0
    },
    "bluefs": {
        "items": 0,
        "bytes": 0
    },
    "buffer_anon": {
        "items": 51534897,
        "bytes": 207321872398
    },
    "buffer_meta": {
        "items": 64,
        "bytes": 5632
    },
    "osd": {
        "items": 0,
        "bytes": 0
    },
    "osd_mapbl": {
        "items": 0,
        "bytes": 0
    },
    "osd_pglog": {
        "items": 0,
        "bytes": 0
    },
    "osdmap": {
        "items": 28593,
        "bytes": 431872
    },
    "osdmap_mapping": {
        "items": 0,
        "bytes": 0
    },
    "pgmap": {
        "items": 0,
        "bytes": 0
    },
    "mds_co": {
        "items": 0,
        "bytes": 0
    },
    "unittest_1": {
        "items": 0,
        "bytes": 0
    },
    "unittest_2": {
        "items": 0,
        "bytes": 0
    },
    "total": {
        "items": 51563554,
        "bytes": 207322309902
    }
}
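As a quick sanity check on those numbers, buffer_anon works out to roughly 4KB
per item on average, i.e. about 50 million small buffers being held (this is
plain arithmetic on the two values above, nothing re-queried):

# Average buffer size in the buffer_anon pool, from the dump above:
awk 'BEGIN { printf "%.0f bytes per item\n", 207321872398 / 51534897 }'
# prints: 4023 bytes per item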
The general cache size looks like this (if it is helpful I can put a whole
cache dump somewhere):
# ceph daemon /var/run/ceph/ceph-client.admin.asok dump_cache | grep path | wc -l
84085
# ceph daemon /var/run/ceph/ceph-client.admin.asok dump_cache | grep name | wc -l
168186
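If I remember correctly, the client admin socket also has a 'status' command
that reports inode and dentry counts directly, which would be a lighter-weight
check than grepping a full cache dump (the command name and its output fields
are from memory, so treat this as a guess):

# Lighter-weight look at the client's metadata cache size (command from memory;
# I believe it reports inode_count and dentry_count among other fields).
ceph daemon /var/run/ceph/ceph-client.admin.asok status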
Any ideas what 'buffer_anon' is and what could be eating up the 200GB of
RAM?
buffer_anon is memory consumed by the bufferlist class that hasn't been
explicitly put into a separate mempool category. The question is
where/why are buffers getting pinned in memory. Can you dump the
perfcounters? That might give some hint.
My guess is a leak, or a problem with the ObjectCacher code that is
preventing it from trimming older buffers.
How reproducible is the situation? Any idea what workloads trigger it?
Thanks!
sage
We are running with a few ceph-fuse-specific parameters increased in ceph.conf:
# Description: Set the number of inodes that the client keeps in the metadata cache.
# Default: 16384
client_cache_size = 262144
# Description: Set the maximum number of dirty bytes in the object cache.
# Default: 104857600 (100MB)
client_oc_max_dirty = 536870912
# Description: Set the maximum number of objects in the object cache.
# Default: 1000
client_oc_max_objects = 8192
# Description: Set how many bytes of data the client will cache.
# Default: 209715200 (200 MB)
client_oc_size = 2147483640
# Description: Set the maximum number of bytes that the kernel reads ahead for
# future read operations. Overridden by the client_readahead_max_periods setting.
# Default: 0 (unlimited)
#client_readahead_max_bytes = 67108864
# Description: Set the number of file layout periods (object size * number of
# stripes) that the kernel reads ahead. Overrides the client_readahead_max_bytes setting.
# Default: 4
client_readahead_max_periods = 64
# Description: Set the minimum number of bytes that the kernel reads ahead.
# Default: 131072 (128KB)
client_readahead_min = 4194304
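For what it's worth, the values the long-running client actually picked up can
be double-checked over the admin socket with the generic 'config get' command
(same socket path as in the dumps above):

# Confirm the running ceph-fuse's view of the tuned options.
for opt in client_cache_size client_oc_size client_oc_max_dirty \
           client_oc_max_objects client_readahead_max_periods client_readahead_min; do
    ceph daemon /var/run/ceph/ceph-client.admin.asok config get "$opt"
done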
We are running a 12.2.7 ceph cluster, and the cluster is otherwise healthy.
Any hints would be appreciated. Thanks,
Andras