Finally, the issue that has haunted me for quite some time turned out to be a ceph.conf issue:
I had
osd_pool_default_pg_num = 100
osd_pool_default_pgp_num = 100
Once I changed it to
osd_pool_default_pg_num = 32
osd_pool_default_pgp_num = 32
the second rgw process started with no issue.
No idea why 32 works but 100 doesn't. The debug output is useless, and so are the log files. Just insane.
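If I had to guess, it's the new per-OSD PG limit that Luminous enforces (mon_max_pg_per_osd, 200 by default) and Kraken didn't: each rgw zone creates several pools, so at 100 PGs apiece the mons probably started refusing pool creation, and radosgw only surfaces the generic RADOS error. A rough way to confirm this theory (pgtest below is just a throwaway pool name, not something from my setup):

ceph daemon mon.ceph-mon1 config get mon_max_pg_per_osd    # run on the mon host itself
ceph osd pool create pgtest 100 100                        # should be refused with an "exceeds max" error if this is the cause
ceph osd pool delete pgtest pgtest --yes-i-really-really-mean-it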
Anyway, thanks.
On Fri, Jan 12, 2018 at 7:25 PM, Yehuda Sadeh-Weinraub <yehuda@xxxxxxxxxx> wrote:
The errors you're seeing there don't look related to
elasticsearch. It's a generic radosgw error that says it
failed to reach the rados (ceph) backend. You can try bumping up the
messenger log (debug ms = 1) and see if there's any hint in there.
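For example, something along these lines (reusing the manual invocation from your earlier mail; the admin socket path is just the usual naming convention, so adjust it if yours differs):

radosgw --keyring /etc/ceph/ceph.client.admin.keyring -f \
    --rgw-zone=zone1-b --rgw-frontends="civetweb port=8002" \
    --debug-ms=1 --debug-rgw=20

or, for the gateway already running under systemd:

ceph daemon /var/run/ceph/ceph-client.rgw.ceph-rgw1.asok config set debug_ms 1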
Yehuda
On Fri, Jan 12, 2018 at 12:54 PM, Youzhong Yang <youzhong@xxxxxxxxx> wrote:
> So I did the exact same thing using Kraken and the same set of VMs, and had no
> issue. What is the magic to make it work in Luminous? Has anyone been lucky enough
> to get this RGW ElasticSearch sync working on Luminous?
>
> On Mon, Jan 8, 2018 at 10:26 AM, Youzhong Yang <youzhong@xxxxxxxxx> wrote:
>>
>> Hi Yehuda,
>>
>> Thanks for replying.
>>
>> >radosgw failed to connect to your ceph cluster. Does the rados command
>> >with the same connection params work?
>>
>> I am not quite sure which rados command to run to test this.
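>>
>> I assume a basic check with the same keyring would be something along these lines (just listing pools and the objects in .rgw.root), but please correct me if you meant something else:
>>
>> rados --keyring /etc/ceph/ceph.client.admin.keyring lspools
>> rados --keyring /etc/ceph/ceph.client.admin.keyring -p .rgw.root ls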
>>
>> So I tried again. Could you please take a look and check what could have
>> gone wrong?
>>
>> Here is what I did:
>>
>> **** On the ceph admin node, I removed the installation on ceph-rgw1 and
>> ceph-rgw2, reinstalled rgw on ceph-rgw1, stopped the rgw service, and removed
>> all rgw pools. Elasticsearch is running on the ceph-rgw2 node on port 9200.
>>
>> ceph-deploy purge ceph-rgw1
>> ceph-deploy purge ceph-rgw2
>> ceph-deploy purgedata ceph-rgw2
>> ceph-deploy purgedata ceph-rgw1
>> ceph-deploy install --release luminous ceph-rgw1
>> ceph-deploy admin ceph-rgw1
>> ceph-deploy rgw create ceph-rgw1
>> ssh ceph-rgw1 sudo systemctl stop ceph-radosgw@xxxxxxxx-rgw1
>> rados rmpool default.rgw.log default.rgw.log --yes-i-really-really-mean-it
>> rados rmpool default.rgw.meta default.rgw.meta --yes-i-really-really-mean-it
>> rados rmpool default.rgw.control default.rgw.control --yes-i-really-really-mean-it
>> rados rmpool .rgw.root .rgw.root --yes-i-really-really-mean-it
>>
>> **** On ceph-rgw1 node:
>>
>> export RGWHOST="ceph-rgw1"
>> export ELASTICHOST="ceph-rgw2"
>> export REALM="demo"
>> export ZONEGRP="zone1"
>> export ZONE1="zone1-a"
>> export ZONE2="zone1-b"
>> export SYNC_AKEY="$( cat /dev/urandom | tr -dc 'a-zA-Z0-9' | fold -w 20 | head -n 1 )"
>> export SYNC_SKEY="$( cat /dev/urandom | tr -dc 'a-zA-Z0-9' | fold -w 40 | head -n 1 )"
>>
>> radosgw-admin realm create --rgw-realm=${REALM} --default
>> radosgw-admin zonegroup create --rgw-realm=${REALM} --rgw-zonegroup=${ZONEGRP} \
>>     --endpoints=http://${RGWHOST}:8000 --master --default
>> radosgw-admin zone create --rgw-realm=${REALM} --rgw-zonegroup=${ZONEGRP} \
>>     --rgw-zone=${ZONE1} --endpoints=http://${RGWHOST}:8000 \
>>     --access-key=${SYNC_AKEY} --secret=${SYNC_SKEY} --master --default
>> radosgw-admin user create --uid=sync --display-name="zone sync" \
>>     --access-key=${SYNC_AKEY} --secret=${SYNC_SKEY} --system
>> radosgw-admin period update --commit
>> sudo systemctl start ceph-radosgw@rgw.${RGWHOST}
>>
>> radosgw-admin zone create --rgw-realm=${REALM} --rgw-zonegroup=${ZONEGRP} \
>>     --rgw-zone=${ZONE2} --access-key=${SYNC_AKEY} --secret=${SYNC_SKEY} \
>>     --endpoints=http://${RGWHOST}:8002
>> radosgw-admin zone modify --rgw-realm=${REALM} --rgw-zonegroup=${ZONEGRP} \
>>     --rgw-zone=${ZONE2} --tier-type=elasticsearch \
>>     --tier-config=endpoint=http://${ELASTICHOST}:9200,num_replicas=1,num_shards=10
>> radosgw-admin period update --commit
>>
>> sudo systemctl restart ceph-radosgw@rgw.${RGWHOST}
>> sudo radosgw --keyring /etc/ceph/ceph.client.admin.keyring -f \
>>     --rgw-zone=${ZONE2} --rgw-frontends="civetweb port=8002"
>> 2018-01-08 00:21:54.389432 7f0fe9cd2e80 -1 Couldn't init storage provider (RADOS)
>>
>> **** As you can see, starting the rgw on port 8002 failed, but the rgw on
>> port 8000 started successfully.
>> **** Here is some more info that may be useful for diagnosis:
>>
>> $ cat /etc/ceph/ceph.conf
>> [global]
>> fsid = 3e5a32d4-e45e-48dd-a3c5-f6f28fef8edf
>> mon_initial_members = ceph-mon1, ceph-osd1, ceph-osd2, ceph-osd3
>> mon_host = 172.30.212.226,172.30.212.227,172.30.212.228,172.30.212.250
>> auth_cluster_required = cephx
>> auth_service_required = cephx
>> auth_client_required = cephx
>> osd_pool_default_size = 2
>> osd_pool_default_min_size = 2
>> osd_pool_default_pg_num = 100
>> osd_pool_default_pgp_num = 100
>> bluestore_compression_algorithm = zlib
>> bluestore_compression_mode = force
>> rgw_max_put_size = 21474836480
>> [osd]
>> osd_max_object_size = 1073741824
>> [mon]
>> mon_allow_pool_delete = true
>> [client.rgw.ceph-rgw1]
>> host = ceph-rgw1
>> rgw frontends = civetweb port=8000
>>
>> $ wget -O - -q http://ceph-rgw2:9200/
>> {
>> "name" : "Hippolyta",
>> "cluster_name" : "elasticsearch",
>> "version" : {
>> "number" : "2.3.1",
>> "build_hash" : "bd980929010aef404e7cb0843e61d0 665269fc39",
>> "build_timestamp" : "2016-04-04T12:25:05Z",
>> "build_snapshot" : false,
>> "lucene_version" : "5.5.0"
>> },
>> "tagline" : "You Know, for Search"
>> }
>>
>> $ ceph df
>> GLOBAL:
>>     SIZE     AVAIL     RAW USED     %RAW USED
>>     719G      705G       14473M          1.96
>> POOLS:
>>     NAME                    ID     USED     %USED     MAX AVAIL     OBJECTS
>>     .rgw.root               17     6035         0          333G          19
>>     zone1-a.rgw.control     18        0         0          333G           8
>>     zone1-a.rgw.meta        19      350         0          333G           2
>>     zone1-a.rgw.log         20       50         0          333G         176
>>     zone1-b.rgw.control     21        0         0          333G           8
>>     zone1-b.rgw.meta        22        0         0          333G           0
>>
>> $ rados df
>> POOL_NAME             USED  OBJECTS  CLONES  COPIES  MISSING_ON_PRIMARY  UNFOUND  DEGRADED  RD_OPS     RD  WR_OPS     WR
>> .rgw.root             6035       19       0      38                   0        0         0     817   553k      55  37888
>> zone1-a.rgw.control      0        8       0      16                   0        0         0       0      0       0      0
>> zone1-a.rgw.log         50      176       0     352                   0        0         0    3703  3522k    2451   1024
>> zone1-a.rgw.meta       350        2       0       4                   0        0         0       9   7168       3   2048
>> zone1-b.rgw.control      0        8       0      16                   0        0         0       0      0       0      0
>> zone1-b.rgw.meta         0        0       0       0                   0        0         0       0      0       0      0
>>
>> total_objects    213
>> total_used       14473M
>> total_avail      705G
>> total_space      719G
>>
>> $ radosgw-admin zonegroup get
>> {
>> "id": "80a70f06-3b8f-458a-b2fc-6ea648871891",
>> "name": "zone1",
>> "api_name": "zone1",
>> "is_master": "true",
>> "endpoints": [
>> "http://ceph-rgw1:8000"
>> ],
>> "hostnames": [],
>> "hostnames_s3website": [],
>> "master_zone": "0f265e95-d12b-43af-a60c-3685ee31d267",
>> "zones": [
>> {
>> "id": "0f265e95-d12b-43af-a60c-3685ee31d267",
>> "name": "zone1-a",
>> "endpoints": [
>> "http://ceph-rgw1:8000"
>> ],
>> "log_meta": "false",
>> "log_data": "true",
>> "bucket_index_max_shards": 0,
>> "read_only": "false",
>> "tier_type": "",
>> "sync_from_all": "true",
>> "sync_from": []
>> },
>> {
>> "id": "37c249e7-5159-4838-b287-a3641df6f1fb",
>> "name": "zone1-b",
>> "endpoints": [
>> "http://ceph-rgw1:8002"
>> ],
>> "log_meta": "false",
>> "log_data": "true",
>> "bucket_index_max_shards": 0,
>> "read_only": "false",
>> "tier_type": "elasticsearch",
>> "sync_from_all": "true",
>> "sync_from": []
>> }
>> ],
>> "placement_targets": [
>> {
>> "name": "default-placement",
>> "tags": []
>> }
>> ],
>> "default_placement": "default-placement",
>> "realm_id": "b9e5c955-a8d9-4a2a-bd0a-9b786269e909"
>> }
>>
>> $ radosgw-admin zone get --rgw-zone=zone1-a
>> {
>> "id": "0f265e95-d12b-43af-a60c-3685ee31d267",
>> "name": "zone1-a",
>> "domain_root": "zone1-a.rgw.meta:root",
>> "control_pool": "zone1-a.rgw.control",
>> "gc_pool": "zone1-a.rgw.log:gc",
>> "lc_pool": "zone1-a.rgw.log:lc",
>> "log_pool": "zone1-a.rgw.log",
>> "intent_log_pool": "zone1-a.rgw.log:intent",
>> "usage_log_pool": "zone1-a.rgw.log:usage",
>> "reshard_pool": "zone1-a.rgw.log:reshard",
>> "user_keys_pool": "zone1-a.rgw.meta:users.keys",
>> "user_email_pool": "zone1-a.rgw.meta:users.email",
>> "user_swift_pool": "zone1-a.rgw.meta:users.swift",
>> "user_uid_pool": "zone1-a.rgw.meta:users.uid",
>> "system_key": {
>> "access_key": "BoBwc6kUhr5L5GXZSKu0",
>> "secret_key": "TPOQcoZf9ZDtb8pRB7UsbhkxPdLRGE 0ruSBSBEuJ"
>> },
>> "placement_pools": [
>> {
>> "key": "default-placement",
>> "val": {
>> "index_pool": "zone1-a.rgw.buckets.index",
>> "data_pool": "zone1-a.rgw.buckets.data",
>> "data_extra_pool": "zone1-a.rgw.buckets.non-ec",
>> "index_type": 0,
>> "compression": ""
>> }
>> }
>> ],
>> "metadata_heap": "",
>> "tier_config": [],
>> "realm_id": ""
>> }
>>
>> $ radosgw-admin zone get --rgw-zone=zone1-b
>> {
>> "id": "37c249e7-5159-4838-b287-a3641df6f1fb",
>> "name": "zone1-b",
>> "domain_root": "zone1-b.rgw.meta:root",
>> "control_pool": "zone1-b.rgw.control",
>> "gc_pool": "zone1-b.rgw.log:gc",
>> "lc_pool": "zone1-b.rgw.log:lc",
>> "log_pool": "zone1-b.rgw.log",
>> "intent_log_pool": "zone1-b.rgw.log:intent",
>> "usage_log_pool": "zone1-b.rgw.log:usage",
>> "reshard_pool": "zone1-b.rgw.log:reshard",
>> "user_keys_pool": "zone1-b.rgw.meta:users.keys",
>> "user_email_pool": "zone1-b.rgw.meta:users.email",
>> "user_swift_pool": "zone1-b.rgw.meta:users.swift",
>> "user_uid_pool": "zone1-b.rgw.meta:users.uid",
>> "system_key": {
>> "access_key": "BoBwc6kUhr5L5GXZSKu0",
>> "secret_key": "TPOQcoZf9ZDtb8pRB7UsbhkxPdLRGE 0ruSBSBEuJ"
>> },
>> "placement_pools": [
>> {
>> "key": "default-placement",
>> "val": {
>> "index_pool": "zone1-b.rgw.buckets.index",
>> "data_pool": "zone1-b.rgw.buckets.data",
>> "data_extra_pool": "zone1-b.rgw.buckets.non-ec",
>> "index_type": 0,
>> "compression": ""
>> }
>> }
>> ],
>> "metadata_heap": "",
>> "tier_config": [
>> {
>> "key": "endpoint",
>> "val": "http://ceph-rgw2:9200"
>> },
>> {
>> "key": "num_replicas",
>> "val": "1"
>> },
>> {
>> "key": "num_shards",
>> "val": "10"
>> }
>> ],
>> "realm_id": "b9e5c955-a8d9-4a2a-bd0a-9b786269e909"
>> }
>>
>> # radosgw --keyring /etc/ceph/ceph.client.admin.keyring -f \
>>     --rgw-zone=zone1-b --rgw-frontends="civetweb port=8002" --debug-rgw=255
>> 2018-01-08 09:54:53.118526 7f681a211e80 -1 Couldn't init storage provider (RADOS)
>>
>> # tail -12 /var/log/ceph/ceph-client.admin.log
>> 2018-01-08 09:54:52.914513 7f681a211e80 20 zone zone1-b
>> 2018-01-08 09:54:52.919424 7f681a211e80 20 add_watcher() i=0
>> 2018-01-08 09:54:52.924267 7f681a211e80 20 add_watcher() i=1
>> 2018-01-08 09:54:52.928440 7f681a211e80 20 add_watcher() i=2
>> 2018-01-08 09:54:52.933062 7f681a211e80 20 add_watcher() i=3
>> 2018-01-08 09:54:52.937396 7f681a211e80 20 add_watcher() i=4
>> 2018-01-08 09:54:52.942317 7f681a211e80 20 add_watcher() i=5
>> 2018-01-08 09:54:52.946186 7f681a211e80 20 add_watcher() i=6
>> 2018-01-08 09:54:52.950883 7f681a211e80 20 add_watcher() i=7
>> 2018-01-08 09:54:52.950904 7f681a211e80  2 all 8 watchers are set, enabling cache
>> 2018-01-08 09:54:52.950930 7f681a211e80 20 generating connection object for zone zone1-a id 0f265e95-d12b-43af-a60c-3685ee31d267
>> 2018-01-08 09:54:53.118526 7f681a211e80 -1 Couldn't init storage provider (RADOS)
>>
>> Thanks very much,
>>
>> --Youzhong
>>
>