Hi,
I need your help upgrading our cluster from Hammer (latest version) to Jewel 10.2.5 without losing write access to Radosgw.
We have a fairly large cluster (4.3 PB raw) mostly used to store large S3 objects, and we currently have more than 500 TB of data in the ".rgw.buckets" pool, so I'm very cautious about upgrading it to Jewel.
The plan is to upgrade Ceph-mon and Radosgw to 10.2.5, while keeping the OSD nodes on Hammer, then slowly update them as well.
I am currently testing the upgrade procedure in a lab environment, but once I update ceph-mon and radosgw to Jewel, I can no longer upload files into new or existing buckets, although I can still create new buckets.
I read [1], [2], [3] and [4] and even ran the script from [4], as can be seen below, but I still cannot upload new objects.
I was hoping that if I waited long enough before updating from Hammer to Jewel, most of the big issues would be solved by point releases, but it seems that I'm doing something wrong, probably because of a lack of up-to-date documentation.
After the update to Jewel, this is how things look in my test environment.
root@ceph-mon1:~# radosgw zonegroup get
root@ceph-mon1:~# radosgw-admin period get
period init failed: (2) No such file or directory
2017-01-25 10:13:06.941018 7f98f0d13900 0 RGWPeriod::init failed to init realm id : (2) No such file or directory
root@ceph-mon1:~# radosgw-admin zonegroup get
failed to init zonegroup: (2) No such file or directory
root@ceph-mon1:~# ceph --version
ceph version 10.2.5 (c461ee19ecbc0c5c330aca20f7392c9a00730367)
root@ceph-mon1:~# radosgw-admin realm list
{
"default_info": "",
"realms": []
}
root@ceph-mon1:~# radosgw-admin period list
{
"periods": []
}
root@ceph-mon1:~# radosgw-admin period get
period init failed: (2) No such file or directory
2017-01-25 12:26:07.217986 7f97ca82e900 0 RGWPeriod::init failed to init realm id : (2) No such file or directory
root@ceph-mon1:~# radosgw-admin zonegroup get --rgw-zonegroup=default
{
"id": "default",
"name": "default",
"api_name": "",
"is_master": "true",
"endpoints": [],
"hostnames": [],
"hostnames_s3website": [],
"master_zone": "default",
"zones": [
{
"id": "default",
"name": "default",
"endpoints": [],
"log_meta": "false",
"log_data": "false",
"bucket_index_max_shards": 0,
"read_only": "false"
}
],
"placement_targets": [
{
"name": "default-placement",
"tags": []
}
],
"default_placement": "default-placement",
"realm_id": ""
}
root@ceph-mon1:~# radosgw-admin zone get --zone-id=default
{
"id": "default",
"name": "default",
"domain_root": ".rgw",
"control_pool": ".rgw.control",
"gc_pool": ".rgw.gc",
"log_pool": ".log",
"intent_log_pool": ".intent-log",
"usage_log_pool": ".usage",
"user_keys_pool": ".users",
"user_email_pool": ".users.email",
"user_swift_pool": ".users.swift",
"user_uid_pool": ".users.uid",
"system_key": {
"access_key": "",
"secret_key": ""
},
"placement_pools": [
{
"key": "default-placement",
"val": {
"index_pool": ".rgw.buckets.index",
"data_pool": ".rgw.buckets",
"data_extra_pool": ".rgw.buckets.extra",
"index_type": 0
}
}
],
"metadata_heap": ".rgw.meta",
"realm_id": ""
}
root@ceph-mon1:~# rados df
pool name KB objects clones degraded unfound rd rd KB wr wr KB
.log 0 127 0 0 0 41402 41275 41402 0
.rgw 4 14 0 0 0 147 117 35 14
.rgw.buckets 11635 4 0 0 0 4 4969 38 11637
.rgw.buckets.index 0 56 0 0 0 1871 1815 119 0
.rgw.control 0 8 0 0 0 0 0 0 0
.rgw.gc 0 32 0 0 0 5214 5182 3519 0
.rgw.meta 2 8 0 0 0 0 0 20 8
.rgw.root 2 4 0 0 0 72 48 12 8
.usage 0 2 0 0 0 87 87 174 0
.users.uid 1 4 0 0 0 104 96 44 2
rbd 0 0 0 0 0 0 0 0 0
total used 432024 259
total avail 84622260
total space 85054284
root@ceph-mon1:~# ceph -s
cluster XXX
health HEALTH_OK
monmap e1: 3 mons at {ceph-mon1=172.25.12.15:6789/0,ceph-mon2=172.25.12.16:6789/0,ceph-mon3=172.25.12.17:6789/0}
election epoch 68, quorum 0,1,2 ceph-mon1,ceph-mon2,ceph-mon3
osdmap e212: 9 osds: 9 up, 9 in
pgmap v8162: 1344 pgs, 11 pools, 11640 kB data, 259 objects
421 MB used, 82638 MB / 83060 MB avail
1344 active+clean
root@ceph-mon1:~# /etc/init.d/radosgw stop
root@ceph-mon1:~# cat fix_rgw.sh
#!/bin/sh
# fix_rgw.sh - re-register the existing "default" RGW zonegroup and zone
# under a newly created realm named "myrealm".
#
# Steps, in order:
#   1. Run a harmless radosgw-admin query first.
#   2. Dump the current "default" zonegroup and zone definitions to JSON
#      files in the current working directory.
#   3. Create the realm "myrealm".
#   4. Feed the dumped JSON back through `zonegroup set` / `zone set`.
#   5. Mark the "default" zonegroup and zone as the defaults.
#
# Side effects: writes default-zg.json and default-zone.json to the current
# directory (overwriting any existing files of those names).
#
# There is no `set -e`, so a failing command does not stop the script; each
# later step still runs.
#
# NOTE(review): the script never runs a period update/commit after creating
# the realm -- confirm whether that is required for the new configuration
# to take effect on Jewel.

# Trace every command as it is executed.
set -x
# Name of the admin binary; kept in a variable so it can be swapped in one place.
RADOSGW_ADMIN=radosgw-admin
echo "Exercise initialization code"
# The lookup result itself is not used by anything later in the script;
# presumably "foo" is a throwaway uid and only the tool start-up matters.
$RADOSGW_ADMIN user info --uid=foo # exercise init code (???)
echo "Get default zonegroup"
# Dump the "default" zonegroup and rewrite it on the fly:
#   - every line containing `"id":` becomes `"id": "default",` (this also
#     hits the "id" of each entry inside "zones", not only the top-level one);
#   - the `"master_zone` line becomes `"master_zone": "default",`.
# The result is saved to default-zg.json.
$RADOSGW_ADMIN zonegroup get --rgw-zonegroup=default | sed 's/"id":.*/"id": "default",/g' | sed 's/"master_zone.*/"master_zone": "default",/g' > default-zg.json
echo "Get default zone"
# Dump the zone whose id is "default", unmodified, to default-zone.json.
$RADOSGW_ADMIN zone get --zone-id=default > default-zone.json
echo "Creating realm"
$RADOSGW_ADMIN realm create --rgw-realm=myrealm
echo "Creating default zonegroup"
# Re-apply the (sed-edited) zonegroup definition from stdin.
$RADOSGW_ADMIN zonegroup set --rgw-zonegroup=default < default-zg.json
echo "Creating default zone"
# Re-apply the dumped zone definition from stdin.
$RADOSGW_ADMIN zone set --rgw-zone=default < default-zone.json
echo "Setting default zonegroup to 'default'"
$RADOSGW_ADMIN zonegroup default --rgw-zonegroup=default
echo "Setting default zone to 'default'"
$RADOSGW_ADMIN zone default --rgw-zone=default
root@ceph-mon1:~# chmod +x fix_rgw.sh
root@ceph-mon1:~# ./fix_rgw.sh
+ RADOSGW_ADMIN=radosgw-admin
+ echo Exercise initialization code
Exercise initialization code
+ radosgw-admin user info --uid=foo
could not fetch user info: no user info saved
+ echo Get default zonegroup
Get default zonegroup
+ sed s/"master_zone.*/"master_zone": "default",/g
+ sed s/"id":.*/"id": "default",/g
+ radosgw-admin zonegroup get --rgw-zonegroup=default
+ echo Get default zone
Get default zone
+ radosgw-admin zone get --zone-id=default
+ echo Creating realm
Creating realm
+ radosgw-admin realm create --rgw-realm=myrealm
{
"id": "7c95f6f4-d437-45c0-bc4d-12f17f7ca4ca",
"name": "myrealm",
"current_period": "a79d06d5-4836-4f2b-ab3d-961d62a00815",
"epoch": 1
}
+ echo Creating default zonegroup
Creating default zonegroup
+ radosgw-admin zonegroup set --rgw-zonegroup=default
{
"id": "default",
"name": "default",
"api_name": "",
"is_master": "true",
"endpoints": [],
"hostnames": [],
"hostnames_s3website": [],
"master_zone": "default",
"zones": [
{
"id": "default",
"name": "default",
"endpoints": [],
"log_meta": "false",
"log_data": "false",
"bucket_index_max_shards": 0,
"read_only": "false"
}
],
"placement_targets": [
{
"name": "default-placement",
"tags": []
}
],
"default_placement": "default-placement",
"realm_id": "7c95f6f4-d437-45c0-bc4d-12f17f7ca4ca"
}
+ echo Creating default zone
Creating default zone
+ radosgw-admin zone set --rgw-zone=default
zone id default{
"id": "default",
"name": "default",
"domain_root": ".rgw",
"control_pool": ".rgw.control",
"gc_pool": ".rgw.gc",
"log_pool": ".log",
"intent_log_pool": ".intent-log",
"usage_log_pool": ".usage",
"user_keys_pool": ".users",
"user_email_pool": ".users.email",
"user_swift_pool": ".users.swift",
"user_uid_pool": ".users.uid",
"system_key": {
"access_key": "",
"secret_key": ""
},
"placement_pools": [
{
"key": "default-placement",
"val": {
"index_pool": ".rgw.buckets.index",
"data_pool": ".rgw.buckets",
"data_extra_pool": ".rgw.buckets.extra",
"index_type": 0
}
}
],
"metadata_heap": ".rgw.meta",
"realm_id": "7c95f6f4-d437-45c0-bc4d-12f17f7ca4ca"
}
+ echo Setting default zonegroup to 'default'
Setting default zonegroup to 'default'
+ radosgw-admin zonegroup default --rgw-zonegroup=default
+ echo Setting default zone to 'default'
Setting default zone to 'default'
+ radosgw-admin zone default --rgw-zone=default
root@ceph-mon1:~# radosgw-admin zonegroup get
{
"id": "default",
"name": "default",
"api_name": "",
"is_master": "true",
"endpoints": [],
"hostnames": [],
"hostnames_s3website": [],
"master_zone": "default",
"zones": [
{
"id": "default",
"name": "default",
"endpoints": [],
"log_meta": "false",
"log_data": "false",
"bucket_index_max_shards": 0,
"read_only": "false"
}
],
"placement_targets": [
{
"name": "default-placement",
"tags": []
}
],
"default_placement": "default-placement",
"realm_id": "7c95f6f4-d437-45c0-bc4d-12f17f7ca4ca"
}
After running the script:
root@ceph-mon1:/var/log/ceph# radosgw-admin zonegroup get
{
"id": "default",
"name": "default",
"api_name": "",
"is_master": "true",
"endpoints": [],
"hostnames": [],
"hostnames_s3website": [],
"master_zone": "default",
"zones": [
{
"id": "default",
"name": "default",
"endpoints": [],
"log_meta": "false",
"log_data": "false",
"bucket_index_max_shards": 0,
"read_only": "false"
}
],
"placement_targets": [
{
"name": "default-placement",
"tags": []
}
],
"default_placement": "default-placement",
"realm_id": "7c95f6f4-d437-45c0-bc4d-12f17f7ca4ca"
}
root@ceph-mon1:~# radosgw-admin zone get
{
"id": "default",
"name": "default",
"domain_root": ".rgw",
"control_pool": ".rgw.control",
"gc_pool": ".rgw.gc",
"log_pool": ".log",
"intent_log_pool": ".intent-log",
"usage_log_pool": ".usage",
"user_keys_pool": ".users",
"user_email_pool": ".users.email",
"user_swift_pool": ".users.swift",
"user_uid_pool": ".users.uid",
"system_key": {
"access_key": "",
"secret_key": ""
},
"placement_pools": [
{
"key": "default-placement",
"val": {
"index_pool": ".rgw.buckets.index",
"data_pool": ".rgw.buckets",
"data_extra_pool": ".rgw.buckets.extra",
"index_type": 0
}
}
],
"metadata_heap": ".rgw.meta",
"realm_id": "7c95f6f4-d437-45c0-bc4d-12f17f7ca4ca"
}
root@ceph-mon1:~# vi /etc/ceph/ceph.conf -> enabled debug for rgw
root@ceph-mon1:~# /etc/init.d/radosgw start
Starting client.radosgw.ceph-mon1...
The log file with debugging enabled for the failed upload request is uploaded at http://pastebin.com/1eLfrazn
I would appreciate any help with this, as I have spent a lot of time trying different things without any progress so far.
Thank you,
George
[1] http://lists.opennebula.org/pipermail/ceph-users-ceph.com/2016-July/011797.html
[2] http://robbat2.livejournal.com/242849.html
[3] http://www.spinics.net/lists/ceph-users/msg28100.html
[4] http://lists.ceph.com/pipermail/ceph-users-ceph.com/2016-July/011157.html
_______________________________________________ ceph-users mailing list ceph-users@xxxxxxxxxxxxxx http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com