Hi All,

I took an old version of code that was originally written by Mike Christie and started testing it. I found some issues, fixed them, and added some additional logic for completeness.

Here is the code in its current state. Please provide me with your feedback.

Also, see the attached diagram for the current behavior of the hardware handler.

Thanks,
chandra

/*
 * Engenio/LSI RDAC DM HW handler
 *
 * Copyright (C) 2005 Mike Christie. All rights reserved.
 * Copyright (C) Chandra Seetharaman, IBM Corp. 2007
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 */
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>

#include "dm.h"
#include "dm-hw-handler.h"

#define RDAC_DM_HWH_NAME "rdac"
#define RDAC_DM_HWH_VER "0.4"

/*
 * LSI mode page stuff
 *
 * These struct definitions and the forming of the
 * mode page were taken from the LSI RDAC 2.4 GPL'd
 * driver, and then converted to Linux conventions.
*/ #define RDAC_QUIESCENCE_TIME 20; /* * Page Codes */ #define RDAC_PAGE_CODE_REDUNDANT_CONTROLLER 0x2c /* * Controller modes definitions */ #define RDAC_MODE_TRANSFER_ALL_LUNS 0x01 #define RDAC_MODE_TRANSFER_SPECIFIED_LUNS 0x02 /* * RDAC Options field */ #define RDAC_FORCED_QUIESENCE 0x02 /* * default value TODO - make configurable */ #define RDAC_FAILOVER_TIMEOUT (60 * HZ) struct rdac_mode_6_hdr { u8 data_len; u8 medium_type; u8 device_params; u8 block_desc_len; }; struct rdac_mode_10_hdr { u16 data_len; u8 medium_type; u8 device_params; u16 reserved; u16 block_desc_len; }; struct rdac_mode_common { u8 controller_serial[16]; u8 alt_controller_serial[16]; u8 rdac_mode[2]; u8 alt_rdac_mode[2]; u8 quiescence_timeout; u8 rdac_options; }; struct rdac_pg_legacy { struct rdac_mode_6_hdr hdr; u8 page_code; u8 page_len; struct rdac_mode_common common; #define MODE6_MAX_LUN 32 u8 lun_table[MODE6_MAX_LUN]; u8 reserved2[32]; u8 reserved3; u8 reserved4; }; struct rdac_pg_expanded { struct rdac_mode_10_hdr hdr; u8 page_code; u8 subpage_code; u8 page_len[2]; struct rdac_mode_common common; u8 lun_table[256]; u8 reserved3; u8 reserved4; }; struct c9_inquiry { u8 peripheral_info; u8 page_code; /* 0xC9 */ u8 reserved1; u8 page_len; u8 page_id[4]; /* "vace" */ u8 avte_cvp; u8 path_prio; u8 reserved2[38]; }; #define SUBSYS_ID_LEN 16 #define SLOT_ID_LEN 2 struct c4_inquiry { u8 peripheral_info; u8 page_code; /* 0xC4 */ u8 reserved1; u8 page_len; u8 page_id[4]; /* "subs" */ u8 subsys_id[SUBSYS_ID_LEN]; u8 revision[4]; u8 slot_id[SUBSYS_ID_LEN]; u8 reserved[2]; }; struct rdac_handler { unsigned use_10_ms; unsigned timeout; struct rdac_controller *ctlr; #define UNINITIALIZED_LUN (1 << 8) #define UNSUPPORTED_LUN (2 << 8) unsigned lun; unsigned char sense[SCSI_SENSE_BUFFERSIZE]; }; struct rdac_private { struct rdac_handler *h; struct path *path; struct list_head entry; /* list of all controllers */ }; struct rdac_controller { u8 subsys_id[SUBSYS_ID_LEN]; u8 slot_id[SLOT_ID_LEN]; struct 
kref kref; struct list_head node; /* list of all controllers */ spinlock_t lock; int submitted; struct list_head cmd_list; /* list of commands to be submitted */ }; struct c8_inquiry { u8 peripheral_info; u8 page_code; /* 0xC8 */ u8 reserved1; u8 page_len; u8 page_id[4]; /* "edid" */ u8 reserved2[3]; u8 vol_uniq_id_len; u8 vol_uniq_id[16]; u8 vol_user_label_len; u8 vol_user_label[60]; u8 array_uniq_id_len; u8 array_unique_id[16]; u8 array_user_label_len; u8 array_user_label[60]; u8 lun[8]; }; static LIST_HEAD(ctlr_list); static spinlock_t list_lock = SPIN_LOCK_UNLOCKED; static inline void free_bio(struct bio *bio) { __free_page(bio->bi_io_vec[0].bv_page); bio_put(bio); } static void submit_inquiry(struct rdac_handler *, struct path *, int, unsigned int, bio_end_io_t); static int c9_inquiry_endio(struct bio *, unsigned int, int); #define submit_c9_inquiry(h, path) \ submit_inquiry(h, path, 0xC9, sizeof(struct c9_inquiry), \ c9_inquiry_endio) #define submit_c4_inquiry(h, path) \ submit_inquiry(h, path, 0xC4, sizeof(struct c4_inquiry), \ c4_inquiry_endio) #define submit_c8_inquiry(h, path) \ submit_inquiry(h, path, 0xC8, sizeof(struct c8_inquiry), \ c8_inquiry_endio) static void rdac_resubmit_all(struct rdac_handler *h) { struct rdac_private *tmp, *p; struct rdac_controller *ctlr = h->ctlr; spin_lock(&ctlr->lock); list_for_each_entry_safe(p, tmp, &ctlr->cmd_list, entry) { submit_c9_inquiry(p->h, p->path); list_del(&p->entry); kfree(p); } ctlr->submitted = 0; spin_unlock(&ctlr->lock); } static int mode_select_endio(struct bio *bio, unsigned int done, int error) { struct rdac_private *p = bio->bi_private; struct rdac_handler *h = p->h; struct path *path = p->path; int sense = bio_sense_value(bio); kfree(p); if (bio->bi_size) return 1; /* If it is retryable failure, submit the c9 inquiry again */ if (sense == 0x59136 || sense == 0x68b02 || sense == 0xb8b02) { /* 0x59136 - Command lock contention * 0x[6b]8b02 - Quiesense in progress or achieved */ submit_c9_inquiry(h, 
path); goto done; } if (sense) DMINFO("dm-rdac: MODE_SELECT failed on %s with sense 0x%x", path->dev->name, sense); if (error || sense) dm_pg_init_complete(path, MP_FAIL_PATH); else dm_pg_init_complete(path, 0); done: rdac_resubmit_all(h); /* request is freed in block layer */ free_bio(bio); return 0; } static struct bio *get_rdac_bio(struct path *path, unsigned data_size, bio_end_io_t endio, int rw, struct rdac_handler *h) { struct bio *bio; struct page *page; struct rdac_private *p; bio = bio_alloc(GFP_ATOMIC, 1); if (!bio) return NULL; if (rw == WRITE) bio->bi_rw |= (1 << BIO_RW); bio->bi_bdev = path->dev->bdev; bio->bi_sector = 0; bio->bi_end_io = endio; p = kmalloc(sizeof(*p), GFP_ATOMIC); if (!p) goto bio; p->path = path; p->h = h; bio->bi_private = p; page = alloc_page(GFP_ATOMIC); if (!page) goto free_private; if (bio_add_page(bio, page, data_size, 0) == data_size) return bio; __free_page(page); free_private: kfree(p); bio: bio_put(bio); return NULL; } static struct request *get_rdac_req(struct rdac_handler *h, struct bio *bio, struct path *path, int rw) { struct request *rq; struct block_device *bdev = bio->bi_bdev; struct request_queue *q = bdev_get_queue(bdev); rq = blk_get_request(q, rw, GFP_ATOMIC); if (!rq) { DMINFO("dm-rdac: get_failover_req: blk_get_request failed"); return NULL; } rq->bio = rq->biotail = bio; blk_rq_bio_prep(q, rq, bio); rq->rq_disk = bdev->bd_contains->bd_disk; /* bio backed don't set data */ rq->buffer = rq->data = NULL; /* rq data_len used for pc cmd's request_bufflen */ rq->data_len = bio->bi_size; rq->sense = h->sense; memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE); rq->sense_len = 0; memset(&rq->cmd, 0, BLK_MAX_CDB); rq->timeout = h->timeout; rq->flags |= (REQ_BLOCK_PC | REQ_FAILFAST | REQ_NOMERGE); return rq; } static struct request *rdac_failover_get(struct rdac_handler *h, struct path *path) { struct bio *bio; struct request *rq; struct rdac_mode_common *common; unsigned data_size = h->use_10_ms ? 
sizeof(struct rdac_pg_expanded) : sizeof(struct rdac_pg_legacy); /* get bio backing */ if (data_size > PAGE_SIZE) /* this should never happen */ return NULL; bio = get_rdac_bio(path, data_size, mode_select_endio, WRITE, h); if (!bio) { DMERR("dm-rdac: rdac_failover_get: no bio"); return NULL; } if (h->use_10_ms) { struct rdac_pg_expanded *rdac_pg; rdac_pg = (struct rdac_pg_expanded *)bio_data(bio); memset(rdac_pg, 0, data_size); common = &rdac_pg->common; rdac_pg->page_code = RDAC_PAGE_CODE_REDUNDANT_CONTROLLER + 0x40; rdac_pg->subpage_code = 0x1; rdac_pg->page_len[0] = 0x01; rdac_pg->page_len[1] = 0x28; rdac_pg->lun_table[h->lun] = 0x81; } else { struct rdac_pg_legacy *rdac_pg; rdac_pg = (struct rdac_pg_legacy *)bio_data(bio); memset(rdac_pg, 0, data_size); common = &rdac_pg->common; rdac_pg->page_code = RDAC_PAGE_CODE_REDUNDANT_CONTROLLER; rdac_pg->page_len = 0x68; rdac_pg->lun_table[h->lun] = 0x81; } common->rdac_mode[1] = RDAC_MODE_TRANSFER_SPECIFIED_LUNS; common->quiescence_timeout = RDAC_QUIESCENCE_TIME; common->rdac_options = RDAC_FORCED_QUIESENCE; /* get request for block layer packet command */ rq = get_rdac_req(h, bio, path, WRITE); if (!rq) { DMERR("dm-rdac: rdac_failover_get: no rq"); free_bio(bio); return NULL; } /* Prepare the command. 
*/ if (h->use_10_ms) { rq->cmd[0] = MODE_SELECT_10; rq->cmd[7] = data_size >> 8; rq->cmd[8] = data_size & 0xff; } else { rq->cmd[0] = MODE_SELECT; rq->cmd[4] = data_size; } rq->cmd_len = COMMAND_SIZE(rq->cmd[0]); return rq; } static int submit_mode_select(struct rdac_handler *h, struct path *path) { int ret = 1; struct request *rq; struct request_queue *q = bdev_get_queue(path->dev->bdev); if (!q) { DMINFO("dm-rdac: submit_mode_select: no queue"); goto fail_path; } rq = rdac_failover_get(h, path); if (!rq) { DMERR("dm-rdac: submit_mode_select: no rq"); goto fail_path; } DMINFO("dm-rdac: queueing MODE_SELECT command on %s", path->dev->name); elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 1); ret = 0; fail_path: return ret; } static void release_ctlr(struct kref *kref) { struct rdac_controller *ctlr; ctlr = container_of(kref, struct rdac_controller, kref); spin_lock(&list_lock); list_del(&ctlr->node); spin_unlock(&list_lock); kfree(ctlr); } static struct rdac_controller *get_controller(u8 *subsys_id, u8 *slot_id) { struct rdac_controller *ctlr, *tmp; spin_lock(&list_lock); list_for_each_entry(tmp, &ctlr_list, node) { if ((memcmp(tmp->subsys_id, subsys_id, SUBSYS_ID_LEN) == 0) && (memcmp(tmp->slot_id, slot_id, SLOT_ID_LEN) == 0)) { kref_get(&tmp->kref); spin_unlock(&list_lock); return tmp; } } ctlr = kmalloc(sizeof(*ctlr), GFP_ATOMIC); if (!ctlr) goto done; /* initialize fields of controller */ memcpy(ctlr->subsys_id, subsys_id, SUBSYS_ID_LEN); memcpy(ctlr->slot_id, slot_id, SLOT_ID_LEN); kref_init(&ctlr->kref); spin_lock_init(&ctlr->lock); ctlr->submitted = 0; INIT_LIST_HEAD(&ctlr->cmd_list); list_add(&ctlr->node, &ctlr_list); done: spin_unlock(&list_lock); return ctlr; } static int c4_inquiry_endio(struct bio *bio, unsigned int done, int error) { struct rdac_private *p = bio->bi_private; struct rdac_handler *h = p->h; struct path *path = p->path; struct c4_inquiry *sp; kfree(p); if (error) { dm_pg_init_complete(path, MP_FAIL_PATH); goto done; } sp = (struct 
c4_inquiry *)bio_data(bio); h->ctlr = get_controller(sp->subsys_id, sp->slot_id); if (h->ctlr) submit_c9_inquiry(h, path); else dm_pg_init_complete(path, MP_FAIL_PATH); done: /* request is freed in block layer */ free_bio(bio); return 0; } static int c9_inquiry_endio(struct bio *bio, unsigned int done, int error) { struct rdac_private *p = bio->bi_private; struct rdac_handler *h = p->h; struct path *path = p->path; struct c9_inquiry *sp; if (error) { dm_pg_init_complete(path, MP_FAIL_PATH); goto done; } /* We need to look at the sense keys here to take clear action. * For now simple logic: if controller owns the lun, return * dm_pg_init_complete(), otherwise submit MODE SELECT. */ sp = (struct c9_inquiry *)bio_data(bio); if (sp->avte_cvp & 0x1) { dm_pg_init_complete(path, 0); goto done; } if (h->ctlr) { spin_lock(&h->ctlr->lock); if (!h->ctlr->submitted) { if (submit_mode_select(h, path) != 0) dm_pg_init_complete(path, MP_FAIL_PATH); else h->ctlr->submitted = 1; } else { list_add(&p->entry, &h->ctlr->cmd_list); p = NULL; /* Reuse p, do not free */ } spin_unlock(&h->ctlr->lock); } else submit_c4_inquiry(h, path); done: kfree(p); /* request is freed in block layer */ free_bio(bio); return 0; } static int c8_inquiry_endio(struct bio *bio, unsigned int done, int error) { struct rdac_private *p = bio->bi_private; struct rdac_handler *h = p->h; struct path *path = p->path; struct c8_inquiry *sp; kfree(p); if (error) { dm_pg_init_complete(path, MP_FAIL_PATH); goto done; } /* We need to look at the sense keys here to take clear action. * For now simple logic: Get the lun from the inquiry page. 
*/ sp = (struct c8_inquiry *)bio_data(bio); if (sp->lun[7] >= MODE6_MAX_LUN && !h->use_10_ms) { DMERR("dm-rdac: MODE_SELECT6 cannot support device(%s) with " "lun %d(>=%d)\n", path->dev->name, sp->lun[7], MODE6_MAX_LUN); h->lun = UNSUPPORTED_LUN; dm_pg_init_complete(path, MP_FAIL_PATH); goto done; } h->lun = sp->lun[7]; /* currently it uses only one byte */ submit_c9_inquiry(h, path); done: /* request is freed in block layer */ free_bio(bio); return 0; } static struct request *rdac_inquiry_get(struct rdac_handler *h, struct path *path, int page_code, unsigned int len, bio_end_io_t endio) { struct bio *bio; struct request *rq; bio = get_rdac_bio(path, len, endio, READ, h); if (!bio) { DMERR("dm-rdac: rdac_inquiry_get: no bio"); return NULL; } memset(bio_data(bio), 0, len); /* get request for block layer packet command */ rq = get_rdac_req(h, bio, path, READ); if (!rq) { DMERR("dm-rdac: rdac_inquiry_get: no rq"); free_bio(bio); return NULL; } /* Prepare the command. */ rq->cmd[0] = INQUIRY; rq->cmd[1] = 1; rq->cmd[2] = page_code; rq->cmd[4] = len; rq->cmd_len = COMMAND_SIZE(rq->cmd[0]); return rq; } /* * only support subpage2c until we confirm that this is just a matter of * of updating firmware or not, and RDAC (basic AVT works already) for now * but we can add these in in when we get time and testers */ static int rdac_create(struct hw_handler *hwh, unsigned argc, char **argv) { struct rdac_handler *h; unsigned timeout, use_10_ms; if (argc == 0) { /* No arguments: use defaults */ timeout = RDAC_FAILOVER_TIMEOUT; use_10_ms = 1; } else if (argc != 2) { DMWARN("dm-rdac: incorrect number of arguments"); return -EINVAL; } else { if ((sscanf(argv[0], "%u", &use_10_ms) != 1) || (use_10_ms > 1)) { DMWARN("dm-rdac: invalid command mode selected"); return -EINVAL; } if (sscanf(argv[1], "%u", &timeout) != 1) { DMWARN("dm-rdac: invalid timeout value"); return -EINVAL; } } h = kzalloc(sizeof(*h), GFP_KERNEL); if (!h) return -ENOMEM; hwh->context = h; h->timeout = timeout; 
h->use_10_ms = use_10_ms; h->lun = UNINITIALIZED_LUN; DMWARN("dm-rdac: using %s RDAC command with timeout %u", h->use_10_ms?"expanded":"legacy",h->timeout); return 0; } static void rdac_destroy(struct hw_handler *hwh) { struct rdac_handler *h = (struct rdac_handler *) hwh->context; if (h->ctlr) kref_put(&h->ctlr->kref, release_ctlr); kfree(h); hwh->context = NULL; } static unsigned rdac_error(struct hw_handler *hwh, struct bio *bio) { int sense; if (bio_sense_valid(bio)) { sense = bio_sense_value(bio); /* sense key / asc / ascq */ if (sense == 0x020481) { /* LUN Not Ready - Storage firmware incompatible * Manual code synchonisation required. * * Nothing we can do here. Try to bypass the path. */ return MP_BYPASS_PG; } else if (sense == 0x059401) { /* Invalid Request - Current Logical Unit Ownership. * Controller is not the current owner of the LUN, * Fail the path, so that the other path be used. */ return MP_FAIL_PATH; } else if (sense == 0x0204A1) { /* LUN Not Ready - Quiescense in progress * * Just retry and wait. 
*/ return 0; } } /* Try default handler */ return dm_scsi_err_handler(hwh, bio); } static void submit_inquiry(struct rdac_handler *h, struct path *path, int page_code, unsigned int len, bio_end_io_t endio) { struct request *rq; struct request_queue *q = bdev_get_queue(path->dev->bdev); if (!q) goto fail_path; rq = rdac_inquiry_get(h, path, page_code, len, endio); if (!rq) goto fail_path; elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 1); return; fail_path: dm_pg_init_complete(path, MP_FAIL_PATH); } static void rdac_pg_init(struct hw_handler *hwh, unsigned bypassed, struct path *path) { struct rdac_handler *h = hwh->context; switch (h->lun) { case UNINITIALIZED_LUN: submit_c8_inquiry(h, path); break; case UNSUPPORTED_LUN: dm_pg_init_complete(path, MP_FAIL_PATH); break; default: submit_c9_inquiry(h, path); } } static struct hw_handler_type rdac_handler = { .name = RDAC_DM_HWH_NAME, .module = THIS_MODULE, .create = rdac_create, .destroy = rdac_destroy, .pg_init = rdac_pg_init, .error = rdac_error, }; static int __init rdac_init(void) { int r = dm_register_hw_handler(&rdac_handler); if (r < 0) DMERR("%s: register failed %d", RDAC_DM_HWH_NAME, r); DMINFO("%s: version %s loaded", RDAC_DM_HWH_NAME, RDAC_DM_HWH_VER); return r; } static void __exit rdac_exit(void) { int r = dm_unregister_hw_handler(&rdac_handler); if (r < 0) DMERR("%s: unregister failed %d", RDAC_DM_HWH_NAME, r); } module_init(rdac_init); module_exit(rdac_exit); MODULE_DESCRIPTION("DM Multipath LSI/Engenio RDAC support"); MODULE_AUTHOR("Mike Christie"); MODULE_LICENSE("GPL"); MODULE_VERSION(RDAC_DM_HWH_VER); Pre� ph � � i�� � � p� � � � � � h� � � ph � � p�� > � � aph � � ֟� � ΟA x?� � � � �� � p� �� � � � � � � aph � raph �� p� � ap� ph aph � raph � ���� �� � �� � � � � � � � � � x aph � � � � � � ! 
--
----------------------------------------------------------------------
    Chandra Seetharaman               | Be careful what you choose....
              - sekharan@xxxxxxxxxx   |      .......you may get it.
----------------------------------------------------------------------
Attachment:
dm-rdac.png
Description: PNG image
-- dm-devel mailing list dm-devel@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/dm-devel