Gluster is a distributed file system (www.gluster.org). The backing store gives block access protocols such as iSCSI access to data within gluster. Gluster data resides on volumes which are physically located on servers. A "LUN" is represented by a file within a volume. To specify which file to use: --bstype=glfs --backing-store="volume@hostname:filename" Optionally, logs may be specified. See README.glfs. All I/Os are synchronous. This is based off the rdwr and rbd backend storage drivers. Signed-off-by: Dan Lambright <dlambrig@xxxxxxxxxx> --- Makefile | 1 + doc/README.glfs | 50 +++++ usr/Makefile | 7 + usr/bs_glfs.c | 561 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 619 insertions(+) create mode 100644 doc/README.glfs create mode 100644 usr/bs_glfs.c diff --git a/Makefile b/Makefile index b74f4ae..5fb6cfa 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,7 @@ export VERSION PREFIX # Export the feature switches so sub-make knows about them export ISCSI_RDMA export CEPH_RBD +export GLFS_BD .PHONY: all all: programs doc conf scripts diff --git a/doc/README.glfs b/doc/README.glfs new file mode 100644 index 0000000..77c607e --- /dev/null +++ b/doc/README.glfs @@ -0,0 +1,50 @@ +The 'glfs' backing-store driver provides block access to a file within the +gluster distributed file system (www.gluster.org). The file represents a +LUN visible to the initiator (the file may be a regular file in gluster's +underlying XFS filesystem, or a gluster "block device"). This configuration +gives gluster support for any access method supported by the target driver, +such as iSCSI. + +The backing-store driver uses the interfaces to gluster in libgfapi.so. +This file is part of the glusterfs-api package which can be downloaded from +www.gluster.org. + +To build the glfs backing store driver, set the GLFS_BD environment +variable (export GLFS_BD=1) before running make. 
+ +When a LUN on a target is created with backing store of type glfs (--bstype +glfs), a handle to gluster is created. The handle is destroyed when the +target is closed. The handle represents a particular volume on a gluster +server and a particular file representing the LUN. + +To set the gluster volume and file, --backing-store takes the form: +volume@hostname:filename + +For example, if the volume name was vol1, the host gprfs010, and the file +name "disk1", you would set --backing-store to: + +--backing-store="vol1@gprfs010:disk1" + +For each CDB, the driver issues an appropriate gluster api call against the +handle. For example, WRITE_16 becomes glfs_pwrite(), SYNCHRONIZE_CACHE +becomes glfs_fdatasync() and UNMAP becomes glfs_discard(). + +Each call is synchronous, meaning the thread will wait for a response from +gluster before returning to the caller. The libgfapi interfaces support +asynchronous calls, and an asynchronous version of the driver has been +tested. For more information on the asynchronous version please contact +dlambrig@xxxxxxxxxx. + +If the backing store driver were not used, the linux target driver could +still write data to gluster by loopback mounting the gluster file system. +The backing-store would be a file within the file system. Gluster uses FUSE +to forward I/O from the kernel to its user space daemon. The overhead +incurred by FUSE and the kernel is removed when the backing store driver and +libgfapi package are used. + +The libgfapi interfaces support logging. You can use the --bsopts option to +set the logfile and loglevel. 
+ +--bsopts="logfile=glfs.log;loglevel=3" + + diff --git a/usr/Makefile b/usr/Makefile index e29826c..6234f3f 100644 --- a/usr/Makefile +++ b/usr/Makefile @@ -17,6 +17,10 @@ ifneq ($(CEPH_RBD),) MODULES += bs_rbd.so endif +ifneq ($(GLFS_BD),) +MODULES += bs_glfs.so +endif + ifneq ($(shell test -e /usr/include/sys/eventfd.h && test -e /usr/include/libaio.h && echo 1),) CFLAGS += -DUSE_EVENTFD TGTD_OBJS += bs_aio.o @@ -88,6 +92,9 @@ tgtimg: $(TGTIMG_OBJS) bs_rbd.so: bs_rbd.c $(CC) -shared $(CFLAGS) bs_rbd.c -o bs_rbd.so -lrados -lrbd +bs_glfs.so: bs_glfs.c + $(CC) -I/usr/include/glusterfs/api -shared $(CFLAGS) bs_glfs.c -o bs_glfs.so -lgfapi + .PHONY: install install: $(PROGRAMS) $(MODULES) install -d -m 755 $(DESTDIR)$(sbindir) diff --git a/usr/bs_glfs.c b/usr/bs_glfs.c new file mode 100644 index 0000000..bb09f33 --- /dev/null +++ b/usr/bs_glfs.c @@ -0,0 +1,561 @@ +/* + * Synchronous glfs backing store routines + * + * modified from bs_rbd.c + * Copyright (C) 2013 Dan Lambright <dlambrig@xxxxxxxxxx> + * Copyright (C) 2006-2007 FUJITA Tomonori <tomof@xxxxxxx> + * Copyright (C) 2006-2007 Mike Christie <michaelc@xxxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+#define _XOPEN_SOURCE 600
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <linux/fs.h>
+#include <sys/epoll.h>
+
+#include "list.h"
+#include "util.h"
+#include "tgtd.h"
+#include "scsi.h"
+#include "spc.h"
+#include "bs_thread.h"
+
+#include "glfs.h"
+
+/*
+ * Per-LU private state of the glfs backing store.  It is located with
+ * GFSP() and accounted for in glfs_bst.bs_datasize.
+ */
+struct active_glfs {
+	char *name;		/* not referenced anywhere in this file */
+	glfs_t *fs;		/* handle returned by glfs_new(), or NULL */
+	glfs_fd_t *gfd;		/* handle returned by glfs_open(), or NULL */
+	char *logfile;		/* value of the "logfile" bsopt, or NULL */
+	int loglevel;		/* value of the "loglevel" bsopt */
+};
+
+#define ALLOWED_BSOFLAGS (O_SYNC | O_DIRECT | O_RDWR | O_LARGEFILE)
+
+#define GLUSTER_PORT 24007
+
+/*
+ * Locate the struct active_glfs of a LU: it is placed after struct scsi_lu
+ * and struct bs_thread_info.
+ * NOTE(review): this presumes BS_THREAD_I() places bs_thread_info directly
+ * behind struct scsi_lu - confirm against bs_thread.h.
+ */
+#define GFSP(lu)	((struct active_glfs *)			\
+				((char *)(lu) +			\
+				sizeof(struct scsi_lu) +	\
+				sizeof(struct bs_thread_info))	\
+			)
+
+/* Fill in status and sense values for a failed medium access. */
+static void set_medium_error(int *result, uint8_t *key, uint16_t *asc)
+{
+	*result = SAM_STAT_CHECK_CONDITION;
+	*key = MEDIUM_ERROR;
+	*asc = ASC_READ_ERROR;
+}
+
+/*
+ * Punch a hole of len bytes at offset.  When the driver is built without
+ * BS_GLFS_DISCARD nothing is discarded and success (0) is reported.
+ */
+static int bs_glfs_discard(glfs_fd_t *gfd, off_t offset, size_t len)
+{
+#ifdef BS_GLFS_DISCARD
+	return glfs_discard(gfd, offset, len);
+#else
+	(void) gfd;
+	(void) offset;
+	(void) len;
+	return 0;
+#endif
+}
+
+/*
+ * Worker-thread entry point (registered through bs_thread_open()): execute
+ * one SCSI command against the gluster file handle of the LU and store the
+ * SCSI status, plus sense data on failure, in cmd.
+ *
+ * NOTE(review): the last argument of glfs_pread()/glfs_pwrite() is passed
+ * as lu->bsoflags in some places and as SEEK_SET in others.  The values
+ * are kept exactly as they were; confirm the intended flags against glfs.h.
+ */
+static void bs_glfs_request(struct scsi_cmd *cmd)
+{
+	struct scsi_lu *lu = cmd->dev;
+	glfs_fd_t *gfd = GFSP(lu)->gfd;
+	int ret = 0;
+	uint32_t length = 0;
+	int result = SAM_STAT_GOOD;
+	uint8_t key = 0;
+	uint16_t asc = 0;
+	char *tmpbuf;
+	size_t blocksize;
+	uint64_t offset = cmd->offset;
+	uint32_t tl = cmd->tl;
+	int do_verify = 0;
+	uint32_t i;
+	char *ptr;
+	const char *write_buf = NULL;
+
+	switch (cmd->scb[0]) {
+	case ORWRITE_16:
+		length = scsi_get_out_length(cmd);
+
+		tmpbuf = malloc(length);
+		if (!tmpbuf) {
+			result = SAM_STAT_CHECK_CONDITION;
+			key = HARDWARE_ERROR;
+			asc = ASC_INTERNAL_TGT_FAILURE;
+			break;
+		}
+
+		ret = glfs_pread(gfd, tmpbuf, length, offset, lu->bsoflags);
+
+		if (ret != length) {
+			set_medium_error(&result, &key, &asc);
+			free(tmpbuf);
+			break;
+		}
+
+		/* OR the on-disk data into the data-out buffer */
+		ptr = scsi_get_out_buffer(cmd);
+		for (i = 0; i < length; i++)
+			ptr[i] |= tmpbuf[i];
+
+		free(tmpbuf);
+
+		write_buf = scsi_get_out_buffer(cmd);
+		goto write;
+	case COMPARE_AND_WRITE:
+		/* Blocks are transferred twice, first the set that
+		 * we compare to the existing data, and second the set
+		 * to write if the compare was successful.
+		 */
+		length = scsi_get_out_length(cmd) / 2;
+		if (length != cmd->tl) {
+			result = SAM_STAT_CHECK_CONDITION;
+			key = ILLEGAL_REQUEST;
+			asc = ASC_INVALID_FIELD_IN_CDB;
+			break;
+		}
+
+		tmpbuf = malloc(length);
+		if (!tmpbuf) {
+			result = SAM_STAT_CHECK_CONDITION;
+			key = HARDWARE_ERROR;
+			asc = ASC_INTERNAL_TGT_FAILURE;
+			break;
+		}
+
+		ret = glfs_pread(gfd, tmpbuf, length, offset, SEEK_SET);
+
+		if (ret != length) {
+			set_medium_error(&result, &key, &asc);
+			free(tmpbuf);
+			break;
+		}
+
+		if (memcmp(scsi_get_out_buffer(cmd), tmpbuf, length)) {
+			/*
+			 * The offset of the first differing byte is not
+			 * reported in the sense data, so it is not
+			 * computed here.
+			 */
+			result = SAM_STAT_CHECK_CONDITION;
+			key = MISCOMPARE;
+			asc = ASC_MISCOMPARE_DURING_VERIFY_OPERATION;
+			free(tmpbuf);
+			break;
+		}
+
+		free(tmpbuf);
+
+		write_buf = scsi_get_out_buffer(cmd) + length;
+		goto write;
+	case SYNCHRONIZE_CACHE:
+	case SYNCHRONIZE_CACHE_16:
+		/* TODO: only a whole-file sync is done, the range is ignored */
+		if (cmd->scb[1] & 0x2) {
+			result = SAM_STAT_CHECK_CONDITION;
+			key = ILLEGAL_REQUEST;
+			asc = ASC_INVALID_FIELD_IN_CDB;
+		} else {
+			ret = glfs_fdatasync(gfd);
+			if (ret)
+				set_medium_error(&result, &key, &asc);
+		}
+		break;
+	case WRITE_VERIFY:
+	case WRITE_VERIFY_12:
+	case WRITE_VERIFY_16:
+		do_verify = 1;
+		/* fall through */
+	case WRITE_6:
+	case WRITE_10:
+	case WRITE_12:
+	case WRITE_16:
+		length = scsi_get_out_length(cmd);
+		write_buf = scsi_get_out_buffer(cmd);
+write:
+		ret = glfs_pwrite(gfd, write_buf, length, offset, lu->bsoflags);
+
+		if (ret == length) {
+			struct mode_pg *pg;
+
+			/*
+			 * it would be better not to access to pg
+			 * directly.
+			 */
+			pg = find_mode_page(cmd->dev, 0x08, 0);
+			if (pg == NULL) {
+				result = SAM_STAT_CHECK_CONDITION;
+				key = ILLEGAL_REQUEST;
+				asc = ASC_INVALID_FIELD_IN_CDB;
+				break;
+			}
+			/*
+			 * Flush when bit 0x8 of CDB byte 1 is set (not
+			 * tested for WRITE_6) or when bit 0x04 of the
+			 * first byte of mode page 0x08 is clear.
+			 */
+			if (((cmd->scb[0] != WRITE_6) && (cmd->scb[1] & 0x8)) ||
+			    !(pg->mode_data[0] & 0x04)) {
+				if (glfs_fdatasync(gfd))
+					set_medium_error(&result, &key, &asc);
+			}
+		} else
+			set_medium_error(&result, &key, &asc);
+
+		if (do_verify)
+			goto verify;
+		break;
+	case WRITE_SAME:
+	case WRITE_SAME_16:
+		/* WRITE_SAME used to punch hole in file */
+		if (cmd->scb[1] & 0x08) {
+			ret = bs_glfs_discard(gfd, offset, tl);
+			if (ret != 0) {
+				eprintf("Failed to punch hole for "
+					"WRITE_SAME command\n");
+				result = SAM_STAT_CHECK_CONDITION;
+				key = HARDWARE_ERROR;
+				asc = ASC_INTERNAL_TGT_FAILURE;
+			}
+			break;
+		}
+
+		blocksize = (size_t) 1 << cmd->dev->blk_shift;
+		tmpbuf = scsi_get_out_buffer(cmd);
+
+		/*
+		 * Write the single data-out block once per logical block.
+		 * Stop at the first failure, and never let tl wrap around
+		 * if it is not a multiple of the block size.
+		 */
+		while (tl >= blocksize) {
+			switch (cmd->scb[1] & 0x06) {
+			case 0x02: /* PBDATA==0 LBDATA==1 */
+				put_unaligned_be32(offset, tmpbuf);
+				break;
+			case 0x04: /* PBDATA==1 LBDATA==0 */
+				/* physical sector format */
+				put_unaligned_be64(offset, tmpbuf);
+				break;
+			}
+
+			ret = glfs_pwrite(gfd, tmpbuf, blocksize,
+					  offset, lu->bsoflags);
+
+			if (ret != blocksize) {
+				set_medium_error(&result, &key, &asc);
+				break;
+			}
+
+			offset += blocksize;
+			tl -= blocksize;
+		}
+		break;
+	case READ_6:
+	case READ_10:
+	case READ_12:
+	case READ_16:
+		length = scsi_get_in_length(cmd);
+		ret = glfs_pread(gfd, scsi_get_in_buffer(cmd),
+				 length, offset, SEEK_SET);
+
+		if (ret != length) {
+			eprintf("Error on read %x %x\n", ret, length);
+			set_medium_error(&result, &key, &asc);
+		}
+		break;
+	case PRE_FETCH_10:
+	case PRE_FETCH_16:
+		/* no prefetch is issued to gluster; always reports success */
+		break;
+	case VERIFY_10:
+	case VERIFY_12:
+	case VERIFY_16:
+verify:
+		length = scsi_get_out_length(cmd);
+
+		tmpbuf = malloc(length);
+		if (!tmpbuf) {
+			result = SAM_STAT_CHECK_CONDITION;
+			key = HARDWARE_ERROR;
+			asc = ASC_INTERNAL_TGT_FAILURE;
+			break;
+		}
+
+		ret = glfs_pread(gfd, tmpbuf, length, offset, lu->bsoflags);
+
+		if (ret != length)
+			set_medium_error(&result, &key, &asc);
+		else if (memcmp(scsi_get_out_buffer(cmd), tmpbuf, length)) {
+			result = SAM_STAT_CHECK_CONDITION;
+			key = MISCOMPARE;
+			asc = ASC_MISCOMPARE_DURING_VERIFY_OPERATION;
+		}
+
+		free(tmpbuf);
+		break;
+	case UNMAP:
+		if (!cmd->dev->attrs.thinprovisioning) {
+			result = SAM_STAT_CHECK_CONDITION;
+			key = ILLEGAL_REQUEST;
+			asc = ASC_INVALID_FIELD_IN_CDB;
+			break;
+		}
+
+		length = scsi_get_out_length(cmd);
+		tmpbuf = scsi_get_out_buffer(cmd);
+
+		if (length < 8)
+			break;
+
+		/* skip the 8 byte parameter list header */
+		length -= 8;
+		tmpbuf += 8;
+
+		/* one 16 byte descriptor per range: 8 byte LBA, 4 byte count */
+		while (length >= 16) {
+			offset = get_unaligned_be64(&tmpbuf[0]);
+			offset = offset << cmd->dev->blk_shift;
+
+			tl = get_unaligned_be32(&tmpbuf[8]);
+			tl = tl << cmd->dev->blk_shift;
+
+			if (offset + tl > cmd->dev->size) {
+				eprintf("UNMAP beyond EOF\n");
+				result = SAM_STAT_CHECK_CONDITION;
+				key = ILLEGAL_REQUEST;
+				asc = ASC_LBA_OUT_OF_RANGE;
+				break;
+			}
+
+			if (tl > 0) {
+				if (bs_glfs_discard(gfd, offset, tl) != 0) {
+					eprintf("Failed to punch hole for"
+						" UNMAP at offset:%" PRIu64
+						" length:%u\n",
+						offset, tl);
+					result = SAM_STAT_CHECK_CONDITION;
+					key = HARDWARE_ERROR;
+					asc = ASC_INTERNAL_TGT_FAILURE;
+					break;
+				}
+			}
+
+			length -= 16;
+			tmpbuf += 16;
+		}
+		break;
+	default:
+		break;
+	}
+
+	dprintf("io done %p %x %d %u\n", cmd, cmd->scb[0], ret, length);
+
+	scsi_set_result(cmd, result);
+
+	if (result != SAM_STAT_GOOD) {
+		eprintf("io error %p %x %x %d %u %" PRIu64 ", %m\n",
+			cmd, result, cmd->scb[0], ret, length, offset);
+		sense_data_build(cmd, key, asc);
+	}
+}
+
+/*
+ * Split an image name of the form "volume@hostname:filename".
+ *
+ * On return *vol, *server and *path each point to a malloc'ed string that
+ * the caller must free().  All three are NULL when the '@' or ':' separator
+ * is missing or the working copy cannot be allocated; a single one may be
+ * NULL if its own strdup() fails.
+ */
+static void parse_imagepath(const char *image, char **vol, char **server,
+			    char **path)
+{
+	char *copy, *at, *colon;
+
+	*vol = NULL;
+	*server = NULL;
+	*path = NULL;
+
+	copy = strdup(image);
+	if (!copy)
+		return;
+
+	at = strchr(copy, '@');
+	colon = at ? strchr(at + 1, ':') : NULL;
+	if (colon) {
+		*at = '\0';
+		*colon = '\0';
+		*vol = strdup(copy);
+		*server = strdup(at + 1);
+		*path = strdup(colon + 1);
+	}
+
+	free(copy);
+}
+
+/*
+ * Connect to the gluster volume named in image ("volume@hostname:filename"),
+ * open the file backing the LUN and report its size through *size.
+ *
+ * The handles are stored in the LU only when every step succeeded; on any
+ * failure everything acquired here is released again.  fd is not used.
+ *
+ * Returns 0 on success and -EIO on any failure.
+ */
+static int bs_glfs_open(struct scsi_lu *lu, char *image, int *fd,
+			uint64_t *size)
+{
+	struct active_glfs *priv = GFSP(lu);
+	char *volname, *servername, *pathname;
+	int bsoflags = ALLOWED_BSOFLAGS;
+	glfs_t *fs = NULL;
+	glfs_fd_t *gfd = NULL;
+	struct stat st;
+	int ret = -EIO;
+
+	parse_imagepath(image, &volname, &servername, &pathname);
+	if (!volname || !servername || !pathname) {
+		eprintf("glfs: backing-store must have the form "
+			"volume@hostname:filename\n");
+		goto out;
+	}
+
+	fs = glfs_new(volname);
+	if (!fs)
+		goto out;
+
+	if (glfs_set_volfile_server(fs, "tcp", servername, GLUSTER_PORT))
+		goto out;
+
+	if (glfs_init(fs))
+		goto out;
+
+	if (lu->bsoflags)
+		bsoflags = lu->bsoflags;
+
+	gfd = glfs_open(fs, pathname, bsoflags);
+	if (gfd == NULL)
+		goto out;
+
+	if (glfs_lstat(fs, pathname, &st))
+		goto out;
+
+	/* no cast through long: that truncates large files on 32 bit */
+	*size = (uint64_t) st.st_size;
+
+	if (priv->logfile)
+		glfs_set_logging(fs, priv->logfile, priv->loglevel);
+
+	priv->fs = fs;
+	priv->gfd = gfd;
+	ret = 0;
+out:
+	if (ret) {
+		if (gfd)
+			glfs_close(gfd);
+		if (fs)
+			glfs_fini(fs);
+	}
+	free(volname);
+	free(servername);
+	free(pathname);
+
+	return ret;
+}
+
+/*
+ * Release the file and volume handles of the LU.  The pointers are cleared
+ * so that a second call (for example from bs_glfs_exit()) does nothing.
+ */
+static void bs_glfs_close(struct scsi_lu *lu)
+{
+	struct active_glfs *priv = GFSP(lu);
+
+	if (priv->gfd) {
+		glfs_close(priv->gfd);
+		priv->gfd = NULL;
+	}
+
+	if (priv->fs) {
+		glfs_fini(priv->fs);
+		priv->fs = NULL;
+	}
+}
+
+/*
+ * Return a malloc'ed copy of the text from *p up to the next ';' or the end
+ * of the string, and advance *p past that text and the ';' if there is one.
+ * Returns NULL if the copy cannot be allocated; *p is advanced regardless,
+ * so a caller looping over options always makes progress.
+ */
+static char *slurp_to_semi(char **p)
+{
+	char *end = strchr(*p, ';');
+	char *ret;
+	size_t len;
+
+	if (end == NULL)
+		end = *p + strlen(*p);
+	len = end - *p;
+
+	ret = malloc(len + 1);
+	if (ret) {
+		memcpy(ret, *p, len);
+		ret[len] = '\0';
+	}
+
+	/* Jump past the semicolon, if we stopped at one */
+	*p = (*end == ';') ? end + 1 : end;
+	return ret;
+}
+
+/*
+ * Return a malloc'ed copy of the value following the first '=' in *p (up to
+ * the next ';') and advance *p behind it.  Returns NULL, leaving *p alone,
+ * when there is no '='.
+ */
+static char *slurp_value(char **p)
+{
+	char *equal = strchr(*p, '=');
+
+	if (!equal)
+		return NULL;
+
+	*p = equal + 1;
+	return slurp_to_semi(p);
+}
+
+/* Return 1 if p starts with "<opt>=", 0 otherwise. */
+static int is_opt(const char *opt, char *p)
+{
+	size_t len = strlen(opt);
+
+	return strncmp(p, opt, len) == 0 && p[len] == '=';
+}
+
+/*
+ * Parse the ';' separated --bsopts string ("logfile=<path>",
+ * "loglevel=<n>"), remember the settings in the LU and start the worker
+ * threads.  Unknown options are logged and skipped.
+ *
+ * Returns the result of bs_thread_open().
+ */
+static tgtadm_err bs_glfs_init(struct scsi_lu *lu, char *bsopts)
+{
+	struct bs_thread_info *info = BS_THREAD_I(lu);
+	char *logfile = NULL;
+	int loglevel = 0;
+	char *value;
+
+	while (bsopts && *bsopts) {
+		if (is_opt("logfile", bsopts)) {
+			free(logfile);	/* the last "logfile=" wins */
+			logfile = slurp_value(&bsopts);
+		} else if (is_opt("loglevel", bsopts)) {
+			value = slurp_value(&bsopts);
+			if (value) {
+				loglevel = atoi(value);
+				free(value);
+			}
+		} else {
+			/* consume the option, otherwise this loop never ends */
+			value = slurp_to_semi(&bsopts);
+			eprintf("glfs: ignoring unknown option \"%s\"\n",
+				value ? value : "");
+			free(value);
+		}
+	}
+
+	GFSP(lu)->logfile = logfile;
+	GFSP(lu)->loglevel = loglevel;
+
+	return bs_thread_open(info, bs_glfs_request, nr_iothreads);
+}
+
+/*
+ * Tear down the backing store of a LU: shut down the worker threads, then
+ * release whatever handles bs_glfs_close() has not released yet and the
+ * option string allocated by bs_glfs_init().
+ */
+static void bs_glfs_exit(struct scsi_lu *lu)
+{
+	struct bs_thread_info *info = BS_THREAD_I(lu);
+
+	/* the workers use the gluster handles, so shut them down first */
+	bs_thread_close(info);
+
+	bs_glfs_close(lu);
+
+	free(GFSP(lu)->logfile);
+	GFSP(lu)->logfile = NULL;
+}
+
+static struct backingstore_template glfs_bst = {
+	.bs_name		= "glfs",
+	.bs_datasize		= sizeof(struct active_glfs) +
+				  sizeof(struct bs_thread_info),
+	.bs_open		= bs_glfs_open,
+	.bs_close		= bs_glfs_close,
+	.bs_init		= bs_glfs_init,
+	.bs_exit		= bs_glfs_exit,
+	.bs_cmd_submit		= bs_thread_cmd_submit,
+	.bs_oflags_supported	= ALLOWED_BSOFLAGS
+};
+
+/* Module entry point: make the "glfs" backing store known to tgtd. */
+void register_bs_module(void)
+{
+	register_backingstore_template(&glfs_bst);
+}
-- 
1.8.5.3

--
To unsubscribe from this list: send the line "unsubscribe stgt" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html