Recent changes (master)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The following changes since commit b7f5c00db0d450aff09b35301bedf16e26e25d3a:

  verify: turn off numberio verification for meta and time_based (2014-08-07 15:27:31 -0600)

are available in the git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to 1b10477b21157800f030c3ec91511a810e75e4c7:

  Add support for HDFS IO engine (2014-08-13 13:36:52 -0600)

----------------------------------------------------------------
Jens Axboe (1):
      Add missing IO engines to the HOWTO

Manish Mandlik (1):
      Add support for HDFS IO engine

 HOWTO             |   30 +++++--
 Makefile          |    9 +-
 configure         |   30 +++++++
 engines/libhdfs.c |  240 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 fio.1             |    3 +
 options.c         |    6 +-
 6 files changed, 309 insertions(+), 9 deletions(-)
 create mode 100644 engines/libhdfs.c

---

Diff of recent changes:

diff --git a/HOWTO b/HOWTO
index 1c4b308..d728353 100644
--- a/HOWTO
+++ b/HOWTO
@@ -670,15 +670,31 @@ ioengine=str	Defines how the job issues io to the file. The following
 				channel semantics (Send/Recv) for the
 				InfiniBand, RoCE and iWARP protocols.
 
-			falloc   IO engine that does regular fallocate to
-				 simulate data transfer as fio ioengine.
-				 DDIR_READ  does fallocate(,mode = keep_size,)
-				 DDIR_WRITE does fallocate(,mode = 0)
-				 DDIR_TRIM  does fallocate(,mode = punch_hole)
+			falloc	IO engine that does regular fallocate to
+				simulate data transfer as fio ioengine.
+				DDIR_READ  does fallocate(,mode = keep_size,)
+				DDIR_WRITE does fallocate(,mode = 0)
+				DDIR_TRIM  does fallocate(,mode = punch_hole)
 
 			e4defrag IO engine that does regular EXT4_IOC_MOVE_EXT
-				 ioctls to simulate defragment activity in
-				 request to DDIR_WRITE event
+				ioctls to simulate defragment activity in
+				request to DDIR_WRITE event
+
+			rbd	IO engine supporting direct access to Ceph
+				Rados Block Devices (RBD) via librbd without
+				the need to use the kernel rbd driver. This
+				ioengine defines engine specific options.
+
+			gfapi	Using Glusterfs libgfapi sync interface to
+				direct access to Glusterfs volumes without
+				having to go through FUSE. This ioengine
+				defines engine specific options.
+
+			gfapi_async Using Glusterfs libgfapi async interface
+				to direct access to Glusterfs volumes without
+				having to go through FUSE. This ioengine
+				defines engine specific options.
+
+			hdfs	Read and write through Hadoop (HDFS).
 
 			external Prefix to specify loading an external
 				IO engine object file. Append the engine
diff --git a/Makefile b/Makefile
index 65e95be..8d86269 100644
--- a/Makefile
+++ b/Makefile
@@ -38,6 +38,13 @@ SOURCE := gettime.c ioengines.c init.c stat.c log.c time.c filesetup.c \
 		profiles/tiobench.c profiles/act.c io_u_queue.c filelock.c \
 		lib/tp.c
 
+# HDFS engine (enabled by CONFIG_LIBHDFS): add the Java and libhdfs header
+# directories to CFLAGS, build engines/libhdfs.c, and collect the JVM and
+# libhdfs libraries in HDFSLIB, which the fio link rule appends.
+# NOTE(review): the libjvm path hard-codes the amd64 JRE directory layout.
+ifdef CONFIG_LIBHDFS
+  HDFSFLAGS= -I $(JAVA_HOME)/include -I $(JAVA_HOME)/include/linux -I $(FIO_LIBHDFS_INCLUDE)
+  HDFSLIB= $(JAVA_HOME)/jre/lib/amd64/server/libjvm.so $(FIO_LIBHDFS_LIB)/liblibhdfs.a
+  CFLAGS += $(HDFSFLAGS)
+  SOURCE += engines/libhdfs.c
+endif
+
 ifdef CONFIG_64BIT_LLP64
   CFLAGS += -DBITS_PER_LONG=32
 endif
@@ -268,7 +275,7 @@ t/ieee754: $(T_IEEE_OBJS)
 	$(QUIET_LINK)$(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(T_IEEE_OBJS) $(LIBS)
 
 fio: $(FIO_OBJS)
-	$(QUIET_LINK)$(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(FIO_OBJS) $(LIBS)
+	$(QUIET_LINK)$(CC) $(LDFLAGS) $(CFLAGS) -o $@ $(FIO_OBJS) $(LIBS) $(HDFSLIB)
 
 gfio: $(GFIO_OBJS)
 	$(QUIET_LINK)$(CC) $(LDFLAGS) -o gfio $(GFIO_OBJS) $(LIBS) $(GTK_LDFLAGS)
diff --git a/configure b/configure
index 1494dd7..33d1327 100755
--- a/configure
+++ b/configure
@@ -134,6 +134,7 @@ cpu=""
 show_help="no"
 exit_val=0
 gfio="no"
+libhdfs="no"
 
 # parse options
 for opt do
@@ -160,6 +161,8 @@ for opt do
   ;;
   --disable-gfapi) disable_gfapi="yes"
   ;;
+  --enable-libhdfs) libhdfs="yes"
+  ;;
   --help)
     show_help="yes"
     ;;
@@ -178,6 +181,7 @@ if test "$show_help" = "yes" ; then
   echo "--esx                  Configure build options for esx"
   echo "--enable-gfio          Enable building of gtk gfio"
   echo "--disable-numa         Disable libnuma even if found"
+  echo "--enable-libhdfs       Enable hdfs support"
   exit $exit_val
 fi
 
@@ -1243,6 +1247,29 @@ if compile_prog "" "" "s390_z196_facilities"; then
   fi
 fi
 echo "s390_z196_facilities          $s390_z196_facilities"
+
+##########################################
+# Check if we have required environment variables configured for libhdfs
+if test "$libhdfs" = "yes" ; then
+  # Report every missing variable before giving up, so the user can fix
+  # them all in one pass.
+  hdfs_conf_error=0
+  if test "$JAVA_HOME" = "" ; then
+    echo "configure: JAVA_HOME should be defined to jdk/jvm path"
+    hdfs_conf_error=1
+  fi
+  if test "$FIO_LIBHDFS_INCLUDE" = "" ; then
+    echo "configure: FIO_LIBHDFS_INCLUDE should be defined to libhdfs include path"
+    hdfs_conf_error=1
+  fi
+  if test "$FIO_LIBHDFS_LIB" = "" ; then
+    echo "configure: FIO_LIBHDFS_LIB should be defined to libhdfs library path"
+    hdfs_conf_error=1
+  fi
+  if test "$hdfs_conf_error" = "1" ; then
+    exit 1
+  fi
+fi
+echo "HDFS engine                   $libhdfs"
+
 #############################################################################
 
 if test "$wordsize" = "64" ; then
@@ -1384,6 +1411,9 @@ fi
 if test "$gf_fadvise" = "yes" ; then
   output_sym "CONFIG_GF_FADVISE"
 fi
+if test "$libhdfs" = "yes" ; then
+  output_sym "CONFIG_LIBHDFS"
+fi
 
 if test "$zlib" = "no" ; then
   echo "Consider installing zlib-dev (zlib-devel), some fio features depend on it."
diff --git a/engines/libhdfs.c b/engines/libhdfs.c
new file mode 100644
index 0000000..773d46b
--- /dev/null
+++ b/engines/libhdfs.c
@@ -0,0 +1,240 @@
+/*
+ * libhdfs engine
+ *
+ * This engine helps perform read/write operations on an HDFS cluster using
+ * libhdfs. HDFS does not support modification of data once a file is created.
+ *
+ * So, to mimic that, create many files of small size (e.g. 256k); this
+ * engine selects a file based on the offset generated by fio.
+ *
+ * Thus, random reads and writes can also be achieved with this logic.
+ *
+ * NOTE: please set the environment variables FIO_HDFS_BS and FIO_HDFS_FCOUNT
+ * to appropriate values for this engine to work properly.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/uio.h>
+#include <errno.h>
+#include <assert.h>
+
+#include "../fio.h"
+
+#include "hdfs.h"
+
+/*
+ * Per-thread engine state, stored in td->io_ops->data by fio_hdfsio_setup().
+ */
+struct hdfsio_data {
+	/* name node host and port, parsed from the "host,port" filename option */
+	char host[256];
+	int port;
+	/* connection handle returned by hdfsConnect() in fio_hdfsio_open_file() */
+	hdfsFS fs;
+	/* handle of the HDFS file currently used for I/O */
+	hdfsFile fp;
+	/* size of one backing file; io offsets are divided by it to pick a file */
+	unsigned long fsbs;
+	/* number of backing files; file ids wrap around at this count */
+	unsigned long fscount;
+	/* file id that fp was opened for, or -1 when no file has been opened yet */
+	unsigned long curr_file_id;
+	/* largest td->o.numjobs value seen by fio_hdfsio_setup() */
+	unsigned int numjobs;
+	/* getpid() % numjobs, added to every computed file id */
+	unsigned int fid_correction;
+};
+
+/*
+ * Read one unsigned long parameter from the HDFS file 'path' (relative to
+ * the current working directory of hd->fs).
+ *
+ * The value is read into a temporary and stored in *out only when the whole
+ * value was read and is non-zero, so a missing, truncated or zeroed file
+ * leaves *out untouched. Returns 0 when *out was updated, 1 otherwise.
+ */
+static int fio_hdfsio_read_param(struct hdfsio_data *hd, const char *path,
+				 unsigned long *out)
+{
+	unsigned long val = 0;
+	hdfsFile fp;
+	tSize nread;
+
+	fp = hdfsOpenFile(hd->fs, path, O_RDONLY, 0, 0, 0);
+	if (!fp)
+		return 1;
+
+	nread = hdfsRead(hd->fs, fp, &val, sizeof(val));
+	hdfsCloseFile(hd->fs, fp);
+
+	/* both parameters are later used as divisors, so zero is rejected */
+	if (nread != (tSize) sizeof(val) || !val)
+		return 1;
+
+	*out = val;
+	return 0;
+}
+
+/*
+ * Refresh hd->fscount and hd->fsbs from the ".fcount" and ".fbs" files
+ * under /.perftest, when those files exist.
+ *
+ * hdfsConnect() must have been called for hd->fs before this function.
+ * Values that cannot be read completely keep their previous setting.
+ *
+ * Returns 0 on success, 1 if the working directory could not be set.
+ */
+static int fio_hdfsio_setup_fs_params(struct hdfsio_data *hd)
+{
+	if (hdfsSetWorkingDirectory(hd->fs, "/.perftest"))
+		return 1;
+
+	/* both files are optional, so a failed read is not an error here */
+	fio_hdfsio_read_param(hd, ".fcount", &hd->fscount);
+	fio_hdfsio_read_param(hd, ".fbs", &hd->fsbs);
+
+	return 0;
+}
+
+/*
+ * Make sure the HDFS file that backs io_u->offset is open in hd->fp.
+ *
+ * The file id is io_u->offset / hd->fsbs plus the per-process correction.
+ * If that id is the one already cached in hd->curr_file_id nothing is done.
+ * Otherwise the current file is closed and ".f<id>" is opened; for reads a
+ * file smaller than hd->fsbs is skipped and the next id (modulo
+ * hd->fscount) is tried instead.
+ *
+ * Returns 0 when hd->fp is usable, 1 (with io_u->error set) when the file
+ * geometry is invalid or no file could be opened.
+ */
+static int fio_hdfsio_prep(struct thread_data *td, struct io_u *io_u)
+{
+	struct hdfsio_data *hd = td->io_ops->data;
+	hdfsFileInfo *fi;
+	unsigned long want_id, f_id, tries;
+	char fname[80];
+	int open_flags = 0;
+	int usable;
+
+	if (hd->curr_file_id == -1) {
+		/* see comment in fio_hdfsio_setup() function */
+		fio_hdfsio_setup_fs_params(hd);
+	}
+
+	/* both values are used as divisors below */
+	if (!hd->fsbs || !hd->fscount) {
+		io_u->error = EINVAL;
+		td_verror(td, io_u->error, "hdfs file size/count");
+		return 1;
+	}
+
+	/* find out file id based on the offset generated by fio */
+	want_id = (io_u->offset / hd->fsbs) + hd->fid_correction;
+
+	if (want_id == hd->curr_file_id) {
+		/* file is already open */
+		return 0;
+	}
+
+	if (hd->curr_file_id != -1)
+		hdfsCloseFile(hd->fs, hd->fp);
+
+	/* nothing counts as open until the search below succeeds */
+	hd->fp = NULL;
+	hd->curr_file_id = -1;
+
+	if (io_u->ddir == DDIR_READ) {
+		open_flags = O_RDONLY;
+	} else if (io_u->ddir == DDIR_WRITE) {
+		open_flags = O_WRONLY;
+	} else {
+		printf("Invalid I/O Operation\n");
+	}
+
+	/*
+	 * The starting id may lie outside [0, fscount), so allow one attempt
+	 * for it plus one for every file before giving up.
+	 */
+	f_id = want_id;
+	for (tries = 0; tries <= hd->fscount; tries++) {
+		snprintf(fname, sizeof(fname), ".f%lu", f_id);
+
+		/* fi is NULL when the path does not exist or the lookup fails */
+		fi = hdfsGetPathInfo(hd->fs, fname);
+		usable = io_u->ddir == DDIR_WRITE ||
+			 (fi && fi->mSize >= hd->fsbs);
+		if (fi)
+			hdfsFreeFileInfo(fi, 1);
+
+		if (usable) {
+			/* file has enough data to read OR file is opened in write mode */
+			hd->fp = hdfsOpenFile(hd->fs, fname, open_flags, 0, 0,
+					      hd->fsbs);
+			if (hd->fp) {
+				/* cache under the requested id, as callers ask for that one */
+				hd->curr_file_id = want_id;
+				return 0;
+			}
+		}
+
+		/* file is empty, so try next file for reading */
+		f_id = (f_id + 1) % hd->fscount;
+	}
+
+	io_u->error = ENOENT;
+	td_verror(td, io_u->error, "hdfsOpenFile");
+	return 1;
+}
+
+/*
+ * Record the outcome of a transfer in io_u.
+ *
+ * 'ret' is the byte count returned by the hdfs read/write call. A short but
+ * non-negative count is reported through io_u->resid with no error; a
+ * negative count stores errno in io_u->error. Any error present on the io_u
+ * afterwards is passed to td_verror(). Always returns FIO_Q_COMPLETED.
+ */
+static int fio_io_end(struct thread_data *td, struct io_u *io_u, int ret)
+{
+	const int expected = (int)io_u->xfer_buflen;
+
+	if (ret != expected && ret >= 0) {
+		/* short transfer: report what is left over, not an error */
+		io_u->resid = io_u->xfer_buflen - ret;
+		io_u->error = 0;
+		return FIO_Q_COMPLETED;
+	}
+
+	if (ret != expected)
+		io_u->error = errno;
+
+	if (io_u->error)
+		td_verror(td, io_u->error, "xfer");
+
+	return FIO_Q_COMPLETED;
+}
+
+/*
+ * Perform one io_u against the file currently open in hd->fp.
+ *
+ * Reads and writes transfer io_u->xfer_buflen bytes to or from
+ * io_u->xfer_buf; any other direction only prints a message and is
+ * accounted as a zero-byte transfer. The byte count is handed to
+ * fio_io_end(), whose result is returned.
+ */
+static int fio_hdfsio_queue(struct thread_data *td, struct io_u *io_u)
+{
+	struct hdfsio_data *hd = td->io_ops->data;
+	int nbytes = 0;
+
+	switch (io_u->ddir) {
+	case DDIR_READ:
+		nbytes = hdfsRead(hd->fs, hd->fp, io_u->xfer_buf,
+				  io_u->xfer_buflen);
+		break;
+	case DDIR_WRITE:
+		nbytes = hdfsWrite(hd->fs, hd->fp, io_u->xfer_buf,
+				   io_u->xfer_buflen);
+		break;
+	default:
+		printf("Invalid I/O Operation\n");
+		break;
+	}
+
+	return fio_io_end(td, io_u, nbytes);
+}
+
+/*
+ * Connect to the HDFS name node configured in hd->host / hd->port and
+ * switch to the /.perftest working directory, where the engine's files
+ * are looked up by relative name.
+ *
+ * Also derives this process's file id correction from its pid.
+ *
+ * Returns 0 on success, 1 if the connection or the directory change fails.
+ */
+int fio_hdfsio_open_file(struct thread_data *td, struct fio_file *f)
+{
+	struct hdfsio_data *hd = td->io_ops->data;
+	int err;
+
+	hd->fs = hdfsConnect(hd->host, hd->port);
+	if (!hd->fs) {
+		err = errno ? errno : EIO;
+		td_verror(td, err, "hdfsConnect");
+		return 1;
+	}
+
+	if (hdfsSetWorkingDirectory(hd->fs, "/.perftest")) {
+		err = errno ? errno : EIO;
+		td_verror(td, err, "hdfsSetWorkingDirectory");
+		/* do not leave a half-initialized connection behind */
+		hdfsDisconnect(hd->fs);
+		hd->fs = NULL;
+		return 1;
+	}
+
+	hd->fid_correction = (getpid() % hd->numjobs);
+
+	return 0;
+}
+
+/*
+ * Close the HDFS file left open by fio_hdfsio_prep(), if any, and drop the
+ * connection.
+ *
+ * hd->curr_file_id is reset to -1 so that a later prep does not treat the
+ * closed handle as an already-open file. Always returns 0.
+ */
+int fio_hdfsio_close_file(struct thread_data *td, struct fio_file *f)
+{
+	struct hdfsio_data *hd = td->io_ops->data;
+
+	/* curr_file_id != -1 means prep opened hd->fp and never closed it */
+	if (hd->curr_file_id != -1 && hd->fp)
+		hdfsCloseFile(hd->fs, hd->fp);
+	hd->fp = NULL;
+	hd->curr_file_id = -1;
+
+	if (hd->fs) {
+		hdfsDisconnect(hd->fs);
+		hd->fs = NULL;
+	}
+
+	return 0;
+}
+
+/*
+ * Parse a strictly positive decimal number from 'str' into *out.
+ * Trailing characters after the digits are ignored.
+ * Returns 0 on success, 1 if no positive number could be parsed.
+ */
+static int fio_hdfsio_parse_count(const char *str, unsigned long *out)
+{
+	char *end;
+	long val;
+
+	errno = 0;
+	val = strtol(str, &end, 10);
+	if (errno || end == str || val <= 0)
+		return 1;
+
+	*out = val;
+	return 0;
+}
+
+/*
+ * One-time engine setup for a thread.
+ *
+ * Allocates the engine state, splits the job's filename option (expected as
+ * "host,port") into hd->host and hd->port, reads the file count and file
+ * size from the FIO_HDFS_FCOUNT and FIO_HDFS_BS environment variables, and
+ * sets the size of td->files[0] to their product.
+ *
+ * The state is installed in td->io_ops->data only when everything succeeded.
+ * Returns 0 on success (or if already set up), 1 on any failure.
+ */
+static int fio_hdfsio_setup(struct thread_data *td)
+{
+	struct hdfsio_data *hd;
+	struct fio_file *f;
+	const char *env_fcount, *env_bs;
+	char *sep;
+	static unsigned int numjobs = 1;	/* at least one job has to be there! */
+
+	numjobs = (td->o.numjobs > numjobs) ? td->o.numjobs : numjobs;
+
+	if (td->io_ops->data)
+		return 0;
+
+	hd = calloc(1, sizeof(*hd));
+	if (!hd) {
+		fprintf(stderr, "libhdfs: out of memory\n");
+		return 1;
+	}
+
+	/* separate host and port from filename */
+	sep = td->o.filename ? strchr(td->o.filename, ',') : NULL;
+	if (!sep) {
+		fprintf(stderr, "libhdfs: filename must be given as host,port\n");
+		goto err;
+	}
+	*sep = ' ';
+	/* the width keeps the host within hd->host, terminator included */
+	if (sscanf(td->o.filename, "%255s%d", hd->host, &(hd->port)) != 2) {
+		fprintf(stderr, "libhdfs: cannot parse host and port from filename\n");
+		goto err;
+	}
+
+	/*
+	 * IMHO, fbs and fcount should be read from the cluster here (connect,
+	 * fio_hdfsio_setup_fs_params(), disconnect) instead of from
+	 * fio_hdfsio_prep(), but somehow calling it here doesn't seem to
+	 * work, some problem with libhdfs that needs to be debugged.
+	 * So, as an alternate, using environment variables.
+	 */
+	env_fcount = getenv("FIO_HDFS_FCOUNT");
+	env_bs = getenv("FIO_HDFS_BS");
+	if (!env_fcount || !env_bs) {
+		fprintf(stderr,
+			"FIO_HDFS_FCOUNT and/or FIO_HDFS_BS not set.\n");
+		goto err;
+	}
+	if (fio_hdfsio_parse_count(env_fcount, &hd->fscount) ||
+	    fio_hdfsio_parse_count(env_bs, &hd->fsbs)) {
+		fprintf(stderr,
+			"FIO_HDFS_FCOUNT and FIO_HDFS_BS must be positive numbers\n");
+		goto err;
+	}
+	/* the product below must fit in an unsigned long */
+	if (hd->fscount > (unsigned long) -1 / hd->fsbs) {
+		fprintf(stderr,
+			"FIO_HDFS_FCOUNT * FIO_HDFS_BS is too large\n");
+		goto err;
+	}
+
+	/* based on fbs and fcount set f->real_file_size */
+	f = td->files[0];
+	f->real_file_size = hd->fscount * hd->fsbs;
+
+	td->o.nr_files = 1;
+	hd->curr_file_id = -1;
+	hd->numjobs = numjobs;
+	fio_file_set_size_known(f);
+
+	td->io_ops->data = hd;
+	return 0;
+
+err:
+	free(hd);
+	return 1;
+}
+
+/*
+ * Engine operations table handed to register_ioengine().
+ *
+ * The name is "hdfs" because that is the ioengine value this patch adds to
+ * the option table in options.c and documents in HOWTO and fio.1.
+ */
+static struct ioengine_ops ioengine_hdfs = {
+	.name = "hdfs",
+	.version = FIO_IOOPS_VERSION,
+	.setup = fio_hdfsio_setup,
+	.prep = fio_hdfsio_prep,
+	.queue = fio_hdfsio_queue,
+	.open_file = fio_hdfsio_open_file,
+	.close_file = fio_hdfsio_close_file,
+	.flags = FIO_SYNCIO,
+};
+
+/*
+ * Register the HDFS engine operations with fio.
+ * NOTE(review): fio_init presumably makes this run automatically at
+ * program start-up - confirm against its definition.
+ */
+static void fio_init fio_hdfsio_register(void)
+{
+	register_ioengine(&ioengine_hdfs);
+}
+
+/*
+ * Remove the HDFS engine operations from fio's engine list.
+ * NOTE(review): fio_exit presumably makes this run automatically at
+ * program exit - confirm against its definition.
+ */
+static void fio_exit fio_hdfsio_unregister(void)
+{
+	unregister_ioengine(&ioengine_hdfs);
+}
diff --git a/fio.1 b/fio.1
index 5291126..b5ff3cc 100644
--- a/fio.1
+++ b/fio.1
@@ -612,6 +612,9 @@ options.
 Using Glusterfs libgfapi async interface to direct access to Glusterfs volumes without
 having to go through FUSE. This ioengine defines engine specific
 options.
+.TP
+.B hdfs
+Read and write through Hadoop (HDFS)
 .RE
 .P
 .RE
diff --git a/options.c b/options.c
index 3a3321f..484efc1 100644
--- a/options.c
+++ b/options.c
@@ -1541,7 +1541,11 @@ struct fio_option fio_options[FIO_MAX_OPTS] = {
 			    .help = "Glusterfs libgfapi(async) based engine"
 			  },
 #endif
-
+#ifdef CONFIG_LIBHDFS
+			  { .ival = "hdfs",
+			    .help = "Hadoop Distributed Filesystem (HDFS) engine"
+			  },
+#endif
 			  { .ival = "external",
 			    .help = "Load external engine (append name)",
 			  },
--
To unsubscribe from this list: send the line "unsubscribe fio" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel]     [Linux SCSI]     [Linux IDE]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]

  Powered by Linux