Recent changes (master)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The following changes since commit 6899b6cb0d996c530e42195cfc29e8e0b02aeae3:

  mutex: move pthread_cond_signal() outside of lock (2014-07-08 09:46:37 +0200)

are available in the git repository at:

  git://git.kernel.dk/fio.git master

for you to fetch changes up to ebea21332ee7d7d12f17cbe9718611bcec558210:

  Fix cases where td->terminate is set, but terminate clock not marked (2014-07-09 11:24:12 +0200)

----------------------------------------------------------------
Jens Axboe (7):
      iolog: propagate errors to caller
      iolog: fix compile error for non-zlib
      iolog: fix link without zlib
      Allow threads 60 seconds to exit before being forceful
      iolog: run compression work at slightly elevated nice level
      Add thread number to log filename
      Fix cases where td->terminate is set, but terminate clock not marked

 HOWTO     |   16 ++++++++++------
 backend.c |   26 ++++++++++++++++++++++----
 fio.1     |   14 +++++++++-----
 fio.h     |    8 ++++++++
 init.c    |   20 ++++++++++----------
 iolog.c   |   39 +++++++++++++++++++++++++++++++++------
 lib/tp.c  |   11 +++++++++++
 lib/tp.h  |    1 +
 libfio.c  |   13 ++++++++++++-
 verify.c  |    4 ++--
 10 files changed, 118 insertions(+), 34 deletions(-)

---

Diff of recent changes:

diff --git a/HOWTO b/HOWTO
index fbc455d..ac96069 100644
--- a/HOWTO
+++ b/HOWTO
@@ -1307,7 +1307,9 @@ write_bw_log=str If given, write a bandwidth log of the jobs in this job
 		jobs in their lifetime. The included fio_generate_plots
 		script uses gnuplot to turn these text files into nice
 		graphs. See write_lat_log for behaviour of given
-		filename. For this option, the suffix is _bw.log.
+		filename. For this option, the suffix is _bw.x.log, where
+		x is the index of the job (1..N, where N is the number of
+		jobs).
 
 write_lat_log=str Same as write_bw_log, except that this option stores io
 		submission, completion, and total latencies instead. If no
@@ -1317,14 +1319,16 @@ write_lat_log=str Same as write_bw_log, except that this option stores io
 
 		write_lat_log=foo
 
-		The actual log names will be foo_slat.log, foo_clat.log,
-		and foo_lat.log. This helps fio_generate_plot fine the logs
-		automatically.
+		The actual log names will be foo_slat.x.log, foo_clat.x.log,
+		and foo_lat.x.log, where x is the index of the job (1..N,
+		where N is the number of jobs). This helps fio_generate_plot
+		find the logs automatically.
 
 write_iops_log=str Same as write_bw_log, but writes IOPS. If no filename is
 		given with this option, the default filename of
-		"jobname_type.log" is used. Even if the filename is given,
-		fio will still append the type of log.
+		"jobname_type.x.log" is used, where x is the index of the job
+		(1..N, where N is the number of jobs). Even if the filename
+		is given, fio will still append the type of log.
 
 log_avg_msec=int By default, fio will log an entry in the iops, latency,
 		or bw log for every IO that completes. When writing to the
diff --git a/backend.c b/backend.c
index 68540ab..30f78b7 100644
--- a/backend.c
+++ b/backend.c
@@ -391,7 +391,7 @@ static int break_on_this_error(struct thread_data *td, enum fio_ddir ddir,
 			 * fill_device option is set.
 			 */
 			td_clear_error(td);
-			td->terminate = 1;
+			fio_mark_td_terminate(td);
 			return 1;
 		} else {
 			/*
@@ -460,7 +460,7 @@ static void do_verify(struct thread_data *td, uint64_t verify_bytes)
 		if (runtime_exceeded(td, &td->tv_cache)) {
 			__update_tv_cache(td);
 			if (runtime_exceeded(td, &td->tv_cache)) {
-				td->terminate = 1;
+				fio_mark_td_terminate(td);
 				break;
 			}
 		}
@@ -714,7 +714,7 @@ static uint64_t do_io(struct thread_data *td)
 		if (runtime_exceeded(td, &td->tv_cache)) {
 			__update_tv_cache(td);
 			if (runtime_exceeded(td, &td->tv_cache)) {
-				td->terminate = 1;
+				fio_mark_td_terminate(td);
 				break;
 			}
 		}
@@ -922,7 +922,7 @@ reap:
 
 	if (td->o.fill_device && td->error == ENOSPC) {
 		td->error = 0;
-		td->terminate = 1;
+		fio_mark_td_terminate(td);
 	}
 	if (!td->error) {
 		struct fio_file *f;
@@ -1603,6 +1603,13 @@ static int fork_main(int shmid, int offset)
 	return (int) (uintptr_t) ret;
 }
 
+static void dump_td_info(struct thread_data *td)
+{
+	log_err("fio: job '%s' hasn't exited in %lu seconds, it appears to "
+		"be stuck. Doing forceful exit of this job.\n", td->o.name,
+			(unsigned long) time_since_now(&td->terminate_time));
+}
+
 /*
  * Run over the job map and reap the threads that have exited, if any.
  */
@@ -1681,6 +1688,17 @@ static void reap_threads(unsigned int *nr_running, unsigned int *t_rate,
 		}
 
 		/*
+		 * If the job is stuck, do a forceful timeout of it and
+		 * move on.
+		 */
+		if (td->terminate &&
+		    time_since_now(&td->terminate_time) >= FIO_REAP_TIMEOUT) {
+			dump_td_info(td);
+			td_set_runstate(td, TD_REAPED);
+			goto reaped;
+		}
+
+		/*
 		 * thread is not dead, continue
 		 */
 		pending++;
diff --git a/fio.1 b/fio.1
index c58e817..22d6b1e 100644
--- a/fio.1
+++ b/fio.1
@@ -1191,17 +1191,21 @@ If given, write a bandwidth log of the jobs in this job file. Can be used to
 store data of the bandwidth of the jobs in their lifetime. The included
 fio_generate_plots script uses gnuplot to turn these text files into nice
 graphs. See \fBwrite_lat_log\fR for behaviour of given filename. For this
-option, the postfix is _bw.log.
+option, the postfix is _bw.x.log, where x is the index of the job (1..N,
+where N is the number of jobs).
 .TP
 .BI write_lat_log \fR=\fPstr
 Same as \fBwrite_bw_log\fR, but writes I/O completion latencies.  If no
-filename is given with this option, the default filename of "jobname_type.log"
-is used. Even if the filename is given, fio will still append the type of log.
+filename is given with this option, the default filename of
+"jobname_type.x.log" is used, where x is the index of the job (1..N, where
+N is the number of jobs). Even if the filename is given, fio will still
+append the type of log.
 .TP
 .BI write_iops_log \fR=\fPstr
 Same as \fBwrite_bw_log\fR, but writes IOPS. If no filename is given with this
-option, the default filename of "jobname_type.log" is used. Even if the
-filename is given, fio will still append the type of log.
+option, the default filename of "jobname_type.x.log" is used, where x is the
+index of the job (1..N, where N is the number of jobs). Even if the filename
+is given, fio will still append the type of log.
 .TP
 .BI log_avg_msec \fR=\fPint
 By default, fio will log an entry in the iops, latency, or bw log for every
diff --git a/fio.h b/fio.h
index df0d020..c694f2c 100644
--- a/fio.h
+++ b/fio.h
@@ -254,6 +254,7 @@ struct thread_data {
 	struct timeval epoch;	/* time job was started */
 	struct timeval last_issue;
 	struct timeval tv_cache;
+	struct timeval terminate_time;
 	unsigned int tv_cache_nr;
 	unsigned int tv_cache_mask;
 	unsigned int ramp_time_over;
@@ -486,8 +487,15 @@ extern void td_set_runstate(struct thread_data *, int);
 extern int td_bump_runstate(struct thread_data *, int);
 extern void td_restore_runstate(struct thread_data *, int);
 
+/*
+ * Allow 60 seconds for a job to quit on its own, otherwise reap with
+ * a vengeance.
+ */
+#define FIO_REAP_TIMEOUT	60
+
 #define TERMINATE_ALL		(-1)
 extern void fio_terminate_threads(int);
+extern void fio_mark_td_terminate(struct thread_data *);
 
 /*
  * Memory helpers
diff --git a/init.c b/init.c
index 8268ed5..57aa702 100644
--- a/init.c
+++ b/init.c
@@ -1167,14 +1167,14 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
 		else
 			suf = "log";
 
-		snprintf(logname, sizeof(logname), "%s_lat.%s",
-				o->lat_log_file, suf);
+		snprintf(logname, sizeof(logname), "%s_lat.%d.%s",
+				o->lat_log_file, td->thread_number, suf);
 		setup_log(&td->lat_log, &p, logname);
-		snprintf(logname, sizeof(logname), "%s_slat.%s",
-				o->lat_log_file, suf);
+		snprintf(logname, sizeof(logname), "%s_slat.%d.%s",
+				o->lat_log_file, td->thread_number, suf);
 		setup_log(&td->slat_log, &p, logname);
-		snprintf(logname, sizeof(logname), "%s_clat.%s",
-				o->lat_log_file, suf);
+		snprintf(logname, sizeof(logname), "%s_clat.%d.%s",
+				o->lat_log_file, td->thread_number, suf);
 		setup_log(&td->clat_log, &p, logname);
 	}
 	if (o->bw_log_file) {
@@ -1193,8 +1193,8 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
 		else
 			suf = "log";
 
-		snprintf(logname, sizeof(logname), "%s_bw.%s",
-				o->bw_log_file, suf);
+		snprintf(logname, sizeof(logname), "%s_bw.%d.%s",
+				o->bw_log_file, td->thread_number, suf);
 		setup_log(&td->bw_log, &p, logname);
 	}
 	if (o->iops_log_file) {
@@ -1213,8 +1213,8 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num,
 		else
 			suf = "log";
 
-		snprintf(logname, sizeof(logname), "%s_iops.%s",
-				o->iops_log_file, suf);
+		snprintf(logname, sizeof(logname), "%s_iops.%d.%s",
+				o->iops_log_file, td->thread_number, suf);
 		setup_log(&td->iops_log, &p, logname);
 	}
 
diff --git a/iolog.c b/iolog.c
index 5eca53b..79c189b 100644
--- a/iolog.c
+++ b/iolog.c
@@ -706,6 +706,7 @@ static int z_stream_init(z_stream *stream, int gz_hdr)
 
 struct inflate_chunk_iter {
 	unsigned int seq;
+	int err;
 	void *buf;
 	size_t buf_size;
 	size_t buf_used;
@@ -760,6 +761,7 @@ static size_t inflate_chunk(struct iolog_compress *ic, int gz_hdr, FILE *f,
 		err = inflate(stream, Z_NO_FLUSH);
 		if (err < 0) {
 			log_err("fio: failed inflating log: %d\n", err);
+			iter->err = err;
 			break;
 		}
 
@@ -782,7 +784,7 @@ static size_t inflate_chunk(struct iolog_compress *ic, int gz_hdr, FILE *f,
  * Inflate stored compressed chunks, or write them directly to the log
  * file if so instructed.
  */
-static void inflate_gz_chunks(struct io_log *log, FILE *f)
+static int inflate_gz_chunks(struct io_log *log, FILE *f)
 {
 	struct inflate_chunk_iter iter = { .chunk_sz = log->log_gz, };
 	z_stream stream;
@@ -797,8 +799,10 @@ static void inflate_gz_chunks(struct io_log *log, FILE *f)
 			size_t ret;
 
 			ret = fwrite(ic->buf, ic->len, 1, f);
-			if (ret != 1 || ferror(f))
+			if (ret != 1 || ferror(f)) {
+				iter.err = errno;
 				log_err("fio: error writing compressed log\n");
+			}
 		} else
 			inflate_chunk(ic, log->log_gz_store, f, &stream, &iter);
 
@@ -809,6 +813,8 @@ static void inflate_gz_chunks(struct io_log *log, FILE *f)
 		finish_chunk(&stream, f, &iter);
 		free(iter.buf);
 	}
+
+	return iter.err;
 }
 
 /*
@@ -870,6 +876,8 @@ int iolog_file_inflate(const char *file)
 		total -= ret;
 		if (!total)
 			break;
+		if (iter.err)
+			break;
 
 		ic.seq++;
 		ic.len -= ret;
@@ -882,13 +890,20 @@ int iolog_file_inflate(const char *file)
 	}
 
 	free(buf);
-	return 0;
+	return iter.err;
 }
 
 #else
 
-static void inflate_gz_chunks(struct io_log *log, FILE *f)
+static int inflate_gz_chunks(struct io_log *log, FILE *f)
+{
+	return 0;
+}
+
+int iolog_file_inflate(const char *file)
 {
+	log_err("fio: log inflation not possible without zlib\n");
+	return 1;
 }
 
 #endif
@@ -978,7 +993,8 @@ static int gz_work(struct tp_work *work)
 		ret = deflate(&stream, Z_NO_FLUSH);
 		if (ret < 0) {
 			log_err("fio: deflate log (%d)\n", ret);
-			break;
+			free_chunk(c);
+			goto err;
 		}
 
 		c->len = GZ_CHUNK - stream.avail_out;
@@ -1015,13 +1031,23 @@ static int gz_work(struct tp_work *work)
 		pthread_mutex_unlock(&data->log->chunk_lock);
 	}
 
+	ret = 0;
+done:
 	if (work->wait) {
 		work->done = 1;
 		pthread_cond_signal(&work->cv);
 	} else
 		free(data);
 
-	return 0;
+	return ret;
+err:
+	while (!flist_empty(&list)) {
+		c = flist_first_entry(list.next, struct iolog_compress, list);
+		flist_del(&c->list);
+		free_chunk(c);
+	}
+	ret = 1;
+	goto done;
 }
 
 /*
@@ -1061,6 +1087,7 @@ int iolog_flush(struct io_log *log, int wait)
 	} else
 		data->work.wait = 0;
 
+	data->work.prio = 1;
 	tp_queue_work(tdat, &data->work);
 
 	if (wait) {
diff --git a/lib/tp.c b/lib/tp.c
index 386e31a..7462f5b 100644
--- a/lib/tp.c
+++ b/lib/tp.c
@@ -11,6 +11,7 @@
 #include <unistd.h>
 #include <errno.h>
 #include <pthread.h>
+#include <string.h>
 
 #include "../smalloc.h"
 #include "../log.h"
@@ -21,9 +22,19 @@ static void tp_flush_work(struct flist_head *list)
 	struct tp_work *work;
 
 	while (!flist_empty(list)) {
+		int prio;
+
 		work = flist_entry(list->next, struct tp_work, list);
 		flist_del(&work->list);
+
+		prio = work->prio;
+		if (nice(prio) < 0)
+			log_err("fio: nice %s\n", strerror(errno));
+
 		work->fn(work);
+
+		if (nice(prio) < 0)
+			log_err("fio: nice %s\n", strerror(errno));
 	}
 }
 
diff --git a/lib/tp.h b/lib/tp.h
index 5b07cc6..9147cc2 100644
--- a/lib/tp.h
+++ b/lib/tp.h
@@ -10,6 +10,7 @@ struct tp_work {
 	struct flist_head list;
 	tp_work_fn *fn;
 	int wait;
+	int prio;
 	pthread_cond_t cv;
 	pthread_mutex_t lock;
 	volatile int done;
diff --git a/libfio.c b/libfio.c
index 8af1129..5a996f9 100644
--- a/libfio.c
+++ b/libfio.c
@@ -187,6 +187,13 @@ void td_restore_runstate(struct thread_data *td, int old_state)
 	td_set_runstate(td, old_state);
 }
 
+void fio_mark_td_terminate(struct thread_data *td)
+{
+	fio_gettime(&td->terminate_time, NULL);
+	write_barrier();
+	td->terminate = 1;
+}
+
 void fio_terminate_threads(int group_id)
 {
 	struct thread_data *td;
@@ -199,7 +206,11 @@ void fio_terminate_threads(int group_id)
 		if (group_id == TERMINATE_ALL || groupid == td->groupid) {
 			dprint(FD_PROCESS, "setting terminate on %s/%d\n",
 						td->o.name, (int) td->pid);
-			td->terminate = 1;
+
+			if (td->terminate)
+				continue;
+
+			fio_mark_td_terminate(td);
 			td->o.start_delay = 0;
 
 			/*
diff --git a/verify.c b/verify.c
index 11963e1..e59a4b2 100644
--- a/verify.c
+++ b/verify.c
@@ -847,7 +847,7 @@ int verify_io_u(struct thread_data *td, struct io_u *io_u)
 
 done:
 	if (ret && td->o.verify_fatal)
-		td->terminate = 1;
+		fio_mark_td_terminate(td);
 
 	return ret;
 }
@@ -1205,7 +1205,7 @@ static void *verify_async_thread(void *data)
 	if (ret) {
 		td_verror(td, ret, "async_verify");
 		if (td->o.verify_fatal)
-			td->terminate = 1;
+			fio_mark_td_terminate(td);
 	}
 
 done:
--
To unsubscribe from this list: send the line "unsubscribe fio" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel]     [Linux SCSI]     [Linux IDE]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]

  Powered by Linux