[PATCH 10/15] job-runner: use config to limit job frequency

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Derrick Stolee <dstolee@xxxxxxxxxxxxx>

We want to run different maintenance tasks at different rates. That
means we cannot rely only on the time between job-runner loop pauses
to reduce the frequency of specific jobs. This means we need to
persist a timestamp for the previous run somewhere. A natural place
is the Git config file for that repo.

Create the job.<job-name>.lastRun config option to store the
timestamp of the previous run of that job. Set this config option
after a successful run of 'git -C <repo> run-job <job-name>'.

To maximize code reuse, we dynamically construct the config key
and parse the config value into a timestamp in a generic way. This
makes the introduction of another config option extremely trivial:

The job.<job-name>.interval option allows a user to specify a
minimum number of seconds between two calls to 'git run-job
<job-name>' on a given repo. This could be stored in the global
or system config to change the default for all repos, or could be
set on a per-repo basis. This is checked on every
iteration of the job loop, so a user could update this and see the
effect without restarting the job-runner process.

RFC QUESTION: I'm using a 'git -C <repo> config <option>' process
to test if enough time has elapsed before running the 'run-job'
process. These 'config' processes are pretty light-weight, so
hopefully they are not too noisy. An alternative would be to
always run 'git -C <repo> run-job <job-name>' and rely on that
process to no-op based on the configured values and how recently
they were run. However, that will likely interrupt users who want
to test the jobs in the foreground. That user disruption could be
mitigated by adding a "--check-latest" option. A user running a
job manually would not include that argument by default, so the
manual run would not be skipped. However, any logging that we might do for the
job-runner would make it look like we are running the run-job
commands all the time even if they are no-ops. Advice welcome!

Signed-off-by: Derrick Stolee <dstolee@xxxxxxxxxxxxx>
---
 Documentation/config/job.txt  |  10 ++
 Documentation/git-run-job.txt |   3 +
 builtin/job-runner.c          | 177 +++++++++++++++++++++++++++++++++-
 3 files changed, 189 insertions(+), 1 deletion(-)

diff --git a/Documentation/config/job.txt b/Documentation/config/job.txt
index 2dfed3935fa..7c799d66221 100644
--- a/Documentation/config/job.txt
+++ b/Documentation/config/job.txt
@@ -1,3 +1,13 @@
+job.<job-name>.interval::
+	The minimum number of seconds between runs of
+	`git run-job <job-name>` when running `git job-runner`.
+
+job.<job-name>.lastRun::
+	The time of the previous run of `git run-job <job-name>` by
+	`git job-runner`, stored as seconds since the Unix epoch. You
+	can manually update this to a later time to delay a specific
+	job on this repository.
+
 job.pack-files.batchSize::
 	This string value `<size>` will be passed to the
 	`git multi-pack-index repack --batch-size=<size>` command as
diff --git a/Documentation/git-run-job.txt b/Documentation/git-run-job.txt
index cdd6417f7c9..c6d5674d699 100644
--- a/Documentation/git-run-job.txt
+++ b/Documentation/git-run-job.txt
@@ -90,6 +90,9 @@ pack-files, the job does not attempt to repack. Otherwise, the batch
 size is the sum of all pack-file sizes minus the largest pack-file size.
 The batch size is capped at two gigabytes. This intends to pack all
 small pack-files into a single pack-file.
++
+The `--batch-size=<size>` option will override the dynamic or configured
+batch size.
 
 
 GIT
diff --git a/builtin/job-runner.c b/builtin/job-runner.c
index bed401f94bf..aee55c106e8 100644
--- a/builtin/job-runner.c
+++ b/builtin/job-runner.c
@@ -4,11 +4,175 @@
 #include "run-command.h"
 #include "string-list.h"
 
+#define MAX_TIMESTAMP ((timestamp_t)-1)
+
 static char const * const builtin_job_runner_usage[] = {
 	N_("git job-runner [<options>]"),
 	NULL
 };
 
+static char *config_name(const char *prefix,
+			 const char *job,
+			 const char *postfix)
+{
+	int postfix_dot = 0;
+	struct strbuf name = STRBUF_INIT;
+
+	if (prefix) {
+		strbuf_addstr(&name, prefix);
+		strbuf_addch(&name, '.');
+	}
+
+	if (job) {
+		strbuf_addstr(&name, job);
+		postfix_dot = 1;
+	}
+
+	if (postfix) {
+		if (postfix_dot)
+			strbuf_addch(&name, '.');
+		strbuf_addstr(&name, postfix);
+	}
+
+	return strbuf_detach(&name, NULL);
+}
+
+static int try_get_config(const char *job,
+			  const char *repo,
+			  const char *postfix,
+			  char **value)
+{
+	int result = 0;
+	FILE *proc_out;
+	struct strbuf line = STRBUF_INIT;
+	char *cname = config_name("job", job, postfix);
+	struct child_process *config_proc = xmalloc(sizeof(*config_proc));
+
+	child_process_init(config_proc);
+
+	argv_array_push(&config_proc->args, "git");
+	argv_array_push(&config_proc->args, "-C");
+	argv_array_push(&config_proc->args, repo);
+	argv_array_push(&config_proc->args, "config");
+	argv_array_push(&config_proc->args, cname);
+	free(cname);
+
+	config_proc->out = -1;
+
+	if (start_command(config_proc)) {
+		warning(_("failed to start process for repo '%s'"),
+			repo);
+		result = 1;
+		goto cleanup;
+	}
+
+	proc_out = xfdopen(config_proc->out, "r");
+
+	/* if there is no line, leave the value as given */
+	if (!strbuf_getline(&line, proc_out))
+		*value = strbuf_detach(&line, NULL);
+	else
+		*value = NULL;
+
+	strbuf_release(&line);
+
+	fclose(proc_out);
+
+	result = finish_command(config_proc);
+
+cleanup:
+	free(config_proc);
+	return result;
+}
+
+static int try_get_timestamp(const char *job,
+			     const char *repo,
+			     const char *postfix,
+			     timestamp_t *t)
+{
+	char *value;
+	int result = try_get_config(job, repo, postfix, &value);
+
+	if (!result) {
+		*t = atol(value);
+		free(value);
+	}
+
+	return result;
+}
+
+static timestamp_t get_last_run(const char *job,
+				const char *repo)
+{
+	timestamp_t last_run = 0;
+
+	/* In an error state, do not run the job */
+	if (try_get_timestamp(job, repo, "lastrun", &last_run))
+		return MAX_TIMESTAMP;
+
+	return last_run;
+}
+
+static timestamp_t get_interval(const char *job,
+				const char *repo)
+{
+	timestamp_t interval = MAX_TIMESTAMP;
+
+	/* In an error state, do not run the job */
+	if (try_get_timestamp(job, repo, "interval", &interval))
+		return MAX_TIMESTAMP;
+
+	if (interval == MAX_TIMESTAMP) {
+		/* load defaults for each job */
+		if (!strcmp(job, "commit-graph") || !strcmp(job, "fetch"))
+			interval = 60 * 60; /* 1 hour */
+		else
+			interval = 24 * 60 * 60; /* 1 day */
+	}
+
+	return interval;
+}
+
+static int set_last_run(const char *job,
+			const char *repo,
+			timestamp_t last_run)
+{
+	int result = 0;
+	struct strbuf last_run_string = STRBUF_INIT;
+	struct child_process *config_proc = xmalloc(sizeof(*config_proc));
+	char *cname = config_name("job", job, "lastrun");
+
+	strbuf_addf(&last_run_string, "%"PRItime, last_run);
+
+	child_process_init(config_proc);
+
+	argv_array_push(&config_proc->args, "git");
+	argv_array_push(&config_proc->args, "-C");
+	argv_array_push(&config_proc->args, repo);
+	argv_array_push(&config_proc->args, "config");
+	argv_array_push(&config_proc->args, cname);
+	argv_array_push(&config_proc->args, last_run_string.buf);
+	free(cname);
+	strbuf_release(&last_run_string);
+
+	if (start_command(config_proc)) {
+		warning(_("failed to start process for repo '%s'"),
+			repo);
+		result = 1;
+		goto cleanup;
+	}
+
+	if (finish_command(config_proc)) {
+		warning(_("failed to finish process for repo '%s'"),
+			repo);
+		result = 1;
+	}
+
+cleanup:
+	free(config_proc);
+	return result;
+}
+
 static struct string_list arg_repos = STRING_LIST_INIT_DUP;
 
 static int arg_repos_append(const struct option *opt,
@@ -54,9 +218,20 @@ static int load_active_repos(struct string_list *repos)
 
 static int run_job(const char *job, const char *repo)
 {
+	int result;
 	struct argv_array cmd = ARGV_ARRAY_INIT;
+	timestamp_t now = time(NULL);
+	timestamp_t last_run = get_last_run(job, repo);
+	timestamp_t interval = get_interval(job, repo);
+
+	if (last_run + interval > now)
+		return 0;
+
 	argv_array_pushl(&cmd, "-C", repo, "run-job", job, NULL);
-	return run_command_v_opt(cmd.argv, RUN_GIT_CMD);
+	result = run_command_v_opt(cmd.argv, RUN_GIT_CMD);
+
+	set_last_run(job, repo, now);
+	return result;
 }
 
 static int run_job_loop_step(struct string_list *list)
-- 
gitgitgadget




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux