[tip:perf/core] tracing/filter: Use a tree instead of stack for filter_match_preds()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Commit-ID:  61e9dea20e1ada886cc49a9ec6fe3c6ac0de7324
Gitweb:     http://git.kernel.org/tip/61e9dea20e1ada886cc49a9ec6fe3c6ac0de7324
Author:     Steven Rostedt <srostedt@xxxxxxxxxx>
AuthorDate: Thu, 27 Jan 2011 22:54:33 -0500
Committer:  Steven Rostedt <rostedt@xxxxxxxxxxx>
CommitDate: Mon, 7 Feb 2011 20:56:19 -0500

tracing/filter: Use a tree instead of stack for filter_match_preds()

Currently the filter_match_preds() requires a stack to push
and pop the preds to determine if the filter matches the record or not.
This has two drawbacks:

1) It requires a stack to store state information. As this is done
   in fast paths we can't allocate the storage for this stack, and
   we can't use a global as it must be re-entrant. The stack is stored
   on the kernel stack and this greatly limits how many preds we
   may allow.

2) All conditions are calculated even when a short circuit exists.
   a || b  will always calculate a and b even though a was determined
   to be true.

Using a tree we can walk a constant structure that will save
the state as we go. The algorithm is simply:

  pred = root;
  do {
	switch (move) {
	case MOVE_DOWN:
		if (OR or AND) {
			pred = left;
			continue;
		}
		if (pred == root)
			break;
		match = pred->fn();
		pred = pred->parent;
		move = left child ? MOVE_UP_FROM_LEFT : MOVE_UP_FROM_RIGHT;
		continue;

	case MOVE_UP_FROM_LEFT:
		/* Only OR or AND can be a parent */
		if (match && OR || !match && AND) {
			/* short circuit */
			if (pred == root)
				break;
			pred = pred->parent;
			move = left child ?
				MOVE_UP_FROM_LEFT :
				MOVE_UP_FROM_RIGHT;
			continue;
		}
		pred = pred->right;
		move = MOVE_DOWN;
		continue;

	case MOVE_UP_FROM_RIGHT:
		if (pred == root)
			break;
		pred = pred->parent;
		move = left child ? MOVE_UP_FROM_LEFT : MOVE_UP_FROM_RIGHT;
		continue;
	}
	done = 1;
  } while (!done);

This way there's no strict limit to how many preds we allow
and it also will short circuit the logical operations when possible.

Cc: Tom Zanussi <tzanussi@xxxxxxxxx>
Signed-off-by: Steven Rostedt <rostedt@xxxxxxxxxxx>
---
 kernel/trace/trace.h               |    9 ++-
 kernel/trace/trace_events_filter.c |  231 +++++++++++++++++++++++++++++-------
 2 files changed, 194 insertions(+), 46 deletions(-)

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 254d04a..bba34a7 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -664,6 +664,7 @@ struct event_filter {
 	int			n_preds;	/* Number assigned */
 	int			a_preds;	/* allocated */
 	struct filter_pred	*preds;
+	struct filter_pred	*root;
 	char			*filter_string;
 };
 
@@ -675,6 +676,9 @@ struct event_subsystem {
 	int			nr_events;
 };
 
+#define FILTER_PRED_INVALID	((unsigned short)-1)
+#define FILTER_PRED_IS_RIGHT	(1 << 15)
+
 struct filter_pred;
 struct regex;
 
@@ -704,7 +708,10 @@ struct filter_pred {
 	int 			offset;
 	int 			not;
 	int 			op;
-	int 			pop_n;
+	unsigned short		index;
+	unsigned short		parent;
+	unsigned short		left;
+	unsigned short		right;
 };
 
 extern struct list_head ftrace_common_fields;
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 2f5458e..1039049 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -123,6 +123,11 @@ struct filter_parse_state {
 	} operand;
 };
 
+struct pred_stack {
+	struct filter_pred	**preds;
+	int			index;
+};
+
 #define DEFINE_COMPARISON_PRED(type)					\
 static int filter_pred_##type(struct filter_pred *pred, void *event)	\
 {									\
@@ -357,52 +362,95 @@ static void filter_build_regex(struct filter_pred *pred)
 	pred->not ^= not;
 }
 
+enum move_type {
+	MOVE_DOWN,
+	MOVE_UP_FROM_LEFT,
+	MOVE_UP_FROM_RIGHT
+};
+
+static struct filter_pred *
+get_pred_parent(struct filter_pred *pred, struct filter_pred *preds,
+		int index, enum move_type *move)
+{
+	if (pred->parent & FILTER_PRED_IS_RIGHT)
+		*move = MOVE_UP_FROM_RIGHT;
+	else
+		*move = MOVE_UP_FROM_LEFT;
+	pred = &preds[pred->parent & ~FILTER_PRED_IS_RIGHT];
+
+	return pred;
+}
+
 /* return 1 if event matches, 0 otherwise (discard) */
 int filter_match_preds(struct event_filter *filter, void *rec)
 {
-	int match = -1, top = 0, val1 = 0, val2 = 0;
-	int stack[MAX_FILTER_PRED];
+	int match = -1;
+	enum move_type move = MOVE_DOWN;
 	struct filter_pred *preds;
 	struct filter_pred *pred;
+	struct filter_pred *root;
 	int n_preds = ACCESS_ONCE(filter->n_preds);
-	int i;
+	int done = 0;
 
 	/* no filter is considered a match */
 	if (!n_preds)
 		return 1;
 
 	/*
-	 * n_preds and filter->preds is protect with preemption disabled.
+	 * n_preds, root and filter->preds are protect with preemption disabled.
 	 */
 	preds = rcu_dereference_sched(filter->preds);
+	root = rcu_dereference_sched(filter->root);
+	if (!root)
+		return 1;
 
-	for (i = 0; i < n_preds; i++) {
-		pred = &preds[i];
-		if (!pred->pop_n) {
+	pred = root;
+
+	/* match is currently meaningless */
+	match = -1;
+
+	do {
+		switch (move) {
+		case MOVE_DOWN:
+			/* only AND and OR have children */
+			if (pred->left != FILTER_PRED_INVALID) {
+				/* keep going to leaf node */
+				pred = &preds[pred->left];
+				continue;
+			}
 			match = pred->fn(pred, rec);
-			stack[top++] = match;
+			/* If this pred is the only pred */
+			if (pred == root)
+				break;
+			pred = get_pred_parent(pred, preds,
+					       pred->parent, &move);
+			continue;
+		case MOVE_UP_FROM_LEFT:
+			/* Check for short circuits */
+			if ((match && pred->op == OP_OR) ||
+			    (!match && pred->op == OP_AND)) {
+				if (pred == root)
+					break;
+				pred = get_pred_parent(pred, preds,
+						       pred->parent, &move);
+				continue;
+			}
+			/* now go down the right side of the tree. */
+			pred = &preds[pred->right];
+			move = MOVE_DOWN;
+			continue;
+		case MOVE_UP_FROM_RIGHT:
+			/* We finished this equation. */
+			if (pred == root)
+				break;
+			pred = get_pred_parent(pred, preds,
+					       pred->parent, &move);
 			continue;
 		}
-		if (pred->pop_n > top) {
-			WARN_ON_ONCE(1);
-			return 0;
-		}
-		val1 = stack[--top];
-		val2 = stack[--top];
-		switch (pred->op) {
-		case OP_AND:
-			match = val1 && val2;
-			break;
-		case OP_OR:
-			match = val1 || val2;
-			break;
-		default:
-			WARN_ONCE(1, "filter op is not AND or OR");
-		}
-		stack[top++] = match;
-	}
+		done = 1;
+	} while (!done);
 
-	return stack[--top];
+	return match;
 }
 EXPORT_SYMBOL_GPL(filter_match_preds);
 
@@ -539,10 +587,58 @@ static void filter_clear_pred(struct filter_pred *pred)
 	pred->regex.len = 0;
 }
 
-static int filter_set_pred(struct filter_pred *dest,
+static int __alloc_pred_stack(struct pred_stack *stack, int n_preds)
+{
+	stack->preds = kzalloc(sizeof(*stack->preds)*(n_preds + 1), GFP_KERNEL);
+	if (!stack->preds)
+		return -ENOMEM;
+	stack->index = n_preds;
+	return 0;
+}
+
+static void __free_pred_stack(struct pred_stack *stack)
+{
+	kfree(stack->preds);
+	stack->index = 0;
+}
+
+static int __push_pred_stack(struct pred_stack *stack,
+			     struct filter_pred *pred)
+{
+	int index = stack->index;
+
+	if (WARN_ON(index == 0))
+		return -ENOSPC;
+
+	stack->preds[--index] = pred;
+	stack->index = index;
+	return 0;
+}
+
+static struct filter_pred *
+__pop_pred_stack(struct pred_stack *stack)
+{
+	struct filter_pred *pred;
+	int index = stack->index;
+
+	pred = stack->preds[index++];
+	if (!pred)
+		return NULL;
+
+	stack->index = index;
+	return pred;
+}
+
+static int filter_set_pred(struct event_filter *filter,
+			   int idx,
+			   struct pred_stack *stack,
 			   struct filter_pred *src,
 			   filter_pred_fn_t fn)
 {
+	struct filter_pred *dest = &filter->preds[idx];
+	struct filter_pred *left;
+	struct filter_pred *right;
+
 	*dest = *src;
 	if (src->field_name) {
 		dest->field_name = kstrdup(src->field_name, GFP_KERNEL);
@@ -550,8 +646,25 @@ static int filter_set_pred(struct filter_pred *dest,
 			return -ENOMEM;
 	}
 	dest->fn = fn;
+	dest->index = idx;
 
-	return 0;
+	if (dest->op == OP_OR || dest->op == OP_AND) {
+		right = __pop_pred_stack(stack);
+		left = __pop_pred_stack(stack);
+		if (!left || !right)
+			return -EINVAL;
+		dest->left = left->index;
+		dest->right = right->index;
+		left->parent = dest->index;
+		right->parent = dest->index | FILTER_PRED_IS_RIGHT;
+	} else
+		/*
+		 * Make dest->left invalid to be used as a quick
+		 * way to know this is a leaf node.
+		 */
+		dest->left = FILTER_PRED_INVALID;
+
+	return __push_pred_stack(stack, dest);
 }
 
 static void __free_preds(struct event_filter *filter)
@@ -574,6 +687,7 @@ static void reset_preds(struct event_filter *filter)
 	int i;
 
 	filter->n_preds = 0;
+	filter->root = NULL;
 	if (!filter->preds)
 		return;
 
@@ -707,6 +821,7 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
 			      struct ftrace_event_call *call,
 			      struct event_filter *filter,
 			      struct filter_pred *pred,
+			      struct pred_stack *stack,
 			      filter_pred_fn_t fn)
 {
 	int idx, err;
@@ -718,7 +833,7 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
 
 	idx = filter->n_preds;
 	filter_clear_pred(&filter->preds[idx]);
-	err = filter_set_pred(&filter->preds[idx], pred, fn);
+	err = filter_set_pred(filter, idx, stack, pred, fn);
 	if (err)
 		return err;
 
@@ -803,6 +918,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
 			   struct ftrace_event_call *call,
 			   struct event_filter *filter,
 			   struct filter_pred *pred,
+			   struct pred_stack *stack,
 			   bool dry_run)
 {
 	struct ftrace_event_field *field;
@@ -812,13 +928,10 @@ static int filter_add_pred(struct filter_parse_state *ps,
 
 	fn = pred->fn = filter_pred_none;
 
-	if (pred->op == OP_AND) {
-		pred->pop_n = 2;
+	if (pred->op == OP_AND)
 		goto add_pred_fn;
-	} else if (pred->op == OP_OR) {
-		pred->pop_n = 2;
+	else if (pred->op == OP_OR)
 		goto add_pred_fn;
-	}
 
 	field = find_event_field(call, pred->field_name);
 	if (!field) {
@@ -867,7 +980,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
 
 add_pred_fn:
 	if (!dry_run)
-		return filter_add_pred_fn(ps, call, filter, pred, fn);
+		return filter_add_pred_fn(ps, call, filter, pred, stack, fn);
 	return 0;
 }
 
@@ -1248,6 +1361,7 @@ static int replace_preds(struct ftrace_event_call *call,
 	char *operand1 = NULL, *operand2 = NULL;
 	struct filter_pred *pred;
 	struct postfix_elt *elt;
+	struct pred_stack stack = { }; /* init to NULL */
 	int err;
 	int n_preds = 0;
 
@@ -1262,9 +1376,12 @@ static int replace_preds(struct ftrace_event_call *call,
 		return err;
 
 	if (!dry_run) {
-		err = __alloc_preds(filter, n_preds);
+		err = __alloc_pred_stack(&stack, n_preds);
 		if (err)
 			return err;
+		err = __alloc_preds(filter, n_preds);
+		if (err)
+			goto fail;
 	}
 
 	n_preds = 0;
@@ -1276,14 +1393,16 @@ static int replace_preds(struct ftrace_event_call *call,
 				operand2 = elt->operand;
 			else {
 				parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0);
-				return -EINVAL;
+				err = -EINVAL;
+				goto fail;
 			}
 			continue;
 		}
 
 		if (WARN_ON(n_preds++ == MAX_FILTER_PRED)) {
 			parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
-			return -ENOSPC;
+			err = -ENOSPC;
+			goto fail;
 		}
 
 		if (elt->op == OP_AND || elt->op == OP_OR) {
@@ -1293,22 +1412,44 @@ static int replace_preds(struct ftrace_event_call *call,
 
 		if (!operand1 || !operand2) {
 			parse_error(ps, FILT_ERR_MISSING_FIELD, 0);
-			return -EINVAL;
+			err = -EINVAL;
+			goto fail;
 		}
 
 		pred = create_pred(elt->op, operand1, operand2);
 add_pred:
-		if (!pred)
-			return -ENOMEM;
-		err = filter_add_pred(ps, call, filter, pred, dry_run);
+		if (!pred) {
+			err = -ENOMEM;
+			goto fail;
+		}
+		err = filter_add_pred(ps, call, filter, pred, &stack, dry_run);
 		filter_free_pred(pred);
 		if (err)
-			return err;
+			goto fail;
 
 		operand1 = operand2 = NULL;
 	}
 
-	return 0;
+	if (!dry_run) {
+		/* We should have one item left on the stack */
+		pred = __pop_pred_stack(&stack);
+		if (!pred)
+			return -EINVAL;
+		/* This item is where we start from in matching */
+		filter->root = pred;
+		/* Make sure the stack is empty */
+		pred = __pop_pred_stack(&stack);
+		if (WARN_ON(pred)) {
+			err = -EINVAL;
+			filter->root = NULL;
+			goto fail;
+		}
+	}
+
+	err = 0;
+fail:
+	__free_pred_stack(&stack);
+	return err;
 }
 
 static int replace_system_preds(struct event_subsystem *system,
--
To unsubscribe from this list: send the line "unsubscribe linux-tip-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Stable Commits]     [Linux Stable Kernel]     [Linux Kernel]     [Linux USB Devel]     [Linux Video &Media]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]

  Powered by Linux