[RFC PATCH] build dominator tree and dominance frontier fast!

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Here is the git branch

https://git.kernel.org/pub/scm/devel/sparse/chrisl/sparse.git/log/?h=dominator-flat

This is what I have been working on for a while. I want to try some textbook
way to fix up the SSA issue in the sparse. A big step towards SSA conversion
is calculating the dominance frontier, that is where the phi node needs to
be place on.

This patch is actually not big and runs very fast. For the full kernel
build, this is the base line:

1278.08user 510.61system 1:22.54elapsed 2166%CPU (0avgtext+0avgdata
238344maxresident)k
16inputs+13816outputs (0major+135777070minor)pagefaults 0swaps
1281.58user 511.58system 1:22.61elapsed 2170%CPU (0avgtext+0avgdata
238296maxresident)k
0inputs+13816outputs (0major+135776958minor)pagefaults 0swaps

This is the dominator patch:
1291.94user 516.02system 1:23.24elapsed 2171%CPU (0avgtext+0avgdata
240832maxresident)k
8inputs+13824outputs (0major+136059495minor)pagefaults 0swaps
1289.89user 516.04system 1:23.15elapsed 2171%CPU (0avgtext+0avgdata
240820maxresident)k
0inputs+13816outputs (0major+136059689minor)pagefaults 0swaps

So the performance slow down is about 0.76%. That is including rebuild
dominator *tree* and DF when CFG changes.

After this perform the proper SSA conversion on memory to register
should be pretty straight forward.

BTW, I really appreciate if some one can write some validation function
to double check the output dominator tree and DF is correct and bug free.


Chris

Make memops use find_dominace_frontier

When dominator_valid = 0,  the DF need to be rebuild.
---
 Makefile    |   1 +
 dominator.c | 155 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 dominator.h |   8 ++++
 flow.c      |   3 ++
 linearize.c |  16 ++++++-
 linearize.h |   3 ++
 memops.c    |   4 ++
 7 files changed, 189 insertions(+), 1 deletion(-)
 create mode 100644 dominator.c
 create mode 100644 dominator.h

diff --git a/Makefile b/Makefile
index 815d2bf..8869977 100644
--- a/Makefile
+++ b/Makefile
@@ -120,6 +120,7 @@ LIB_H=    token.h parse.h lib.h symbol.h scope.h
expression.h target.h \
 LIB_OBJS= target.o parse.o tokenize.o pre-process.o symbol.o lib.o scope.o \
    expression.o show-parse.o evaluate.o expand.o inline.o linearize.o \
    char.o sort.o allocate.o compat-$(OS).o ptrlist.o \
+   dominator.o \
    builtin.o \
    stats.o \
    flow.o cse.o simplify.o memops.o liveness.o storage.o unssa.o dissect.o
diff --git a/dominator.c b/dominator.c
new file mode 100644
index 0000000..4caba95
--- /dev/null
+++ b/dominator.c
@@ -0,0 +1,155 @@
+/*
+ * Dominator - walk the CFG, construct dominator tree.
+ *
+ * Copyright (C) 2017 Christopher Li
+ */
+
+#include "dominator.h"
+#include "flow.h"
+
+int dominator_valid = 0;
+
+static void postorder_walk(struct basic_block *b, int generation,
struct basic_block_list **po, int *nr)
+{
+ struct basic_block *child;
+
+ b->generation = generation;
+ FOR_EACH_PTR(b->children, child) {
+ if (child->generation != generation && child->ep)
+ postorder_walk(child, generation, po, nr);
+ } END_FOR_EACH_PTR(child);
+ add_bb(po, b);
+ b->nr = --*nr;
+ b->idom = NULL;
+ if (b->df)
+ free_ptr_list(&b->df);
+}
+
+static struct basic_block* intersect(struct basic_block *b1, struct
basic_block *b2)
+{
+ while (b1 != b2) {
+ /*
+ * In the paper the nodes are numbered by post visit order.
+ * The entry point has the largest number.
+ *
+ * In this implementation the node number are order in
+ * the REVERSE post visit order. E.g. The entry point
+ * has the smallest node number. There for the compare
+ * is inverted here as well.
+ */
+ while (b1->nr > b2->nr) {
+ b1 = b1->idom;
+ }
+ while (b2->nr > b1->nr) {
+ b2 = b2->idom;
+ }
+ }
+ return b1;
+}
+
+/*
+ * A Simple, Fast Dominance Algorithm
+ * Keith D. Cooper, Timothy J. Harvey, and Ken Kennedy
+ */
+static void compute_dominator(struct basic_block *root, struct
basic_block_list *po)
+{
+ struct basic_block *b;
+ int changed;
+ unsigned long generation = bb_generation;
+
+ root->idom = root;
+ do {
+ changed = 0;
+ /* reverse postorder except root */
+ FOR_EACH_PTR_REVERSE(po, b) {
+ struct basic_block *new_idom = NULL;
+ struct basic_block *p;
+ new_idom = NULL;
+ FOR_EACH_PTR(b->parents, p) {
+ /* skip unprocessed or not reachable node */
+ if (!p->idom || p->generation != generation)
+ continue;
+
+ if (!new_idom) {
+ new_idom = p;
+ continue;
+ }
+ new_idom = intersect(p, new_idom);
+ } END_FOR_EACH_PTR(p);
+ if (b->idom != new_idom) {
+ b->idom = new_idom;
+ changed = 1;
+ }
+ } END_FOR_EACH_PTR_REVERSE(b);
+ } while (changed);
+ root->idom = NULL;
+}
+
+static int test_dominace(struct basic_block *begin, struct basic_block *end)
+{
+ if (!begin->idom)
+ return 1; /* begin is root */
+ while (begin != end) {
+ struct basic_block *idom = end->idom;
+ if (!idom)
+ return 0; /* end reach to root */
+ end = idom;
+ }
+ return 1;
+}
+
+static void add_new_bb(struct basic_block_list **list, struct
basic_block *new_bb)
+{
+ struct basic_block *b;
+ FOR_EACH_PTR(*list, b) {
+ if (b == new_bb)
+ return;
+ } END_FOR_EACH_PTR(b);
+ add_bb(list, new_bb);
+}
+
+
+static void compute_dominace_frontier(struct basic_block *root,
struct basic_block_list *po)
+{
+ struct basic_block *b;
+ FOR_EACH_PTR(po, b) {
+ struct basic_block *idom = b->idom;
+ struct basic_block *y, *w;
+
+ /* This loop compute DF_local(b) */
+ FOR_EACH_PTR(b->children, y) {
+ if (y->idom != b)
+ add_new_bb(&b->df, y);
+ } END_FOR_EACH_PTR(y);
+
+ /* This loop compute b branch part of DF_up(idom) */
+ FOR_EACH_PTR(b->df, w) {
+ if (idom == w || !test_dominace(idom, w))
+ add_new_bb(&idom->df, w);
+ } END_FOR_EACH_PTR(w);
+ } END_FOR_EACH_PTR(b);
+}
+
+void find_dominace_frontier(struct entrypoint *ep)
+{
+ struct basic_block_list *po = NULL;
+ struct basic_block *root = ep->entry->bb;
+ int nr = bb_list_size(ep->bbs);
+
+ dominator_valid = 1;
+ postorder_walk(root, ++bb_generation, &po, &nr);
+ ep->nr = root->nr;
+
+ /* last entry is root */
+ delete_last_basic_block(&po);
+
+ /* return if just one root node */
+ if (!po)
+ return;
+
+ compute_dominator(root, po);
+ compute_dominace_frontier(root, po);
+
+ free_ptr_list(&po);
+}
+
diff --git a/dominator.h b/dominator.h
new file mode 100644
index 0000000..f490d0b
--- /dev/null
+++ b/dominator.h
@@ -0,0 +1,8 @@
+#ifndef DOMINATOR_H
+#define DOMINATOR_H
+
+#include "linearize.h"
+void find_dominace_frontier(struct entrypoint *ep);
+extern int dominator_valid;
+
+#endif
diff --git a/flow.c b/flow.c
index 6b2c879..7c85c4c 100644
--- a/flow.c
+++ b/flow.c
@@ -17,6 +17,7 @@
 #include "linearize.h"
 #include "flow.h"
 #include "target.h"
+#include "dominator.h"

 unsigned long bb_generation;

@@ -36,6 +37,7 @@ static int rewrite_branch(struct basic_block *bb,
  /* We might find new if-conversions or non-dominating CSEs */
  /* we may also create new dead cycles */
  repeat_phase |= REPEAT_CSE | REPEAT_CFG_CLEANUP;
+ dominator_valid = 0;
  *ptr = new;
  replace_bb_in_list(&bb->children, old, new, 1);
  remove_bb_from_list(&old->parents, bb, 1);
@@ -1003,6 +1005,7 @@ out:
  * Merge the two.
  */
  repeat_phase |= REPEAT_CSE;
+ dominator_valid = 0;

  parent->children = bb->children;
  bb->children = NULL;
diff --git a/linearize.c b/linearize.c
index ba76397..65c0309 100644
--- a/linearize.c
+++ b/linearize.c
@@ -21,6 +21,7 @@
 #include "linearize.h"
 #include "flow.h"
 #include "target.h"
+#include "dominator.h"

 pseudo_t linearize_statement(struct entrypoint *ep, struct statement *stmt);
 pseudo_t linearize_expression(struct entrypoint *ep, struct expression *expr);
@@ -509,7 +510,6 @@ void show_bb(struct basic_block *bb)
  FOR_EACH_PTR(bb->defines, defines) {
  printf("  **defines %s **\n", show_pseudo(defines));
  } END_FOR_EACH_PTR(defines);
-
  if (bb->parents) {
  struct basic_block *from;
  FOR_EACH_PTR(bb->parents, from) {
@@ -525,6 +525,16 @@ void show_bb(struct basic_block *bb)
  stream_name(to->pos.stream), to->pos.line, to->pos.pos);
  } END_FOR_EACH_PTR(to);
  }
+ if (bb->idom)
+ printf("  **idom .L%u\n", bb->idom->nr);
+ if (bb->df) {
+ struct basic_block *df;
+ FOR_EACH_PTR(bb->df, df) {
+ printf("  **df .L%u (%s:%d:%d)**\n", df->nr,
+ stream_name(df->pos.stream), df->pos.line, df->pos.pos);
+ } END_FOR_EACH_PTR(df);
+ }
+
  }

  FOR_EACH_PTR(bb->insns, insn) {
@@ -674,6 +684,7 @@ void insert_branch(struct basic_block *bb, struct
instruction *jmp, struct basic
  remove_parent(child, bb);
  } END_FOR_EACH_PTR(child);
  PACK_PTR_LIST(&bb->children);
+ dominator_valid = 0;
 }


@@ -2226,12 +2237,15 @@ static struct entrypoint *linearize_fn(struct
symbol *sym, struct symbol *base_t
  show_entry(ep);
  }

+
  /*
  * Do trivial flow simplification - branches to
  * branches, kill dead basicblocks etc
  */
  kill_unreachable_bbs(ep);

+ dominator_valid = 0;
+
  /*
  * Turn symbols into pseudos
  */
diff --git a/linearize.h b/linearize.h
index bac82d7..de6cdf7 100644
--- a/linearize.h
+++ b/linearize.h
@@ -227,6 +227,8 @@ struct basic_block {
  unsigned long generation;
  int context;
  struct entrypoint *ep;
+ struct basic_block *idom; /* immediate dominator */
+ struct basic_block_list *df; /* dominance frontier */
  struct basic_block_list *parents; /* sources */
  struct basic_block_list *children; /* destinations */
  struct instruction_list *insns; /* Linear list of instructions */
@@ -326,6 +328,7 @@ struct entrypoint {
  struct basic_block_list *bbs;
  struct basic_block *active;
  struct instruction *entry;
+ int nr;
 };

 extern void insert_select(struct basic_block *bb, struct instruction
*br, struct instruction *phi, pseudo_t if_true, pseudo_t if_false);
diff --git a/memops.c b/memops.c
index aeacdf5..e3b2c51 100644
--- a/memops.c
+++ b/memops.c
@@ -15,6 +15,7 @@
 #include "expression.h"
 #include "linearize.h"
 #include "flow.h"
+#include "dominator.h"

 static int find_dominating_parents(pseudo_t pseudo, struct instruction *insn,
  struct basic_block *bb, unsigned long generation, struct pseudo_list
**dominators,
@@ -187,6 +188,9 @@ void simplify_memops(struct entrypoint *ep)
 {
  struct basic_block *bb;

+ if (!dominator_valid)
+ find_dominace_frontier(ep);
+
  FOR_EACH_PTR_REVERSE(ep->bbs, bb) {
  simplify_loads(bb);
  } END_FOR_EACH_PTR_REVERSE(bb);
-- 
2.14.3
--
To unsubscribe from this list: send the line "unsubscribe linux-sparse" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Newbies FAQ]     [LKML]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Trinity Fuzzer Tool]

  Powered by Linux