theory Tarjan
imports Main
begin
text \
Tarjan's algorithm computes the strongly connected components of
a finite graph using depth-first search. We formalize a functional
version of the algorithm in Isabelle/HOL, following a development
of Lévy et al. in Why3 that is available at
\url{http://pauillac.inria.fr/~levy/why3/graph/abs/scct/1/scc.html}.
\
declare Let_def[simp] -- \expand let-constructions automatically\
text \
Definition of an auxiliary data structure holding local variables
during the execution of Tarjan's algorithm.
\
record 'v env =
blacks:: "'v set"
stack:: "'v list"
sccs:: "'v set set"
sn:: nat
num:: "'v \ int"
locale graph =
fixes vertices :: "'v set"
and successors :: "'v \ 'v set"
assumes vfin: "finite vertices"
and sclosed: "\x \ vertices. successors x \ vertices"
context graph
begin
section {* Reachability in graphs *}
definition edge where
-- \the edge relation over vertices\
"edge x y \ x \ vertices \ y \ successors x"
definition xedge_to where
-- \@{text ys} is a suffix of @{text xs}, @{text y} appears in @{text ys},
and there is an edge from some node in the prefix of @{text xs} to @{text y}\
"xedge_to xs ys y \
y \ set ys
\ (\zs. xs = zs @ ys \ (\z \ set zs. edge z y))"
inductive reachable where
reachable_refl[simp]: "reachable x x"
| reachable_succ: "\x \ vertices; y \ successors x\ \ reachable x y"
| reachable_trans: "\reachable x y; reachable y z\ \ reachable x z"
text {*
Given some set @{text S} and two vertices @{text x} and @{text y}
such that @{text y} is reachable from @{text x}, and @{text x} is
an element of @{text S} but @{text y} is not, then there exists
some vertices @{text x'} and @{text y'} linked by an edge such that
@{text x'} is an element of @{text S}, @{text y'} is not,
@{text x'} is reachable from @{text x}, and @{text y} is reachable
from @{text y'}.
*}
lemma reachable_crossing_set:
assumes 1: "reachable x y" and 2: "x \ S" and 3: "y \ S"
obtains x' y' where
"x' \ S" "y' \ S" "edge x' y'" "reachable x x'" "reachable y' y"
proof -
from assms
have "\x' y'. x' \ S \ y' \ S \ edge x' y' \ reachable x x' \ reachable y' y"
unfolding edge_def using reachable_refl reachable_trans
by induct (blast+)
with that show ?thesis by blast
qed
section {* Strongly connected components *}
definition is_subscc where
"is_subscc S \ \x \ S. \y \ S. reachable x y"
definition is_scc where
"is_scc S \ S \ {} \ is_subscc S \ (\S'. is_subscc S' \ S \ S' \ S' = S)"
lemma subscc_add:
assumes "is_subscc S" and "x \ S"
and "reachable x y" and "reachable y x"
shows "is_subscc (insert y S)"
using assms unfolding is_subscc_def by (metis insert_iff reachable_trans)
lemma sccE:
-- \Two vertices that are reachable from each other are in the same SCC.\
assumes "is_scc S" and "x \ S"
and "reachable x y" and "reachable y x"
shows "y \ S"
using assms unfolding is_scc_def by (metis insertI1 subscc_add subset_insertI)
lemma scc_partition:
-- \Two SCCs that contain a common element are identical.\
assumes "is_scc S" and "is_scc S'" and "x \ S \ S'"
shows "S = S'"
using assms unfolding is_scc_def is_subscc_def
by (metis IntE assms(2) sccE subsetI)
section {* Auxiliary functions *}
definition infty ("\") where
-- \integer exceeding any one used as a vertex number during the algorithm\
"\ = int (card vertices)"
definition set_infty where
-- \set @{text "f x"} to @{text \} for all x in xs\
"set_infty xs f = fold (\x g. g (x := \)) xs f"
lemma set_infty:
"(set_infty xs f) x = (if x \ set xs then \ else f x)"
unfolding set_infty_def by (induct xs arbitrary: f) auto
text {*
Split a list at the first occurrence of a given element.
Returns the two sublists of elements strictly before and
strictly after the element. If the element does not occur
in the list, returns a pair formed by the entire list and
the empty list.
*}
fun split_list where
"split_list x [] = ([], [])"
| "split_list x (y # xs) =
(if x = y then ([], xs) else
(let (l, r) = split_list x xs in
(y # l, r)))"
lemma split_list_concat:
-- \Concatenating the two sublists produced by @{text "split_list"}
yields back the original list.\
assumes "x \ set xs"
shows "(fst (split_list x xs)) @ (x # snd (split_list x xs)) = xs"
using assms by (induct xs) (auto simp: split_def)
lemma split_list_fst:
"x \ set (fst (split_list x xs))"
by (induct xs) (auto simp: split_def)
lemma unique_split_list:
-- \An element that occurs only once identifies a unique
decomposition of a list.\
assumes "x \ set xs" and "x \ set ys"
shows "xs @ x # ys = xs' @ x # ys' \ (xs = xs' \ ys = ys')"
using assms
by(auto simp: append_eq_Cons_conv Cons_eq_append_conv append_eq_append_conv2)
text {*
Push a vertex on the stack and increment the sequence number.
The pushed vertex is associated with the (old) sequence number.
*}
definition add_stack_incr where
"add_stack_incr x e =
(let n = sn e in
e \ stack := x # (stack e),
sn := n+1,
num := (num e) (x := int n) \)"
definition add_blacks where
-- \Add vertex @{text x} to the set of black vertices in @{text e}.\
"add_blacks x e = e \ blacks := insert x (blacks e) \"
section \Main functions used for Tarjan's algorithms\
subsection \Function definitions\
text {*
We define two mutually recursive functions that contain the essence
of Tarjan's algorithm. Their arguments are respectively a single
vertex and a set of vertices, as well as an environment that contains
the local variables of the algorithm, and an auxiliary parameter
representing the set of ``gray'' vertices, which is used only for
the proof. The main function is then obtained by specializing the
function operating on a set of vertices.
*}
function (domintros) dfs1 and dfs' where
"dfs1 x e grays =
(let (n1, e1) =
dfs' (successors x) (add_stack_incr x e) (insert x grays) in
if n1 < int (sn e) then (n1, add_blacks x e1)
else
(let (l,r) = split_list x (stack e1) in
(\,
\ blacks = insert x (blacks e1),
stack = r,
sccs = insert (insert x (set l)) (sccs e1),
sn = sn e1,
num = set_infty (x # l) (num e1) \ )))"
| "dfs' roots e grays =
(if roots = {} then (\, e)
else
(let x = SOME x. x \ roots;
res1 = (if num e x \ -1 then (num e x, e) else dfs1 x e grays)
in
(let res2 = dfs' (roots - {x}) (snd res1) grays in
(min (fst res1) (fst res2), (snd res2)) )))"
by pat_completeness auto
definition init_env where
"init_env \ \ blacks = {},
stack = [],
sccs = {},
sn = 0,
num = \_. -1 \"
definition tarjan where
"tarjan \ sccs (snd (dfs' vertices init_env {}))"
subsection \Well-definedness of the functions\
text \
We did not prove termination for the two mutually recursive
functions @{text dfs1} and @{text dfs'} defined above, and
indeed it is easy to see that they do not terminate for
arbitrary arguments. Isabelle allows us to define ``partial''
recursive functions, for which it introduces an auxiliary
domain predicate that characterizes their domain of definition.
We now make this more concrete and prove that the two functions
terminate when called for nodes of the graph, also assuming an
elementary well-definedness condition for environments. These
conditions are met in the cases of interest, and in particular
in the call to @{text dfs'} in the main function @{text tarjan}.
Intuitively, the reason is that every (possibly indirect)
recursive call to @{text dfs'} either decreases the set of roots
or increases the set of gray nodes.
\
(** unnecessary
lemma blacks_increasing:
-- \Whenever the functions terminate, vertices that are colored
black in the calling environment are still black in the
environment that they return.\
"dfs1_dfs'_dom (Inl (x,e,grays)) \
blacks e \ blacks (snd (dfs1 x e grays))"
"dfs1_dfs'_dom (Inr (roots,e,grays)) \
blacks e \ blacks (snd (dfs' roots e grays))"
proof (induct rule: dfs1_dfs'.pinduct)
case (1 x e grays)
then show ?case
by (auto simp: dfs1.psimps case_prod_beta
add_blacks_def add_stack_incr_def)
next
case (2 roots e grays)
then show ?case
by (fastforce simp: dfs'.psimps case_prod_beta)
qed
**)
text \
We are only interested in environments that assign positive
numbers to gray nodes, and we show that calls to @{text dfs1}
and @{text dfs'} preserve this property.
\
definition grays_num_defined where
"grays_num_defined e grays \ \x \ grays. num e x \ -1"
lemma grays_num_defined:
"\dfs1_dfs'_dom (Inl (x,e,grays)); grays_num_defined e grays\ \
grays_num_defined (snd (dfs1 x e grays)) grays"
"\dfs1_dfs'_dom (Inr (roots,e,grays)); grays_num_defined e grays\ \
grays_num_defined (snd (dfs' roots e grays)) grays"
proof (induct rule: dfs1_dfs'.pinduct)
case (1 x e grays)
then show ?case
by (auto simp: dfs1.psimps case_prod_beta grays_num_defined_def
add_blacks_def add_stack_incr_def set_infty infty_def)
next
case (2 roots e grays)
then show ?case
by (fastforce simp: dfs'.psimps case_prod_beta)
qed
text \
The following relation underlies the termination argument used
for proving well-definedness of the functions @{text dfs1} and
@{text dfs'}. It is defined on the disjoint sum of the types of
arguments of the two functions and relates the arguments of
(mutually) recursive calls.
\
definition dfs1_dfs'_term where
"dfs1_dfs'_term \
{ (Inl(x, e::'v env, grays), Inr(roots,e,grays)) |
x e grays roots .
roots \ vertices \ x \ roots \ grays \ vertices }
\ { (Inr(roots, e::'v env, insert x grays), Inl(x, e', grays)) |
x e e' grays roots .
grays \ vertices \ x \ vertices - grays }
\ { (Inr(roots, e::'v env, grays), Inr(roots', e', grays)) |
roots roots' e e' grays .
roots' \ vertices \ roots \ roots' \ grays \ vertices }"
text \
We prove well-foundedness of the above relation using the
following function that embeds it into triples whose first
component is the complement of the gray nodes, whose second
component is the set of root nodes, and whose third component
is 1 or 2 depending on the function being called. The third
component corresponds to the first case in the definition of
@{text dfs1_dfs'_term}.
\
fun dfs1_dfs'_to_tuple where
"dfs1_dfs'_to_tuple (Inl(x::'v, e::'v env, grays)) = (vertices - grays, {x}, 1::nat)"
| "dfs1_dfs'_to_tuple (Inr(roots, e::'v env, grays)) = (vertices - grays, roots, 2)"
lemma wf_term: "wf dfs1_dfs'_term"
proof -
let ?r = "(finite_psubset :: ('v set \ 'v set) set)
<*lex*> (finite_psubset :: ('v set \ 'v set) set)
<*lex*> pred_nat"
have "wf ?r"
using wf_finite_psubset wf_pred_nat by blast
moreover
have "dfs1_dfs'_term \ inv_image ?r dfs1_dfs'_to_tuple"
unfolding dfs1_dfs'_term_def pred_nat_def
using vfin by (auto dest: finite_subset)
ultimately show ?thesis
using wf_inv_image wf_subset by blast
qed
text \
The following theorem establishes sufficient conditions under
which the two functions @{text dfs1} and @{text dfs'} terminate.
The proof proceeds by well-founded induction using the relation
@{text dfs1_dfs'_term} and makes use of the theorem
@{text dfs1_dfs'.domintros} that was generated by Isabelle from
the mutually recursive definitions in order to characterize the
domain conditions for these functions.
\
theorem dfs1_dfs'_termination:
"\grays \ vertices; x \ vertices - grays; grays_num_defined e grays\
\ dfs1_dfs'_dom (Inl(x,e,grays))"
"\grays \ vertices; roots \ vertices; grays_num_defined e grays\
\ dfs1_dfs'_dom (Inr(roots,e,grays))"
proof -
{ fix args
have "(case args
of Inl(x,e,grays) \
grays \ vertices \ x \ vertices - grays \ grays_num_defined e grays
| Inr(roots,e,grays) \
grays \ vertices \ roots \ vertices \ grays_num_defined e grays)
\ dfs1_dfs'_dom args" (is "?P args \ ?Q args")
proof (rule wf_induct[OF wf_term])
fix arg :: "('v \ 'v env \ 'v set) + ('v set \ 'v env \ 'v set)"
assume ih: "\arg'. (arg',arg) \ dfs1_dfs'_term
\ (?P arg' \ ?Q arg')"
show "?P arg \ ?Q arg"
proof
assume P: "?P arg"
show "?Q arg"
proof (cases arg)
case (Inl a)
then obtain x e grays where a: "arg = Inl(x,e,grays)"
using dfs1.cases by metis
have "?Q (Inl(x,e,grays))"
proof (rule dfs1_dfs'.domintros)
let ?recarg = "Inr (successors x, add_stack_incr x e, insert x grays)"
from a P have "(?recarg, arg) \ dfs1_dfs'_term"
by (auto simp: dfs1_dfs'_term_def)
moreover
from a P sclosed have "?P ?recarg"
by (auto simp: add_stack_incr_def grays_num_defined_def)
ultimately show "?Q ?recarg"
using ih by auto
qed
with a show ?thesis by simp
next
case (Inr b)
then obtain roots e grays where b: "arg = Inr(roots,e,grays)"
using dfs'.cases by metis
let ?sx = "SOME x. x \ roots"
let ?rec1arg = "Inl (?sx, e, grays)"
let ?rec2arg = "Inr (roots - {?sx}, e, grays)"
let ?rec3arg = "Inr (roots - {?sx}, snd (dfs1 ?sx e grays), grays)"
have "?Q (Inr(roots,e,grays))"
proof (rule dfs1_dfs'.domintros)
fix x
assume 1: "x \ roots"
and 2: "num e ?sx = -1"
and 3: "\ dfs1_dfs'_dom ?rec1arg"
from 1 have sx: "?sx \ roots" by (rule someI)
with P b have "(?rec1arg, arg) \ dfs1_dfs'_term"
by (auto simp: dfs1_dfs'_term_def)
moreover
from sx 2 P b have "?P ?rec1arg"
by (auto simp: grays_num_defined_def)
ultimately show False
using ih 3 by auto
next
fix x
assume "x \ roots"
hence sx: "?sx \ roots" by (rule someI)
from sx b P have "(?rec2arg, arg) \ dfs1_dfs'_term"
by (auto simp: dfs1_dfs'_term_def)
moreover
from P b have "?P ?rec2arg" by auto
ultimately show "dfs1_dfs'_dom ?rec2arg"
using ih by auto
next
fix x
assume 1: "x \ roots" and 2: "num e ?sx = -1"
from 1 have sx: "?sx \ roots" by (rule someI)
from sx b P have "(?rec3arg, arg) \ dfs1_dfs'_term"
by (auto simp: dfs1_dfs'_term_def)
moreover
have "dfs1_dfs'_dom ?rec1arg"
proof -
from sx P b have "(?rec1arg, arg) \ dfs1_dfs'_term"
by (auto simp: dfs1_dfs'_term_def)
moreover
from sx 2 P b have "?P ?rec1arg"
by (auto simp: grays_num_defined_def)
ultimately show ?thesis
using ih by auto
qed
with P b have "grays_num_defined (snd (dfs1 ?sx e grays)) grays"
by (force elim: grays_num_defined)
with P b have "?P ?rec3arg" by auto
ultimately show "dfs1_dfs'_dom ?rec3arg"
using ih by auto
qed
with b show ?thesis by simp
qed
qed
qed
}
note dom = this
from dom
show "\grays \ vertices; x \ vertices - grays; grays_num_defined e grays\
\ dfs1_dfs'_dom (Inl(x,e,grays))"
by auto
from dom
show "\grays \ vertices; roots \ vertices; grays_num_defined e grays\
\ dfs1_dfs'_dom (Inr(roots,e,grays))"
by auto
qed
section {* Auxiliary notions for the proof of partial correctness *}
text \
The proof of partial correctness is more challenging and requires
some further concepts that we now define.
We need to reason about the relative order of elements in a list
(specifically, the stack used in the algorithm).
\
definition precedes ("_ \ _ in _" [100,100,100] 39) where
-- \@{text x} has an occurrence in @{text xs} that
precedes an occurrence of @{text y}.\
"x \ y in xs \ \l r. xs = l @ (x # r) \ y \ set (x # r)"
lemma precedes_mem:
assumes "x \ y in xs"
shows "x \ set xs" "y \ set xs"
using assms unfolding precedes_def by auto
lemma head_precedes:
assumes "y \ set (x # xs)"
shows "x \ y in (x # xs)"
using assms unfolding precedes_def by force
lemma precedes_in_tail:
assumes "x \ z"
shows "x \ y in (z # zs) \ x \ y in zs"
using assms unfolding precedes_def by (auto simp: Cons_eq_append_conv)
lemma tail_not_precedes:
assumes "y \ x in (x # xs)" "x \ set xs"
shows "x = y"
using assms unfolding precedes_def
by (metis Cons_eq_append_conv Un_iff list.inject set_append)
lemma split_list_precedes:
assumes "y \ set (ys @ [x])"
shows "y \ x in (ys @ x # xs)"
proof (cases "y \ set ys")
case True
from this[THEN split_list] show ?thesis
unfolding precedes_def by force
next
case False
with assms show ?thesis
unfolding precedes_def by auto
qed
lemma precedes_refl [simp]: "(x \ x in xs) = (x \ set xs)"
proof
assume "x \ x in xs" thus "x \ set xs"
by (simp add: precedes_mem)
next
assume "x \ set xs"
from this[THEN split_list] show "x \ x in xs"
unfolding precedes_def by auto
qed
lemma precedes_append_left:
assumes "x \ y in xs"
shows "x \ y in (ys @ xs)"
using assms unfolding precedes_def by (metis append.assoc)
lemma precedes_append_left_iff:
assumes "x \ set ys"
shows "x \ y in (ys @ xs) \ x \ y in xs" (is "?lhs = ?rhs")
proof
assume "?lhs"
then obtain l r where lr: "ys @ xs = l @ (x # r)" "y \ set (x # r)"
unfolding precedes_def by blast
then obtain us where
"(ys = l @ us \ us @ xs = x # r) \ (ys @ us = l \ xs = us @ (x # r))"
by (auto simp: append_eq_append_conv2)
thus ?rhs
proof
assume us: "ys = l @ us \ us @ xs = x # r"
with assms have "us = []"
by (metis Cons_eq_append_conv in_set_conv_decomp)
with us lr show ?rhs
unfolding precedes_def by auto
next
assume us: "ys @ us = l \ xs = us @ (x # r)"
with \y \ set (x # r)\ show ?rhs
unfolding precedes_def by blast
qed
next
assume "?rhs" thus "?lhs" by (rule precedes_append_left)
qed
lemma precedes_append_right:
assumes "x \