LeftistHeap.ml

(* This code is inspired by Chapter 3 of Chris Okasaki's book, "Purely
   Functional Data Structures". *)

(* A heap is either an empty heap [E] or a non-empty heap [T (r, x, h1, h2)]
   where [r] is the rank, [x] is an element, and [h1] and [h2] are sub-heaps. *)

type element =
  int

type heap =
| E
| T of int * element * heap * heap

type queue =
  heap

(* The rank of a (sub-)heap is the length of its right spine. The leftist
   invariant states that the rank of a left child is at least as large as
   the rank of its right sibling. *)

(* This implies that the length of each right spine is at most logarithmic.
   Indeed, one can prove (by induction) that [size h >= 2^(rank h) - 1]
   holds for every leftist heap [h]. (Exercise: do it!) *)

(* The rank of a heap can be determined in constant time (because we store it). *)

let rank h =
  match h with
  | E ->
      0
  | T (r, _, _, _) ->
      r

(* This smart constructor creates a new [T] node. The rank is computed on
   the fly, and the children are swapped if necessary, so as to maintain
   the leftist invariant. *)

let makeT x h1 h2 =
  let r1 = rank h1
  and r2 = rank h2 in
  if r1 >= r2 then
    T (r2 + 1, x, h1, h2)
  else
    T (r1 + 1, x, h2, h1)

(* Because we always use [makeT] when we build a new tree, we can be certain
   that our trees respect the leftist invariant. (Because the type [queue]
   is abstract, the user cannot build her own trees directly. She must go
   through us.) *)

(* Somewhat surprisingly, we can begin with [merge]. *)

let rec merge h1 h2 =
  match h1, h2 with
  | E, h
  | h, E ->
      h
  | T (_, x1, a1, b1), T (_, x2, a2, b2) ->
      if x1 <= x2 then
        (* Clearly [x1] must be at the root, so as to preserve the heap
           property.

           So, we wish to call [makeT x1 ? ?], where the two question marks
           stand for two as-yet-undetermined sub-heaps. We have three sub-heaps
           at hand, namely [a1], [b1], [h2]. It seems natural to perform one
           recursive call to [merge], after which we will have only two
           sub-heaps at hand, which we can use to fill the two question marks.
           The question is, which two of the three sub-heaps should we combine?

           As far as the heap property is concerned, all three combinations are
           permitted. As far the leftist invariant is concerned, all three
           combinations are permitted too, provided we use [makeT].

           The key insight is that the sum of the ranks of the arguments must
           decrease in the recursive call, so as to guarantee that [merge] has
           complexity [O(rank h1 + rank h2)], hence logarithmic complexity. This
           means that the sub-tree [a1], whose rank is not controlled, cannot be
           an argument to the recursive call. This leaves just one possibility.
           (Using [merge b1 h2] or [merge h2 b1] makes no fundamental
           difference.) *)
        makeT x1 a1 (merge b1 h2)
      else
        makeT x2 a2 (merge h1 b2)

let empty =
  E

let singleton x =
  T(1, x, E, E)
  (* makeT x empty empty *)

let insert x h =
  merge (singleton x) h

let extract h =
  match h with
  | E ->
      None
  | T (_, x, h1, h2) ->
      Some (x, merge h1 h2)