DFS.ml

open Cascade

(* ------------------------------------------------------------------------ *)

(* This is a functor, also known as a parameterized module. *)

module Make
(* The first parameter is a graph [G]. We assume that [G] defines a
   type [node] and a function [successors] which finds the successors
   of a node. For simplicity, we assume every node has exactly two
   successors. This may seem surprising; in fact, it is not completely
   crazy: at the cost of creating auxiliary nodes, every graph can be
   represented in this form. *)
(G : sig
  type node
  val successors: node -> node * node
end)
(* The second parameter, [H], offers hashing and equality functions
   for nodes. *)
(H : Hashtbl.HashedType with type t = G.node)

= struct

(* ------------------------------------------------------------------------ *)

  (* We create a hash table, which allows marking nodes. The table is
     initially empty, so every node is unmarked. The table is created
     when the functor is applied, so if one wishes to perform two
     independent traversals of the graph, one must apply the functor
     twice. *)

  module T = Hashtbl.Make(H)
  let table = T.create 1023
  let marked node = T.mem table node
  let mark node = T.add table node ()

(* ------------------------------------------------------------------------ *)

  (* A depth-first traversal can be written as a recursive function.
     Here, a prefix enumeration is presented in [iter] style. *)

  let rec iter consume node =
    if not (marked node) then begin
      mark node;
      consume node;
      let left, right = G.successors node in
      iter consume left;
      iter consume right
    end

  (* The above code is nice and simple, but produced by the controller -- not
     by the client, hence not as easy to use as one might like. *)

(* ------------------------------------------------------------------------ *)

  (* Let us instead write this prefix enumeration as a cascade. Since it
     might not be obvious how to do this, let us do it in two steps, as
     follows. (When one is familiar with cascades, one can go directly to
     step 2.) *)

  (* Step 1. Adopt the most obvious approach: build a cascade exactly in
     the same way as one would build a list. Simple, but inefficient, as
     it uses concatenation, which has bad time complexity. *)

  let rec elements_naive node : G.node cascade =
    fun () ->
      if not (marked node) then begin
        mark node;
        let left, right = G.successors node in
        let elements_left = elements_naive left in
        let elements_right = elements_naive right in
        Cons (node, Cascade.concat elements_left elements_right)
      end
      else
        Nil

  (* Step 2. In order to eliminate the need for concatenation, take an
     accumulator as an extra parameter. The call [elements_accu node accu] is
     supposed to produce the sequence of the (unmarked) elements reachable
     from [node], followed with the sequence [accu]. *)

  let rec elements_accu node (accu : G.node cascade) : G.node cascade =
    fun () ->
      if not (marked node) then begin
        mark node;
        let left, right = G.successors node in
        Cons (node, elements_accu left (elements_accu right accu))
      end
      else
        accu()

  let elements node =
    elements_accu node (fun () -> Nil)

  (* One should note that the cascade [elements node] is not persistent: it
     can be used only once. Indeed, it marks the nodes that it finds. After
     the cascade has been used once, all nodes are marked, so an attempt to
     use the cascade again would produce an empty sequence of elements.
     Exercise: instead of producing a cascade, produce a stream, as in Amphi
     07. Thanks to memoization, a stream is automatically persistent. *)

  (* It may not be obvious how or why this code works. Contrary to [iter],
     it does not use an implicit stack of size O(graph-depth). It is able
     to interrupt itself after producing each element, so it must somehow
     store its current state using an explicit data structure. This data
     structure, which is allocated in the heap, is a chain of closures.
     In order to better see this, let us defunctionalise this code. *)

(* ------------------------------------------------------------------------ *)

  (* Defunctionalisation. *)

  (* The above code allocates closures, also known as continuations, in
     two places: [fun () -> ...] appears in the main function [elements]
     and at the beginning of [elements_accu]. So, instead of representing
     continuations as closures, we could represent them as an algebraic
     data type with two constructors, [Root] and [Node]. *)

  type continuation =
  | Root
  | Node of G.node * continuation

  (* We find that a continuation is a list of nodes: an explicit stack! *)

  (* The type [cascade_now] must also be specialized so as to mention a
     continuation instead of a function. Let us call it [result]. *)

  type result =
  | Nil
  | Cons of G.node * continuation

  (* The closure allocations [fun () -> ...] in [elements_accu] and [elements]
     are replaced with allocations of [Root] and [Node] objects. *)

  let elements_accu_defun node k =
    Node (node, k)

  let elements_defun node =
    elements_accu_defun node Root

  (* Since a continuation is not a function, it cannot be called. Instead, the
     meaning of a continuation is given by a function [apply], which simulates
     the effect of calling a continuation. This function examines the label
     ([Root] or [Node]) to see what kind of continuation we have, then
     executes the appropriate code, which corresponds to the body of the
     anonymous function [fun () -> ...] in the previous version of the
     code. *)

  let rec apply (k : continuation) : result =
    match k with
    | Root ->
        Nil
    | Node (node, k) ->
        if not (marked node) then begin
          mark node;
          let left, right = G.successors node in
          Cons (node, elements_accu_defun left (elements_accu_defun right k))
        end
        else
          apply k

  (* Although [apply] is still recursive, the only recursive call is a
     tail call, so it uses O(1) space on the implicit stack, and could
     be rewritten as a loop if desired (exercise!). *)

  (* We find that [apply] takes a continuation and returns either nothing or a
     pair of a node and a continuation. A continuation is really an immutable
     iterator in the style of TD06! *)

  (* Because the above code uses a non-standard type of cascades, we need to
     convert it to some other form to test it. Here, we convert it back to a
     list. *)

  let rec run (k : continuation) : G.node list =
    match apply k with
    | Nil ->
        []
    | Cons (x, k) ->
        x :: run k

end