//#############################################################################
//#
//# Copyright 2008-2019, Mississippi State University
//#
//# This file is part of the Loci Framework.
//#
//# The Loci Framework is free software: you can redistribute it and/or modify
//# it under the terms of the Lesser GNU General Public License as published by
//# the Free Software Foundation, either version 3 of the License, or
//# (at your option) any later version.
//#
//# The Loci Framework is distributed in the hope that it will be useful,
//# but WITHOUT ANY WARRANTY; without even the implied warranty of
//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//# Lesser GNU General Public License for more details.
//#
//# You should have received a copy of the Lesser GNU General Public License
//# along with the Loci Framework.  If not, see <http://www.gnu.org/licenses>
//#
//#############################################################################
#ifndef VISITOR_H
#define VISITOR_H

#include <vector>
#include <map>
#include <set>
#include <utility>
#include <Tools/intervalSet.h>
#include <Tools/digraph.h>
#include "sched_tools.h"
#include "visitorabs.h"
#include <algorithm>

namespace Loci {

  // Visitor over the three graph-compiler kinds (loop, dag, conditional).
  // The visit() overloads are defined outside this header; judging by
  // the class name they compute an ordering for the graph held by each
  // compiler -- TODO confirm against the implementation.
  class orderVisitor: public visitor {
  public:
    virtual ~orderVisitor() {} ;
    virtual void visit(loop_compiler& lc) ;
    virtual void visit(dag_compiler& dc) ;
    virtual void visit(conditional_compiler& cc) ;
  public:
    // Static helper, callable without a visitor instance.
    //   g              - the graph to order
    //   start_vertices - defaults to EMPTY
    //   only_vertices  - defaults to the whole interval
    //                    [UNIVERSE_MIN,UNIVERSE_MAX]; presumably limits
    //                    the ordering to these vertices -- TODO confirm
    // Returns a sequence of vertex sets (presumably one set per level
    // of the resulting order -- TODO confirm).
    static std::vector<digraph::vertexSet>
    order_dag(const digraph &g,
              digraph::vertexSet start_vertices = EMPTY,
              digraph::vertexSet only_vertices =
              interval(UNIVERSE_MIN,UNIVERSE_MAX)
              ) ;
  } ;

  // Visitor over the loop, dag and conditional compilers that keeps
  // references to the fact and schedule databases together with
  // reduction, keyspace and clone bookkeeping. The constructor and the
  // visit() overloads are defined outside this header.
  class assembleVisitor: public visitor {
  public:
    // NOTE(review): the constructor body is not visible here. Going by
    // the matching types, the arguments presumably initialize:
    //   fd -> facts, sd -> scheds, arv -> all_reduce_vars,
    //   ri -> reduceInfo, dt -> dynamic_targets, sc -> self_clone,
    //   sac -> shadow_clone, drc -> drule_ctrl
    // -- confirm against the implementation.
    assembleVisitor(fact_db& fd, sched_db& sd,
                    const variableSet& arv,
                    const std::map<variable,
                    std::pair<rule,CPTR<joiner> > >& ri,
                    const variableSet& dt,
                    const std::map<variable,std::string>& sc,
                    const std::map<variable,std::set<std::string> >& sac,
                    const std::map<variable,std::set<std::string> >& drc) ;
    virtual ~assembleVisitor() {}
    virtual void visit(loop_compiler& lc) ;
    virtual void visit(dag_compiler& dc) ;
    virtual void visit(conditional_compiler& cc) ;
  private:
    // references (not copies) to the databases handed to the constructor;
    // they must outlive this visitor
    fact_db& facts ;
    sched_db& scheds ;
    // const member: fixed at construction time
    const variableSet all_reduce_vars ;
    // per variable: a (rule, joiner) pair
    std::map<variable,std::pair<rule,CPTR<joiner> > > reduceInfo ;
    // targets generated by rules in dynamic keyspace
    // we don't create barrier and reduce var compiler for them
    variableSet dynamic_targets ;
    // helper that fills dag_comp with rule compilers, given the
    // schedule (dag_sched), the rule compiler map and the graph itself
    void compile_dag_sched(std::vector<rule_compilerP> &dag_comp,
                           const std::vector<digraph::vertexSet> &dag_sched,
                           const rulecomp_map& rcm,
                           const digraph& dag) ;
    variableSet keyspace_critical_vars ;
    // variable -> keyspace name
    std::map<variable, std::string> var2keyspace ;
    // these are used for the dynamic clone invalidator
    std::map<variable, std::string> self_clone ;
    std::map<variable, std::set<std::string> > shadow_clone ;
    variableSet self_clone_vars ;
    variableSet shadow_clone_vars ;
    variableSet clone_vars ;
    // these are used for dynamic rule control reset
    std::map<variable, std::set<std::string> > drule_ctrl ;
    variableSet drule_inputs ;
  } ;

  // Stateless visitor over the loop, dag and conditional compilers.
  // The visit() overloads are defined outside this header; going by the
  // class name they visualize the graph of each compiler -- TODO confirm.
  class graphVisualizeVisitor: public visitor {
  public:
    virtual ~graphVisualizeVisitor() {}
    virtual void visit(loop_compiler& lc) ;
    virtual void visit(dag_compiler& dc) ;
    virtual void visit(conditional_compiler& cc) ;
  } ;
  
  // generate allocation information table
  // used in top - down order
  //
  // The results are read back with get_alloc_table() and
  // get_loop_alloc_table() after the visit.
  class allocInfoVisitor: public visitor {
  public:
    // need graph_sn information
    //
    // Every argument is copied into a member:
    //   gsn          -> graph_sn
    //   rvs2t        -> recurs2t
    //   rvt2s        -> recurt2s
    //   allrecurvars -> all_recur_vars
    //   lsn          -> loop_sn
    //   rot_vt       -> rotate_vtable
    //   lsharedt     -> loop_shared_table
    //   untyped_vars -> allocated_vars (the initial content of the
    //                   "allocated or reserved" set)
    allocInfoVisitor(const std::set<int>& gsn,
                     const std::map<variable,variableSet>& rvs2t,
                     const std::map<variable,variableSet>& rvt2s,
                     const variableSet& allrecurvars,
                     const std::set<int>& lsn,
                     const std::map<int,variableSet>& rot_vt,
                     const std::map<int,variableSet>& lsharedt,
                     const variableSet& untyped_vars):
      allocated_vars(untyped_vars), graph_sn(gsn),
      recurs2t(rvs2t),recurt2s(rvt2s),
      all_recur_vars(allrecurvars),loop_sn(lsn),
      rotate_vtable(rot_vt),loop_shared_table(lsharedt) {}
    virtual ~allocInfoVisitor() {} 
    virtual void visit(loop_compiler& lc) ;
    virtual void visit(dag_compiler& dc) ;
    virtual void visit(conditional_compiler& cc) ;
    // returns a copy of the allocation table
    std::map<int,variableSet> get_alloc_table() const
    {return alloc_table ;}
    // returns a copy of the per-loop allocation table
    std::map<int,variableSet> get_loop_alloc_table() const
    {return loop_alloc_table ;}
  protected:
    // gather information for gather_info function use
    // get working_vars in the graph
    variableSet get_start_info(const digraph& gr,int id) ;
    // return all the variables that are allocated
    // in the graph
    variableSet gather_info(const digraph& gr,
                            const variableSet& working_vars) ;
    // the allocation table (int key -> variables; the key is
    // presumably the super node id -- TODO confirm)
    std::map<int,variableSet> alloc_table ;
    // holds info about the allocation of every loop super node
    std::map<int,variableSet> loop_alloc_table ;
    // variables that have been allocated up to now
    // or reserved not to be allocated
    variableSet allocated_vars ; 
    // super nodes that have a graph inside it
    std::set<int> graph_sn ;
    // the recurrence variables mapping table
    // (source -> target and target -> source)
    std::map<variable,variableSet> recurs2t ;
    std::map<variable,variableSet> recurt2s ;
    // the set of all recurrence variables
    variableSet all_recur_vars ;
    // set that holds all the loop node id
    std::set<int> loop_sn ;
    // table that holds rotate list variables in each loop
    std::map<int,variableSet> rotate_vtable ;
    // table that holds the shared variables
    // between adv & col part of each loop
    std::map<int,variableSet> loop_shared_table ;
  } ;

  // used to decorate the graph to include allocation rules
  class allocGraphVisitor: public visitor {
  public:
    // Every argument is copied into a member:
    //   t            -> alloc_table
    //   lsn          -> loop_sn
    //   rot_vt       -> rotate_vtable
    //   prio_s2t     -> prio_s2t      (parameter shadows the member; in
    //   prio_sources -> prio_sources   the initializer list the name
    //                                  outside the parentheses is the
    //                                  member, the one inside is the
    //                                  parameter)
    allocGraphVisitor(const std::map<int,variableSet>& t,
                      const std::set<int>& lsn,
                      const std::map<int,variableSet>& rot_vt,
                      const std::map<variable,variableSet>& prio_s2t,
                      const variableSet& prio_sources)
      :alloc_table(t),loop_sn(lsn),rotate_vtable(rot_vt),
       prio_s2t(prio_s2t),prio_sources(prio_sources) {}
    virtual ~allocGraphVisitor() {}
    virtual void visit(loop_compiler& lc) ;
    virtual void visit(dag_compiler& dc) ;
    virtual void visit(conditional_compiler& cc) ;
  protected:
    // helper that takes the graph and the rule compiler map by
    // non-const reference, i.e. it may modify both; defined elsewhere
    void edit_gr(digraph& gr,rulecomp_map& rcm,int id) ;
    // copy of the allocation table passed to the constructor
    std::map<int,variableSet> alloc_table ;
    // set that holds all the loop node id
    std::set<int> loop_sn ;
    // table that holds rotate list variables in each loop
    std::map<int,variableSet> rotate_vtable ;
    // priority rules information
    std::map<variable,variableSet> prio_s2t ;
    variableSet prio_sources ;
  } ;

  // memory profiling allocate decoration compiler
  //
  // The constructor and the visit() overloads are defined outside
  // this header.
  class memProfileAllocDecoVisitor: public visitor {
  public:
    // t      - an int -> variableSet table (presumably the allocation
    //          table stored in alloc_table -- TODO confirm)
    // rot_vt - an int -> variableSet table (presumably the per-loop
    //          rotate list variables used to fill loop_rotation_vars
    //          -- TODO confirm)
    memProfileAllocDecoVisitor(const std::map<int,variableSet>& t,
                               const std::map<int,variableSet>& rot_vt) ;
    virtual ~memProfileAllocDecoVisitor() {}
    virtual void visit(loop_compiler& lc) ;
    virtual void visit(dag_compiler& dc) ;
    virtual void visit(conditional_compiler& cc) ;
  protected:
    // helper that takes the graph and the rule compiler map by
    // non-const reference, i.e. it may modify both; defined elsewhere
    void edit_gr(digraph& gr,rulecomp_map& rcm,int id) ;
    std::map<int,variableSet> alloc_table ;
    // all variables need to be allocated
    variableSet all_alloc_vars ;
    // loop rotation variables
    variableSet loop_rotation_vars ;
  } ;

  // generate delete information table
  // used in top - down order
  //
  // The results are read back with get_delete_table() and
  // get_recur_source_other_rules() after the visit.
  class deleteInfoVisitor: public visitor {
  public:
    // need loop allocation info and recurrence variable info
    //
    // NOTE(review): the constructor is defined outside this header.
    // Going by the names and types, the arguments presumably feed:
    //   lat -> loop_alloc_table, rvt2s -> recur_vars_t2s,
    //   rvs2t -> recur_vars_s2t, gsn -> graph_sn, pnt -> pnode_table,
    //   lct -> loop_ctable, lsn -> loop_sn, csn -> cond_sn,
    //   rot_vt -> rotate_vtable, lsharedt -> loop_shared_table,
    //   promoted_rep -> promoted_rep, reserved_vars -> deleted_vars
    // -- confirm against the implementation.
    deleteInfoVisitor(const std::map<int,variableSet>& lat,
                      const std::map<variable,variableSet>& rvt2s,
                      const std::map<variable,variableSet>& rvs2t,
                      const std::set<int>& gsn,
                      const std::map<int,int>& pnt,
                      const std::map<int,int>& lct,
                      const std::set<int>& lsn,
                      const std::set<int>& csn,
                      const std::map<int,variableSet>& rot_vt,
                      const std::map<int,variableSet>& lsharedt,
                      const variableSet& promoted_rep,
                      const variableSet& reserved_vars) ;
    virtual ~deleteInfoVisitor() {}
    virtual void visit(loop_compiler& lc) ;
    virtual void visit(dag_compiler& dc) ;
    virtual void visit(conditional_compiler& cc) ;
    // returns a copy of the delete information table
    std::map<int,variableSet> get_delete_table() const
    {return delete_table ;}
    // returns a copy of the recur_source_other_rules table
    std::map<variable,ruleSet> get_recur_source_other_rules() const
    {return recur_source_other_rules ;}
  protected:
    // determine whether the variables in working_vars
    // should be deleted in this graph;

    // returns all the variables that are deleted
    // in the graph
    variableSet gather_info(const digraph& gr,
                            const variableSet& working_vars) ;

    // gather information for gather_info function use
    // get working_vars in the graph
    variableSet get_start_info(const digraph& gr,int id) ;
    // constrain deletions of graphs that are inside n number of loops
    // (working_vars is taken by non-const reference, so it may be
    // modified by the call)
    int only_loop_alloc(variableSet& working_vars,int id,int loop_num) ;
    // the looping algorithm for schedule deletion of variables
    // in the multilevel graph
    void looping_algr(const variableSet& working_vars,const digraph& dg,
                      int id,int start_loop_num) ;
    // given a rule set and a graph
    // return true if there is only one super node and
    // all other rules have path to the super node, or if
    // it is only one super node
    // return false otherwise
    bool let_it_go(const digraph& gr, ruleSet rules, const variable& v) ;
    // the delete information table
    std::map<int,variableSet> delete_table ;
    // variables that have been deleted up to now or reserved
    // not to be deleted
    variableSet deleted_vars ;
    // info about recurrence variables (target to source map)
    std::map<variable,variableSet> recur_vars_t2s ;
    // source to target map
    std::map<variable,variableSet> recur_vars_s2t ;
    // if a variable is a recurrence target variable,
    // and there are other rules (other than the recurrence rule)
    // that its recurrence source variables reach in the same graph,
    // we then record such rules in this table
    std::map<variable,ruleSet> recur_source_other_rules ;
    // info about every loop's allocation
    std::map<int,variableSet> loop_alloc_table ;
    // super nodes that have a graph inside it
    std::set<int> graph_sn ;
    // table that records the parent node
    std::map<int,int> pnode_table ;
    // table that holds the collapse node of each loop
    std::map<int,int> loop_ctable ;
    // set that holds all the loop node id
    std::set<int> loop_sn ;
    // set that holds all the conditional node id
    std::set<int> cond_sn ;
    // table that holds rotate list variables in each loop
    std::map<int,variableSet> rotate_vtable ;
    // table that holds the shared variables
    // between adv & col part of each loop
    std::map<int,variableSet> loop_shared_table ;
    // all the loop rotate variables
    variableSet all_rot_vars ;
    // representative variable for deletion
    // of each promoted variable cluster
    variableSet promoted_rep ;
    // variable that stores the current loop shared variables
    // for determining the deletion in a loop
    variableSet current_lshared_vars ;

    // The following additions are here to fix problems in
    // the current deallocation algorithms that do not communicate
    // information properly between super nodes. In the future,
    // it would be better to reimplement the allocation and deallocation
    // algorithms entirely in a much cleaner way.
    
    // this structure is used to record the responsibility to process
    // deallocation requests for interface variables for every super node.
    // interface variables are those variables that cross the boundary
    // of super nodes, i.e., the inputs and outputs of a super node.
    // This structure essentially records the escape information computed
    // in the "let_it_go" function.
    std::map<int,variableSet> sn_del_interface ;
  } ;

  // visitor that gets all the recurrence variables in the
  // multilevel graph
  //
  // Besides the loop, dag and conditional compilers it also visits
  // impl_recurse_compiler and recurse_compiler. The collected
  // information is kept in five families of tables (recur, generalize,
  // promote, priority, rename); each family has a target->source map,
  // a source->target map, a source set and a target set. Every getter
  // below returns a copy of the corresponding member.
  class recurInfoVisitor: public visitor {
    public:
    virtual ~recurInfoVisitor() {}
    virtual void visit(loop_compiler& lc) ;
    virtual void visit(dag_compiler& dc) ;
    virtual void visit(conditional_compiler& cc) ;
    virtual void visit(impl_recurse_compiler& irc) ;
    virtual void visit(recurse_compiler& rc) ;
    // --- all recurrence variables ---
    std::map<variable,variableSet> get_recur_vars_t2s() const
    {return recur_vars_t2s ;}
    std::map<variable,variableSet> get_recur_vars_s2t() const
    {return recur_vars_s2t ;}
    variableSet get_recur_source_vars() const
    {return recur_source_vars ;}
    variableSet get_recur_target_vars() const
    {return recur_target_vars ;}
    
    // --- generalize ---
    std::map<variable,variableSet> get_generalize_t2s() const
    {return generalize_t2s ;}
    std::map<variable,variableSet> get_generalize_s2t() const
    {return generalize_s2t ;}
    variableSet get_generalize_source_vars() const
    {return generalize_source_vars ;}
    variableSet get_generalize_target_vars() const
    {return generalize_target_vars ;}
    
    // --- promote ---
    std::map<variable,variableSet> get_promote_t2s() const
    {return promote_t2s ;}
    std::map<variable,variableSet> get_promote_s2t() const
    {return promote_s2t ;}
    variableSet get_promote_source_vars() const
    {return promote_source_vars ;}
    variableSet get_promote_target_vars() const
    {return promote_target_vars ;}
    
    // --- priority ---
    std::map<variable,variableSet> get_priority_t2s() const
    {return priority_t2s ;}
    std::map<variable,variableSet> get_priority_s2t() const
    {return priority_s2t ;}
    variableSet get_priority_source_vars() const
    {return priority_source_vars ;}
    variableSet get_priority_target_vars() const
    {return priority_target_vars ;}
    
    // --- rename ---
    std::map<variable,variableSet> get_rename_t2s() const
    {return rename_t2s ;}
    std::map<variable,variableSet> get_rename_s2t() const
    {return rename_s2t ;}
    variableSet get_rename_source_vars() const
    {return rename_source_vars ;}
    variableSet get_rename_target_vars() const
    {return rename_target_vars ;}

    // this function returns *all* reachable recurrence
    // variables from the given variableSet. we look for reachable
    // variables in both the "recur_vars_t2s" and "recur_vars_s2t"
    // tables, i.e., these two tables form a directed graph for
    // recurrence variables
    variableSet
    get_reachable(const variableSet&) const ;
  protected:
    // helpers defined outside this header; one works from a graph,
    // the other from a rule set
    void gather_info(const digraph& gr) ;
    void gather_info2(const ruleSet& rs) ;
    // from x{n} -> x, i.e. from target -> source
    std::map<variable,variableSet> recur_vars_t2s ; 
    // from source -> target, e.g. x -> x{n}
    std::map<variable,variableSet> recur_vars_s2t ;

    // the same two directions, kept per recurrence kind
    std::map<variable,variableSet> generalize_t2s ;
    std::map<variable,variableSet> generalize_s2t ;

    std::map<variable,variableSet> promote_t2s ;
    std::map<variable,variableSet> promote_s2t ;

    std::map<variable,variableSet> priority_t2s ;
    std::map<variable,variableSet> priority_s2t ;

    std::map<variable,variableSet> rename_t2s ;
    std::map<variable,variableSet> rename_s2t ;
    
    // set of recurrence source and target variables
    variableSet recur_source_vars ;
    variableSet recur_target_vars ;

    // source and target sets, kept per recurrence kind
    variableSet generalize_source_vars ;
    variableSet generalize_target_vars ;

    variableSet promote_source_vars ;
    variableSet promote_target_vars ;

    variableSet priority_source_vars ;
    variableSet priority_target_vars ;

    variableSet rename_source_vars ;
    variableSet rename_target_vars ;
  } ;

  // used to decorate the graph to include deletion rules
  class deleteGraphVisitor: public visitor {
  public:
    // Both arguments are copied into members:
    //   t    -> delete_table
    //   rsor -> recur_source_other_rules
    deleteGraphVisitor(const std::map<int,variableSet>& t,
                       const std::map<variable,ruleSet>& rsor)
        :delete_table(t),recur_source_other_rules(rsor){}
    virtual ~deleteGraphVisitor() {} 
    virtual void visit(loop_compiler& lc) ;
    virtual void visit(dag_compiler& dc) ;
    virtual void visit(conditional_compiler& cc) ;
  protected:
    // helper that takes the graph and the rule compiler map by
    // non-const reference, i.e. it may modify both; defined elsewhere
    void edit_gr(digraph& gr,rulecomp_map& rcm,int id) ;
    // copy of the delete table passed to the constructor
    std::map<int,variableSet> delete_table ;
    // if a variable is a recurrence target variable,
    // and there are other rules (other than the recurrence rule)
    // that its recurrence source variables reach in the same graph,
    // this table holds such rules
    std::map<variable,ruleSet> recur_source_other_rules ;
  } ;

  // visitor to get some inter- super node information
  //
  // Every getter below returns a copy of the corresponding member.
  class snInfoVisitor: public visitor {
    public:
    virtual ~snInfoVisitor() {}
    virtual void visit(loop_compiler& lc) ;
    virtual void visit(dag_compiler& dc) ;
    virtual void visit(conditional_compiler& cc) ;
    // ids of the super nodes that have a graph in them
    std::set<int> get_graph_sn() const
    {return graph_sn ;}
    // ids of all loop super nodes
    std::set<int> get_loop_sn() const
    {return loop_sn ;}
    // ids of all conditional nodes
    std::set<int> get_cond_sn() const
      {return cond_sn ;}
    // super node id -> ids of the super nodes inside it
    std::map<int,std::set<int> > get_subnode_table() const
    {return subnode_table ;}
    // the collapse node of each loop
    std::map<int,int> get_loop_col_table() const
    {return loop_col_table ;}
  protected:
    // helper defined outside this header; going by its name it fills
    // the subnode_table entry of super node "id" from graph "gr"
    // -- TODO confirm
    void fill_subnode_table(const digraph& gr, int id) ;
    // super node that has a graph in it
    // i.e. the loop, dag, and conditional super node
    std::set<int> graph_sn ;
    // all the loop super node id
    std::set<int> loop_sn ;
    // all the conditional node id
    std::set<int> cond_sn ;
    // table that holds all the super nodes' id inside a super node
    std::map<int,std::set<int> > subnode_table ;
    // table that holds the collapse node of each loop
    std::map<int,int> loop_col_table ;
  } ;

  // check whether the allocation and deletion tables are
  // consistent
  //
  // Not a visitor: it is built from the finished tables and then
  // asked to write a report to a stream.
  class allocDeleteStat {
    // NOTE(review): the constructor is defined outside this header; the
    // two sets below are presumably collected from alloc_table and
    // delete_table -- TODO confirm
    variableSet allocated_vars ;
    variableSet deleted_vars ;
    // recurrence variable tables (target -> source, source -> target)
    std::map<variable,variableSet> recur_vars_t2s ;
    std::map<variable,variableSet> recur_vars_s2t ;
    // recurrence source and target variables
    variableSet recur_source_vars ;
    variableSet recur_target_vars ;
  public:
    // alloc_table / delete_table - the int -> variableSet tables to check
    // t2s / s2t                  - recurrence variable maps
    // rsv / rtv                  - presumably the recurrence source and
    //                              target variable sets -- TODO confirm
    allocDeleteStat(const std::map<int,variableSet>& alloc_table,
                    const std::map<int,variableSet>& delete_table,
                    const std::map<variable,variableSet>& t2s,
                    const std::map<variable,variableSet>& s2t,
                    const variableSet& rsv,const variableSet& rtv) ;
    // writes to stream "s" and returns a stream reference (presumably
    // "s" itself, for chaining -- TODO confirm); does not modify this
    // object
    std::ostream& report(std::ostream& s) const ;
  } ;

  // function to get the multilevel graph parent hierarchy table
  //   subnode_table - for an int key, the set of ints associated with
  //                   it (presumably super node id -> ids of the super
  //                   nodes inside it -- TODO confirm)
  // returns an int -> int table (presumably node id -> parent node id
  // -- TODO confirm)
  std::map<int,int>
  get_parentnode_table(const std::map<int,std::set<int> >& subnode_table) ;
  // function to get the allocation of each loop
  //   alloc_table   - int -> variableSet allocation table
  //   subnode_table - int -> set of ints (presumably the super nodes
  //                   inside each super node -- TODO confirm)
  //   loop_sn       - set of ints (presumably the loop super node ids
  //                   -- TODO confirm)
  //   graph_sn      - set of ints (presumably the ids of the super
  //                   nodes that contain a graph -- TODO confirm)
  //   rvs2t         - variable -> variableSet map (presumably the
  //                   recurrence source -> target map -- TODO confirm)
  // returns an int -> variableSet table
  std::map<int,variableSet>
  get_loop_alloc_table(const std::map<int,variableSet>& alloc_table,
                       const std::map<int,std::set<int> >& subnode_table,
                       const std::set<int>& loop_sn,
                       const std::set<int>& graph_sn,
                       const std::map<variable,variableSet>& rvs2t) ;

  // visitor to compute the loop_rotate lists
  //
  // Only loop compilers are processed: the dag and conditional
  // overloads of visit() are empty.
  class rotateListVisitor: public visitor {
  public:
    // sd is kept by reference (it must outlive this visitor);
    // s2t and t2s are copied into rvs2t and rvt2s
    rotateListVisitor(sched_db& sd,
                      // these are the rename tables
                      const std::map<variable,variableSet>& s2t,
                      const std::map<variable,variableSet>& t2s)
        :scheds(sd),rvs2t(s2t),rvt2s(t2s) {}
    virtual ~rotateListVisitor() {}
    virtual void visit(loop_compiler& lc) ;
    // intentionally do nothing
    virtual void visit(dag_compiler& dc) {}
    virtual void visit(conditional_compiler& cc) {}
    // returns a copy of the rotate list table
    std::map<int,variableSet> get_rotate_vars_table() const
    {return rotate_vars_table ;}
    // returns a copy of the loop shared variable table
    std::map<int,variableSet> get_loop_shared_table() const
    {return loop_shared_table ;}
    // unlike the two getters above this one returns a reference to
    // the member, which is only valid while this visitor is alive
    const std::map<variable,variableSet>& get_overlap_rotvars() const
    {return overlap_rotvars ;}
  private:
    // reference to the schedule database
    sched_db& scheds ;
    // table that holds variables in each loop's
    // rotate list
    std::map<int,variableSet> rotate_vars_table ;
    // table holds the shared variables between adv & col part of loop
    std::map<int,variableSet> loop_shared_table ;
    //   rename source -> target table
    // & rename target -> source table
    std::map<variable,variableSet> rvs2t, rvt2s ;
    // this map records the overlapped rotation var list
    std::map<variable,variableSet> overlap_rotvars ;
  } ;

  // visitor that checks if a graph has cycle
  class dagCheckVisitor: public visitor {
  public:
      // v is stored in viz; it defaults to false
      dagCheckVisitor(bool v=false):viz(v) {}
    virtual ~dagCheckVisitor() {}
    virtual void visit(loop_compiler& lc) ;
    virtual void visit(dag_compiler& dc) ;
    virtual void visit(conditional_compiler& cc) ;
  private:
    // presumably enables visualize() output when a cycle is found
    // -- TODO confirm against the implementation
    bool viz ;
    // presumably holds the offending part of the graph once
    // check_dag() has found a cycle -- TODO confirm
    digraph cycle ;
    // takes its own copy of the graph (passed by value)
    bool check_dag(digraph gr) ;
    // writes to stream "s" and returns a stream reference
    std::ostream& visualize(std::ostream& s) const ;
  } ;

  // visitor that discovers all the un-typed variables
  // in the multilevel graph
  //
  // The result is read back with get_untyped_vars() after the visit.
  class unTypedVarVisitor: public visitor {
  public:
    // NOTE(review): the constructor is defined outside this header.
    // s2t / t2s presumably initialize recur_vars_s2t / recur_vars_t2s;
    // how "input" is used is not visible here -- TODO confirm
    unTypedVarVisitor(const std::map<variable,variableSet>& s2t,
                      const std::map<variable,variableSet>& t2s,
                      const variableSet& input) ;

    virtual ~unTypedVarVisitor() {}
    virtual void visit(loop_compiler& lc) ;
    virtual void visit(dag_compiler& dc) ;
    virtual void visit(conditional_compiler& cc) ;
    // returns a copy of the un-typed variables found so far
    variableSet get_untyped_vars() const
    {return untyped_vars ;}
  private:
    // per-graph worker, defined outside this header
    void discover(const digraph& gr) ;
    variableSet untyped_vars ;
    variableSet typed_vars ;
    // recurrence variable mapping table
    // source to target
    std::map<variable,variableSet> recur_vars_s2t ;
    // target to source
    std::map<variable,variableSet> recur_vars_t2s ;
  } ;

  // visitor that collects all the variables
  // in the multilevel graph
  //
  // The result is read back with get_all_vars_ingraph() after the visit.
  class getAllVarVisitor: public visitor {
    public:
    virtual ~getAllVarVisitor() {}
    virtual void visit(loop_compiler& lc) ;
    virtual void visit(dag_compiler& dc) ;
    virtual void visit(conditional_compiler& cc) ;
    // returns a copy of the collected variables
    variableSet get_all_vars_ingraph() const
    {return all_vars ;}
  private:
    // per-graph worker, defined outside this header
    void collect_vars(const digraph& gr) ;
    // the variables collected so far
    variableSet all_vars ;
  } ;

  // visitor that does preprocessing for promoted variables:
  // it determines which promoted variable should be deleted.
  // e.g. there are x -> x{n} -> x{n,it}, we can only delete
  // one of them. For generalize, priority and renamed variables,
  // we should always delete and process the final one.
  class promotePPVisitor: public visitor {
  public:
    // NOTE(review): the constructor is defined outside this header.
    // Going by the names and types, the arguments presumably feed:
    //   pt2s -> promote_t2s, ps2t -> promote_s2t,
    //   psource -> promote_source_vars, ptarget -> promote_target_vars,
    //   gsn -> graph_sn, csn -> cond_sn
    // "input" is taken by non-const reference, so the constructor may
    // modify the caller's set -- TODO confirm against the implementation.
    promotePPVisitor(const std::map<variable,variableSet>& pt2s,
                     const std::map<variable,variableSet>& ps2t,
                     const variableSet& psource,
                     const variableSet& ptarget,
                     const std::set<int>& gsn,
                     const std::set<int>& csn,
                     variableSet& input) ;
    virtual ~promotePPVisitor() {}
    virtual void visit(loop_compiler& lc) ;
    virtual void visit(dag_compiler& dc) ;
    virtual void visit(conditional_compiler& cc) ;
    // returns a copy of the representative promote variables
    variableSet get_rep() const
    {return rep ;}
    // returns a copy of the remaining promote variables
    variableSet get_remaining() const
    {return remaining ;}
  private:
    // set that holds the representative promote variable
    variableSet rep ;
    // set that holds the remaining promote variables
    // that we want to reserve
    variableSet remaining ;
    variableSet processed ;
    // helpers defined outside this header
    bool is_rep(const digraph& gr, ruleSet rules) ;
    void pick_rep(const digraph& gr) ;
    variableSet promote_source_vars, promote_target_vars ;
    std::map<variable,variableSet> promote_t2s, promote_s2t ;
    std::set<int> graph_sn ;
    // set that holds all the conditional node id
    std::set<int> cond_sn ;
    variableSet reserved_vars ;
  } ;

  // function to analyze the renamed target variable cluster
  // and find out a representative variable for each cluster,
  // only this variable is allowed to be deleted
  //   s2t / t2s - variable -> variableSet maps in the two directions
  //               (source -> target and target -> source)
  //   allvars   - a variableSet; how it constrains the result is not
  //               visible from this declaration
  // returns a variableSet (presumably the chosen representatives
  // -- TODO confirm)
  variableSet pick_rename_target(const std::map<variable,variableSet>& s2t,
                                 const std::map<variable,variableSet>& t2s,
                                 const variableSet& allvars) ;

  // function that checks some preconditions that all the recurrence
  // variables should meet
  //   v     - a recurInfoVisitor, read-only (presumably one that has
  //           already traversed the graph -- TODO confirm)
  //   input - a variableSet; its role is not visible from this
  //           declaration
  // returns nothing; how a violated precondition is reported is not
  // visible from this declaration
  void check_recur_precondition(const recurInfoVisitor& v,
                                const variableSet& input) ;

  // visitor that discovers all the variables that
  // are not suitable for chomping
  //
  // The results are read back with get_good_vars() and get_bad_vars()
  // after the visit.
  class chompPPVisitor: public visitor {
  public:
    // NOTE(review): the constructor is defined outside this header.
    // fd is kept by reference in "facts"; rot_vt, lsharedt and rv
    // presumably fill rotate_vars, loop_shared_vars and rename_vars
    // -- TODO confirm against the implementation.
    chompPPVisitor(fact_db& fd,
                   const std::map<int,variableSet>& rot_vt,
                   const std::map<int,variableSet>& lsharedt,
                   const variableSet& rv) ;
    virtual ~chompPPVisitor() {}
    virtual void visit(loop_compiler& lc) ;
    virtual void visit(dag_compiler& dc) ;
    virtual void visit(conditional_compiler& cc) ;
    // returns a copy of the chomping candidates
    variableSet get_good_vars() const
      {return good_vars ;}
    // returns a copy of the variables that cannot be chomped
    variableSet get_bad_vars() const
      {return bad_vars ;}
  private:
    // per-graph worker, defined outside this header
    void discover(const digraph& gr) ;
    // variables that are candidate for chomping
    variableSet good_vars ;
    // variables that cannot be chomped
    variableSet bad_vars ;
    // variables that have been seen
    variableSet seen_vars ;
    // reference to the fact database
    fact_db& facts ;
    // all rotate lists variables
    variableSet rotate_vars ;
    // all loop shared variables
    variableSet loop_shared_vars ;
    // all the rename variables (source + target)
    variableSet rename_vars ;
  } ;

  // visitor that finds rule chains that are suitable for chomping
  //
  // a chomp_chain pairs a graph with a set of variables (presumably
  // the chain's rules and the variables chomped by it -- TODO confirm)
  typedef std::pair<digraph,variableSet> chomp_chain ;
  class chompRuleVisitor: public visitor {
  public:
    // Every argument is copied into a member:
    //   gv  -> good_vars
    //   bv  -> bad_vars
    //   a2u -> apply2unit
    chompRuleVisitor(const variableSet& gv,
                     const variableSet& bv,
                     const std::map<rule,rule>& a2u)
        :good_vars(gv),bad_vars(bv),apply2unit(a2u) {}
    virtual ~chompRuleVisitor() {}
    virtual void visit(loop_compiler& lc) ;
    virtual void visit(dag_compiler& dc) ;
    virtual void visit(conditional_compiler& cc) ;
    // returns a copy of all the chains found, grouped by an int key
    // (presumably the super node id -- TODO confirm)
    std::map<int,std::list<chomp_chain> > get_all_chains() const
      {return all_chains ;}
    // returns a copy of all_chomped_vars
    variableSet get_all_chomped_vars() const
      {return all_chomped_vars ;}
    // both write to stream "s" and return a stream reference;
    // neither modifies this object
    std::ostream& visualize(std::ostream& s) const ;
    std::ostream& summary(std::ostream& s) const ;
  private:
    // helpers defined outside this header; edit_gr takes the graph and
    // the rule compiler map by non-const reference, i.e. it may modify
    // both
    std::list<chomp_chain> find_chain(const digraph& gr) ;
    void edit_gr(digraph& gr,const std::list<chomp_chain>& cc,
                 rulecomp_map& rcm) ;
    std::map<int,std::list<chomp_chain> > all_chains ;
    variableSet all_chomped_vars ;
    variableSet good_vars ;
    variableSet bad_vars ;
    // apply to unit map
    std::map<rule,rule> apply2unit ;
  } ;

  // visitor that assembles and compiles all the chomp_compilers
  class compChompVisitor: public visitor {
  public:
    // ri is copied into reduceInfo
    compChompVisitor(const std::map<variable,
                     std::pair<rule,CPTR<joiner> > >& ri):
      reduceInfo(ri) {}
    virtual ~compChompVisitor() {}
    virtual void visit(loop_compiler& lc) ;
    virtual void visit(dag_compiler& dc) ;
    virtual void visit(conditional_compiler& cc) ;
  private:
    // per variable: a (rule, joiner) pair
    std::map<variable,std::pair<rule,CPTR<joiner> > > reduceInfo ;
    // helpers defined outside this header; process_rcm, schedule_chomp
    // and compile_chomp take their first argument by non-const
    // reference, i.e. they may modify it
    void process_rcm(rulecomp_map& rcm) ;
    void schedule_chomp(chomp_compiler& chc) ;
    void compile_chomp(chomp_compiler& chc, const rulecomp_map& rcm) ;
  } ;

  // graph schedule visitor that is lazy on allocation
  //
  // Holds no state; the visit() overloads are defined outside this
  // header.
  class simLazyAllocSchedVisitor: public visitor {
  public:
    virtual ~simLazyAllocSchedVisitor() {}
    virtual void visit(loop_compiler& lc) ;
    virtual void visit(dag_compiler& dc) ;
    virtual void visit(conditional_compiler& cc) ;
  private:
    // returns a sequence of vertex sets for graph "gr" (presumably an
    // initial schedule, one set per step -- TODO confirm); does not
    // modify this object
    std::vector<digraph::vertexSet>
      get_firstSched(const digraph& gr) const ;
  } ;

  // visitor that collects the unit and apply information
  //
  // The results are read back with the three getters after the visit.
  // The getters are const, like the getters of the other visitors in
  // this header, so they can also be called through a const reference;
  // each returns a copy of the corresponding member.
  class unitApplyMapVisitor: public visitor {
  public:
    virtual ~unitApplyMapVisitor() {}
    virtual void visit(loop_compiler& lc) ;
    virtual void visit(dag_compiler& dc) ;
    virtual void visit(conditional_compiler& cc) ;
    // per variable: a (rule, joiner) pair
    std::map<variable,std::pair<rule,CPTR<joiner> > > get_reduceInfo() const
      {return reduceInfo ;}
    // apply rule -> unit rule map
    std::map<rule,rule> get_apply2unit() const
      {return apply2unit ;}
    // all the reduce variables collected
    variableSet get_all_reduce_vars() const
      {return all_reduce_vars ;}
  private:
    std::map<variable,std::pair<rule,CPTR<joiner> > > reduceInfo ;
    std::map<rule,rule> apply2unit ;
    variableSet all_reduce_vars ;
    // per-graph worker, defined outside this header
    void gather_info(const digraph& gr) ;
  } ;
  
  // visitor that reports the allocation and deletion
  // numbers in each super node
  class allocDelNumReportVisitor: public visitor {
  public:
    // sout is kept by reference in "s" (it must outlive this visitor);
    // it defaults to std::cout
    allocDelNumReportVisitor(std::ostream& sout=std::cout):s(sout) {}
    virtual ~allocDelNumReportVisitor() {}
    virtual void visit(loop_compiler& lc) ;
    virtual void visit(dag_compiler& dc) ;
    virtual void visit(conditional_compiler& cc) ;
  private:
    // helper defined outside this header; alloc_num and del_num are
    // output parameters (presumably the allocation and deletion counts
    // found in graph "gr" -- TODO confirm)
    void adNum(const digraph& gr,int& alloc_num,int& del_num) ;
    // the stream given to the constructor
    std::ostream& s ;
  } ;

  // scheduling visitor that is greedy on memory
  // usage and may increase the synchronization points
  class memGreedySchedVisitor: public visitor {
  public:
    // fd is kept by reference in "facts"; it must outlive this visitor
    memGreedySchedVisitor(fact_db& fd): facts(fd) {}
    virtual ~memGreedySchedVisitor() {}
    virtual void visit(loop_compiler& lc) ;
    virtual void visit(dag_compiler& dc) ;
    virtual void visit(conditional_compiler& cc) ;
  private:
    // returns a sequence of vertex sets for graph "gr" (presumably an
    // initial schedule, one set per step -- TODO confirm)
    std::vector<digraph::vertexSet>
      get_firstSched(const digraph& gr) ;
    // reference to the fact database
    fact_db& facts ;
  } ;
  
  // clear the schedules and compilers that already exist
  //
  // Holds no state; the visit() overloads are defined outside this
  // header.
  class SchedClearVisitor: public visitor {
  public:
    SchedClearVisitor(){}
    // declared explicitly, as in the other visitor classes visible in
    // this header
    virtual ~SchedClearVisitor() {}
    virtual void visit(loop_compiler& lc) ;
    virtual void visit(dag_compiler& dc) ;
    virtual void visit(conditional_compiler& cc) ;
  } ;

  
  // abstract interface of the prioritize function
  //
  // Implementations are function objects called with a graph and a
  // map from int_type to int_type passed by non-const reference, i.e.
  // the map is the output (presumably vertex id -> priority/weight
  // -- TODO confirm). The call operator is const, so an implementation
  // may not change its own state while prioritizing.
    struct PrioGraph {
    virtual ~PrioGraph() {}
    virtual void operator()(const digraph&,
                            std::map<int_type,int_type>&) const = 0 ;
    } ;



  

  // generic scheduling visitor that schedules
  // a graph according to the weight of each vertex
  class graphSchedulerVisitor: public visitor {
  public:
    // pf is kept by reference, not copied: the PrioGraph object must
    // outlive this visitor (do not pass a temporary)
    graphSchedulerVisitor(const PrioGraph& pf):prio(pf) {}
    virtual ~graphSchedulerVisitor() {}
    virtual void visit(loop_compiler& lc) ;
    virtual void visit(dag_compiler& dc) ;
    virtual void visit(conditional_compiler& cc) ;
  public:
    // returns a sequence of vertex sets for graph "gr" (presumably one
    // set per schedule step -- TODO confirm); defined outside this header
    std::vector<digraph::vertexSet>
      schedule(const digraph& gr) ;
  private:
    // the prioritize function given to the constructor
    const PrioGraph& prio ;
  } ;  

  // overload "<<" to print out an std::set
  //
  // The elements are written to "s" in the set's iteration order,
  // separated by a single space, with no leading or trailing separator.
  // An empty set writes nothing: the first element is only dereferenced
  // after checking that the set is not empty.
  // Returns "s" so that calls can be chained.
  template<typename T>
  inline std::ostream& operator<<(std::ostream& s, const std::set<T>& ss) {
    typename std::set<T>::const_iterator si = ss.begin() ;
    // nothing to print (and nothing that may be dereferenced)
    if(si == ss.end())
      return s ;
    // first element goes out without a separator in front of it
    s << *si ;
    for(++si;si!=ss.end();++si) {
      s << " " ;
      s << *si ;
    }

    return s ;
  }
  
  // visitor that modify the graph and rulecompiler map
  // for all the rules that exist in a dynamic keyspace
  // (which is a keyspace that rules will be evaluated
  // "on-site" and no static existential analysis and
  // pruning process need to be taken with these rules)

  // this visitor also collects the input signatures of
  // all the dynamic rules for later use.

  // this visitor also computes the dynamic control
  // call graph, that is, var->drule mapping to be
  // used at run-time to reset the dynamic rule control
  class DynamicKeyspaceVisitor: public visitor {
  public:
    // keeps references to the fact database "fd" and the schedule
    // database "sd"; the two maps "a2u" and "orv" are copied into
    // the apply2unit and overlap_rotvars members.
    DynamicKeyspaceVisitor(fact_db& fd, sched_db& sd,
                           const std::map<rule,rule>& a2u,
                           const std::map<variable,variableSet>& orv)
      :facts(fd),scheds(sd),apply2unit(a2u),overlap_rotvars(orv) {}
    virtual ~DynamicKeyspaceVisitor() {}
    // one overload per compiler node kind handled by this visitor;
    // only declared here
    virtual void visit(loop_compiler& lc) ;
    virtual void visit(dag_compiler& dc) ;
    virtual void visit(conditional_compiler& cc) ;
    virtual void visit(impl_recurse_compiler& irc) ;
    virtual void visit(recurse_compiler& rc) ;

    // returns a copy of the collected dynamic targets
    variableSet
    get_dynamic_targets() const {return dynamic_targets ;}

    // read-only access to the shadow clone and self clone records
    // (see the member comments below for their meaning)
    const std::map<variable, std::set<std::string> >&
    get_shadow_clone() const {return shadow_clone ;}
    const std::map<variable, std::string>&
    get_self_clone() const {return self_clone ;}

    // read-only access to the dynamic rule control records
    const std::map<variable, std::set<std::string> >&
    get_drule_ctrl() const {return drule_ctrl ;}
  protected:
    // reference to fact database
    fact_db& facts ;
    // ref to sched database
    sched_db& scheds ;
    // compiler replacement helpers, only declared here. going by
    // the class description they swap in the compilers for rules
    // of a dynamic keyspace -- confirm against the definitions.
    void
    replace_compiler(digraph& gr,rulecomp_map& rcm,int id) ;
    void
    replace_compiler(const ruleSet& rs) ;
  private:
    // copy of the rule -> rule map passed to the constructor as
    // "a2u" (presumably apply rule -> unit rule; confirm with caller)
    std::map<rule,rule> apply2unit ;
    // this map records the overlapped rotation var list
    std::map<variable,variableSet> overlap_rotvars ;
    // all targets generated by dynamic keyspace rules
    // (excluding those apply rule targets that cross keyspaces).
    // these are the ones that don't need to be allocated by
    // the graph decoration process in the alloc compiler, however,
    // they do need to be deallocated.
    variableSet dynamic_targets ;
    // the dynamic rules involved. (does not include dynamic apply rules)
    ruleSet dynamic_impl_rules ;
    // the dynamic apply rules
    ruleSet dynamic_apply_rules ;
    // data-structure for the variables that are involved in an
    // input chain (e.g., A->B->C) and the existence keyspace of
    // their corresponding clone data.

    // the shadow clone is for variables referenced across
    // keyspaces, they should have a dynamic clone in the referencing
    // keyspace. e.g., if "T" is a tunnel in keyspace K1 to K2,
    // and we have an input chain: A->T->C->D. In this chain, C and D
    // exist in keyspace K2 but are also accessed by keyspace K1.
    // Suppose K1 is dynamic, and therefore C and D should have
    // a dynamic clone copy in keyspace K1 so that K1 can dynamically
    // request the portion it needs. therefore in the shadow
    // clone, we record C->K1, D->K1.
    std::map<variable, std::set<std::string> > shadow_clone ;
    // the self-clone is for dynamic keyspace variables that are
    // referenced inside their own keyspace. e.g., in the above
    // example, "T" belongs to keyspace K1 and is referred to
    // in the input chain. since this is not a cross keyspace
    // reference, "T"'s clone is itself in the fact_db's storage.
    // the self clone map records T->K1, meaning "T" from K1
    // should have a clone. NOTE: this is a one to one mapping,
    // i.e., if T->K1, then it is not possible for T to have
    // other self clone mapping records. We assume a variable
    // cannot contain data from multiple keyspaces.
    std::map<variable, std::string> self_clone ;
    // this map is for recording the input relations that
    // a variable has on any dynamic rules
    std::map<variable, std::set<std::string> > drule_ctrl ;
  } ;

  // this visitor will create the compiler nodes in the graph
  // that does the dynamic clone region invalidation
  class DynamicCloneInvalidatorVisitor: public visitor {
  public:
    // the constructor is defined out of line. "fd" is the fact
    // database; judging by the types, "sc" is presumably the
    // self clone map and "sac" the shadow clone map -- confirm
    // against the constructor definition.
    DynamicCloneInvalidatorVisitor
    (fact_db& fd,
     const std::map<variable,std::string>& sc,
     const std::map<variable,std::set<std::string> >& sac) ;
    virtual ~DynamicCloneInvalidatorVisitor() {}
    // one overload per compiler node kind; only declared here
    virtual void visit(loop_compiler& lc) ;
    virtual void visit(dag_compiler& dc) ;
    virtual void visit(conditional_compiler& cc) ;
  private:
    // graph editing helper working on a graph together with its
    // rule compiler map; only declared here
    void
    edit_graph(digraph& gr, rulecomp_map& rcm) ;

    // reference to the fact database
    fact_db& facts ;
    // variable -> string clone record (one string per variable)
    std::map<variable, std::string> self_clone ;
    // variable -> set of strings clone record
    std::map<variable, std::set<std::string> > shadow_clone ;
    // variable sets kept alongside the two maps (presumably the
    // key sets of self_clone / shadow_clone and a combination of
    // both; confirm in the constructor definition)
    variableSet self_clone_vars ;
    variableSet shadow_clone_vars ;
    variableSet clone_vars ;
  } ;

  // overload "<<" to print out an std::map
  // every entry is written on its own line as "key: value",
  // in the map's iteration order; an empty map writes nothing
  template<typename T1, typename T2>
  inline std::ostream& operator<<(std::ostream& s, const std::map<T1,T2>& m) {
    typedef typename std::map<T1,T2>::const_iterator entry_iter ;
    entry_iter cur = m.begin() ;
    const entry_iter last = m.end() ;
    while(cur != last) {
      s << cur->first << ": " << cur->second << std::endl ;
      ++cur ;
    }
    return s ;
  }

  // pretty printing of a rule's signature
  // i.e. remove namespace info, if any
  inline std::string pretty_sig(const rule& r) {
    // r.get_info().desc.rule_identifier() would be the simplest
    // source for the text, but it lacks the qualifier (such as
    // "SN1:" in a super node), hence the name is used instead
    const std::string full = r.get_info().name() ;
    // internal rules are reported verbatim
    if(r.type() == rule::INTERNAL)
      return full ;
    // otherwise drop everything up to and including the first '#';
    // a name without any '#' is returned unchanged
    const std::string::size_type hash = full.find('#') ;
    if(hash == std::string::npos)
      return full ;
    return full.substr(hash+1) ;
  }

  // function to get all the recur targets of a variableSet
  // from a given recur mapping table
  // (declaration only)
  //   vars - the variables whose recur targets are requested
  //   t    - the recur mapping table (variable -> variableSet)
  // returns the collected targets as a variableSet
  variableSet
  get_recur_target_for_vars(const variableSet& vars,
                            const std::map<variable,variableSet>& t) ;

  // priority functions on a graph
  // computation greedy prioritize
  struct compGreedyPrio: public PrioGraph {
    // implements the PrioGraph call; only declared here
    virtual void operator()(const digraph& gr,
                            std::map<int_type,int_type>& pmap) const ;
  } ;

  // memory greedy prioritize
  struct memGreedyPrio: public PrioGraph {
    // only a reference to the fact database is kept
    memGreedyPrio(fact_db& fd): facts(fd) {}
    // implements the PrioGraph call; only declared here
    virtual void operator()(const digraph& gr,
                            std::map<int_type,int_type>& pmap) const ;
  private:
    fact_db& facts ;
  } ;
  // this is a random greedy prioritize functor
  struct RandomPrio: public PrioGraph {
    // implements the PrioGraph call; only declared here
    virtual void operator()(const digraph& gr,
                            std::map<int_type,int_type>& pmap) const ;
  } ;
  
  // here we define and implement a disjoint-set data structure
  // for computing incremental connected components in a graph.
  // the basic organization and algorithm of this implementation
  // is taken from the CLRS book section 21.3, i.e., a disjoint-set
  // forests with "union by rank" and "path compression" optimizations.
  // 
  // this disjoint-set data structure assumes that there are "n"
  // input objects represented by integers from 0, ..., n-1. 
  // the main data are organized in two arrays with size "n"
  // these two arrays are used for recording the disjoint-set
  // structure during the union operations and are also used to
  // extract the individual sets. the two arrays are "parent" and
  // "rank" arrays, for recording the parent and rank of each object.
  //
  // this implementation also supports "scattered" objects as long as
  // the objects are identified by non-negative numbers. for example,
  // objects 0, 1, 4, 5, 6, 15 can be all the objects participating in
  // the disjoint sets operations. there is no need to remap them
  // to contiguous ids (i.e., [0,1,2,3,4,5]). However the initialization
  // method must specify the disjoint set size to be "16", i.e., the
  // largest id+1. The findSet may return "-1" to indicate that the
  // object is not yet in the disjoint sets. for example, using the
  // previous example, if we make a call "findSet(10)", then it will
  // return "-1" indicating that object "10" hasn't been constructed
  // by "makeSet(10)" yet, and is therefore not in the sets.
  class DisjointSets {
    typedef DisjointSets self ;
  public:
    // initialize with "n" objects (ids 0, ..., n-1). every object
    // starts out uninitialized (parent == -1, rank == 0) until one
    // of the makeSet variants is called on it.
    DisjointSets(int n)
      :parent(std::vector<int>(n,-1)),
       rank(std::vector<int>(n,0)),numObj(n) {}
    // copy constructor, duplicates the complete forest
    DisjointSets(const self& d)
      :parent(d.parent),rank(d.rank),numObj(d.numObj) {}
    // returns the size of the disjoint sets, i.e., the number of
    // object ids managed (whether initialized or not)
    int size() const {
      return numObj ;
    }
    // this method will resize the disjoint sets.
    // passing in a larger size will cause the object number
    // to increase (the new objects are uninitialized). passing in
    // a smaller size will cause the last (size()-newSize) objects
    // to be erased. the remaining objects keep their grouping:
    // two surviving objects are in the same set afterwards if and
    // only if they were in the same set before.
    // throws Loci::StringError if "s" is negative.
    void resize(int s) {
      if(s < 0) {
        std::ostringstream oss ;
        oss << "DisjointSets::resize(" << s
            << ") has a negative object id!" ;
        throw Loci::StringError(oss.str()) ;
      }
      // when shrinking, surviving objects may still point (directly
      // or through a chain) at objects that are about to be erased.
      // detach them first, otherwise later lookups would index past
      // the end of the arrays.
      if(s < numObj)
        rerootSurvivors(s) ;
      parent.resize(s,-1) ;
      rank.resize(s,0) ;
      numObj = s ;
    }
    // creates a single set containing object x. any previous
    // parent/rank record of x is overwritten.
    // throws Loci::StringError if x is out of range.
    void makeSet(int x) {
      checkRange(x) ;
      parent[x] = x ;
      rank[x] = 0 ;
    }
    // creates a single set containing object x,
    // IF x is NOT already in the sets
    void tryMakeSet(int x) {
      if(findSet(x)==-1)
        makeSet(x) ;
    }
    // creates a single set containing object x.
    // resize the sets first if the id of x does not fit into
    // the current size().
    void makeSetWithResize(int x) {
      if(x >= size())
        resize(x+1) ;
      makeSet(x) ;
    }
    // the try version of the "makeSetWithResize": an object that
    // is already in the sets is left untouched.
    // throws Loci::StringError if x is negative.
    void tryMakeSetWithResize(int x) {
      if(x < 0) {
        std::ostringstream oss ;
        oss << "DisjointSets::tryMakeSetWithResize(" << x
            << ") has a negative object id!" ;
        throw Loci::StringError(oss.str()) ;
      }
      if(x >= size())
        makeSetWithResize(x) ;
      else if(parent[x]==-1)
        makeSet(x) ;
    }
    // union the two sets represented by object x and y.
    // x and y are expected to be representative objects (as
    // returned by findSet); unionSets() takes care of that.
    // throws Loci::StringError if x or y is out of range or
    // has not been constructed yet.
    void link(int x, int y) {
      checkRange(x) ; checkInit(x) ;
      checkRange(y) ; checkInit(y) ;
      if(x==y) return ;
      // union by rank: the root with the smaller rank is attached
      // to the other one; on a tie the new root's rank grows by one
      if(rank[x]>rank[y])
        parent[y] = x ;
      else {
        parent[x] = y ;
        if(rank[x]==rank[y])
          rank[y]=rank[y]+1 ;
      }
    }
    // union the two sets containing x and y
    void unionSets(int x, int y) {
      link(findSet(x), findSet(y)) ;
    }
    // return the representative object of the set containing
    // the object x while also performing path compression.
    // returns -1 if x has not been constructed yet.
    // throws Loci::StringError if x is out of range.
    int findSet(int x) {
      checkRange(x) ;
      return pathCompressWithRep(x,parent) ;
    }
    // return if two objects belong to the same set.
    // an object that is not in the sets yet is never in the
    // same set as anything.
    bool sameSet(int x, int y) {
      int xs = findSet(x) ;
      int ys = findSet(y) ;
      if(xs==-1 || ys==-1)
        return false ;
      return xs==ys ;
    }
    // return how many sets are in this data structure.
    std::size_t countSets() const {
      std::size_t c = 0 ;
      for(int v=0;v!=numObj;++v)
        if(parent[v]==v) // count distinct representative objects
          ++c ;
      return c ;
    }
    // this method produces the corresponding set index
    // that each object belongs to, i.e., in the array,
    // this method returns an index for each entry that
    // is the set that object is in (-1 for objects that are
    // not in the sets). returns the total number of sets
    // available. the disjoint sets themselves are not modified,
    // all the work is done on the passed in array.
    int setIndex(std::vector<int>& index) const {
      index = parent ;
      // first we need to compress the entire sets and
      // make every object point directly to their representative obj
      for(int v=0;v!=numObj;++v)
        pathCompressWithRep(v,index) ;
      std::vector<int> header ;
      // first we'll record the representative object in the header
      // array. and also make representative object in the index
      // array point to the individual set index.
      int setNum = 0 ;
      for(int v=0;v!=numObj;++v)
        if(index[v]==v) { // a representative object
          header.push_back(v) ;
          index[v] = setNum++ ;
        }
      // now make the non-representative objects point to their set
      for(int v=0;v!=numObj;++v) {
        // skip uninitialized object
        if(index[v] == -1) continue ;
        // a representative v satisfies header[index[v]]==v;
        // everything else still holds the id of its representative
        if(index[v]>=setNum || header[index[v]]!=v) // non-representative
          index[v] = index[index[v]] ;              // objs
      }

      return setNum ;
    }
    // this method extracts individual sets.
    // sets are put in the provided vector<entitySet>,
    // each entry in the vector contains one set,
    // the total size of the vector is also the number of
    // individual sets.
    void extractSets(std::vector<entitySet>& allSets) const {
      std::vector<int> index ;
      int s = setIndex(index) ;
      // we are ready to extract the sets since at this time,
      // all the object's parent are pointed to their
      // respective set index in the index array
      allSets.resize(s) ;
      for(int v=0;v!=numObj;++v) {
        // skip uninitialized objects
        if(index[v] == -1) continue ;
        allSets[index[v]] += v ;
      }
    }
  private:
    // throws Loci::StringError unless 0 <= x < size()
    void checkRange(int x) const {
      if(x<0 || x>=numObj) {
        std::ostringstream oss ;
        oss << "DisjointSets obj(" << x << ") out of bound!" ;
        throw Loci::StringError(oss.str()) ;
      }
    }
    // throws Loci::StringError if x has not been constructed by
    // one of the makeSet variants. x must be in range.
    void checkInit(int x) const {
      if(parent[x] == -1) {
        std::ostringstream oss ;
        oss << "DisjointSets obj(" << x
            << ") hasn't been constructed yet, call makeSet("
            << x << ") first!" ;
        throw Loci::StringError(oss.str()) ;
      }
    }
    // helper for shrinking to "s" objects (0 <= s < size()).
    // afterwards no object with id < s refers to an object with
    // id >= s anymore: a set whose representative is going to be
    // erased gets its lowest surviving member as the new
    // representative.
    void rerootSurvivors(int s) {
      // newRoot[r] is the surviving object that replaces the
      // erased representative r (-1 if none was chosen so far)
      std::vector<int> newRoot(numObj,-1) ;
      for(int v=0;v!=s;++v) {
        int rep = pathCompressWithRep(v,parent) ;
        // either uninitialized (-1) or the representative
        // survives; after the compression v points straight to it
        if(rep < s)
          continue ;
        if(newRoot[rep] == -1) {
          newRoot[rep] = v ;
          // inherit the rank of the old representative so that
          // it remains an upper bound of the tree height
          rank[v] = rank[rep] ;
        }
        parent[v] = newRoot[rep] ;
      }
    }
    // this method compresses the path of the passed in object in
    // the disjoint forests represented in the passed array,
    // this method also returns the representative object of the
    // set that contains the object x (-1 if x is uninitialized).
    int pathCompressWithRep(int x, std::vector<int>& forests) const {
      int old = x ;
      // first find the representative object of the set
      int rep = forests[x] ;

      if(rep==-1)
        return rep ;          // obj not initialized yet

      while(rep != x) {
        x = rep ;
        rep = forests[x] ;
      }
      // path compression, make every object on the path
      // point to the representative object
      x = forests[old] ;
      while(rep != x) {
        forests[old] = rep ;
        old = x ;
        x = forests[old] ;
      }
      return rep ;
    }
    // data fields: parent[v] is -1 for an uninitialized object,
    // v itself for a representative; rank[v] is the union-by-rank
    // rank of v; numObj is the number of object ids managed.
    std::vector<int> parent, rank ;
    int numObj ;
  } ;

  // new memory greedy scheduler
  class MemGreedyScheduler: public visitor {
  public:
    // the constructor is defined out of line. note that although a
    // sched_db reference is passed in, this class declares no
    // sched_db member. judging by the names and types, "s" and "t"
    // presumably feed the s2t / t2s recurrence maps and "var_info"
    // the alloc_info map -- confirm against the definition.
    MemGreedyScheduler(fact_db& fd,
                       sched_db& sd,
                       const std::map<variable,variableSet>& s,
                       const std::map<variable,variableSet>& t,
                       const std::map<variable, double>& var_info) ;
    virtual ~MemGreedyScheduler() {}
    // one overload per compiler node kind; only declared here
    virtual void visit(loop_compiler& lc) ;
    virtual void visit(dag_compiler& dc) ;
    virtual void visit(conditional_compiler& cc) ;
    // (disabled) reporting of the number of topological orders
    // per graph, kept for reference:
    // void
    //print_info(std::ostream& s) {
     //  s << "Total topological orders: " << std::endl ;
//       for(std::map<int,int>::const_iterator
//             mi=num_orders.begin();mi!=num_orders.end();++mi)
//         s << "Graph[" << mi->first << "] = " << mi->second << std::endl ;
    // }
  private:
    // this method generates a topological sort of the passed in graph
    std::vector<digraph::vertexSet>
    schedule(const digraph& dag) ;
    // given a variable, return its cluster representative variable
    variable get_rep(const variable& v) ;
    // same as above, applied to a whole set of variables
    variableSet get_rep(const variableSet& vs) ;
    // given a representative variable in the cluster,
    // get its alloc info
    double get_alloc_size(const variable& v) ;
    // given a rule, returns the effects the rule creates,
    // in the form of the allocation each of the rule's target
    // variables will cause. a negative number means deallocation.
    // "c" is the set of variables that have been created so far
    // by rules in the dag.
    // "e" is the result, NOTE: e records the representative
    // variable for each variable cluster
    void rule_effects(const rule& r,const variableSet& c,
                      std::vector<std::pair<variable,double> >& e) ;
    // this method simplifies the passed digraph.
    digraph simplify_graph(const digraph& g) ;
   
    // (disabled) records the total topological orders for each graph
    // std::map<int,int> num_orders ;
    // reference to the fact database
    fact_db& facts ;
    // recurrence variable information
    std::map<variable,variableSet> s2t ;
    std::map<variable,variableSet> t2s ;
    // this one records the recurrent variable information
    // as clusters in a disjoint-set structure
    DisjointSets var_cluster ;
    // records the read-in variable alloc info
    std::map<variable,double> alloc_info ;
  } ;
  
  // graph prioritize function that tries
  // to maximize the cache benefit of chomping
  struct chompingPrio: public PrioGraph {
    // implements the PrioGraph call; only declared here
    virtual void operator()(const digraph& gr,
                            std::map<int_type,int_type>& pmap) const ;
  } ;

}

#endif
