#ifndef __CPU_OZONE_LW_BACK_END_HH__
#define __CPU_OZONE_LW_BACK_END_HH__

#include <list>
#include <queue>
#include <set>
#include <string>

#include "arch/faults.hh"
#include "base/timebuf.hh"
#include "cpu/inst_seq.hh"
#include "cpu/ozone/rename_table.hh"
#include "cpu/ozone/thread_state.hh"
#include "mem/functional/functional.hh"
#include "mem/mem_interface.hh"
#include "mem/mem_req.hh"
#include "sim/eventq.hh"

class ExecContext;

template <class Impl>
class OzoneThreadState;

/**
 * Back end of the Ozone CPU model, parameterized on an Impl policy class
 * that supplies the concrete parameter, instruction, CPU, front-end and
 * load/store-queue types.
 *
 * This header only declares the interface and state; apart from the small
 * inline helpers below, the member functions (tick(), dispatchInsts(),
 * executeInsts(), writebackInsts(), commitInsts(), the squash*() family,
 * ...) are defined elsewhere.
 *
 * NOTE(review): the data members are declared in several interleaved
 * public/private sections. Their declaration order determines construction
 * order for the out-of-line constructor, so reorder with care.
 */
template <class Impl>
class LWBackEnd
{
  public:
    /** Per-thread state type used by this back end. */
    typedef OzoneThreadState<Impl> Thread;

    // Concrete types supplied by the Impl policy class.
    typedef typename Impl::Params Params;
    typedef typename Impl::DynInst DynInst;
    typedef typename Impl::DynInstPtr DynInstPtr;
    typedef typename Impl::FullCPU FullCPU;
    typedef typename Impl::FrontEnd FrontEnd;
    typedef typename Impl::FullCPU::CommStruct CommStruct;

    /** Payload carried by the stage-to-stage time buffers below: a single
     * integer size field.
     */
    struct SizeStruct {
        int size;
    };

    // All inter-stage payloads are currently the same SizeStruct; the
    // distinct names only label which buffer they travel through.
    typedef SizeStruct DispatchToIssue;
    typedef SizeStruct IssueToExec;
    typedef SizeStruct ExecToCommit;
    typedef SizeStruct Writeback;

    // Time buffers between stages and the wires used to access them.
    // NOTE(review): judging by the names these are dispatch->issue,
    // issue->execute and execute->commit; how they are sized and wired is
    // decided outside this header.
    TimeBuffer<DispatchToIssue> d2i;
    typename TimeBuffer<DispatchToIssue>::wire instsToDispatch;
    TimeBuffer<IssueToExec> i2e;
    typename TimeBuffer<IssueToExec>::wire instsToExecute;
    TimeBuffer<ExecToCommit> e2c;
    TimeBuffer<Writeback> numInstsToWB;

    // Communication buffer of the CPU's CommStruct and two wires into it.
    // Held by pointer; presumably installed through setCommBuffer().
    TimeBuffer<CommStruct> *comm;
    typename TimeBuffer<CommStruct>::wire toIEW;
    typename TimeBuffer<CommStruct>::wire fromCommit;

    /** Event carrying a pointer back to this back end; what it does when
     * processed is defined out of line.
     */
    class TrapEvent : public Event {
      private:
        /** Back end this event refers to. */
        LWBackEnd<Impl> *be;

      public:
        TrapEvent(LWBackEnd<Impl> *_be);

        void process();
        const char *description();
    };

    /** LdWriteback event for a load completion. */
    class LdWritebackEvent : public Event {
      private:
        /** Instruction that is writing back data to the register file. */
        DynInstPtr inst;
        /** Pointer to the back end that this event notifies (see
         * setDcacheMiss()).
         */
        LWBackEnd *be;

        /** Set by setDcacheMiss(). NOTE(review): not initialized in this
         * header; confirm the out-of-line constructor clears it.
         */
        bool dcacheMiss;

      public:
        /** Constructs a load writeback event. */
        LdWritebackEvent(DynInstPtr &_inst, LWBackEnd *be);

        /** Processes writeback event. */
        virtual void process();
        /** Returns the description of the writeback event. */
        virtual const char *description();

        /** Flags this event as a data-cache miss and registers its
         * instruction with the back end via addDcacheMiss().
         */
        void setDcacheMiss() { dcacheMiss = true; be->addDcacheMiss(inst); }
    };

    /** Constructs the back end from the given parameters (defined out of
     * line).
     */
    LWBackEnd(Params *params);

    /** Returns the name of this object (defined out of line). */
    std::string name() const;

    /** Registers statistics (defined out of line). */
    void regStats();

    /** Stores the pointer to the CPU this back end belongs to. */
    void setCPU(FullCPU *cpu_ptr)
    { cpu = cpu_ptr; }

    /** Stores the pointer to the front end. */
    void setFrontEnd(FrontEnd *front_end_ptr)
    { frontEnd = front_end_ptr; }

    /** Stores the pointer to the execution context. */
    void setXC(ExecContext *xc_ptr)
    { xc = xc_ptr; }

    /** Stores the pointer to the thread state. */
    void setThreadState(Thread *thread_ptr)
    { thread = thread_ptr; }

    /** Installs the communication time buffer (defined out of line). */
    void setCommBuffer(TimeBuffer<CommStruct> *_comm);

    // Per-cycle work and squash entry points; all defined out of line.
    void tick();
    void squash();
    /** Only raises the xcSquash flag; the flag is consumed elsewhere. */
    void generateXCEvent() { xcSquash = true; }
    void squashFromXC();
    void squashFromTrap();
    void checkInterrupts();
    // Pending-squash flags. xcSquash is set by generateXCEvent();
    // trapSquash is presumably set when a trap fires -- confirm in the
    // implementation.
    bool trapSquash;
    bool xcSquash;

    /** Load access; defined below in this header, forwards to the LSQ. */
    template <class T>
    Fault read(MemReqPtr &req, T &data, int load_idx);

    /** Store access; defined below in this header, forwards to the LSQ. */
    template <class T>
    Fault write(MemReqPtr &req, T &data, int store_idx);

    /** Returns the current value of commitPC. */
    Addr readCommitPC() { return commitPC; }

    Addr commitPC;

    /** True when the in-flight instruction list is empty. */
    bool robEmpty() { return instList.empty(); }

    /** True when the instruction count has reached the ROB capacity. */
    bool isFull() { return numInsts >= numROBEntries; }
    /** True when either the overall status or the dispatch status is
     * Blocked.
     */
    bool isBlocked() { return status == Blocked || dispatchStatus == Blocked; }

    /** Reports a fault coming from fetch (defined out of line). */
    void fetchFault(Fault &fault);

    /** Wakes instructions depending on inst; the meaning of the returned
     * int is defined by the out-of-line implementation.
     */
    int wakeDependents(DynInstPtr &inst);

    /** Tells memory dependence unit that a memory instruction needs to be
     * rescheduled. It will re-execute once replayMemInst() is called.
     */
    void rescheduleMemInst(DynInstPtr &inst);

    /** Re-executes all rescheduled memory instructions. */
    void replayMemInst(DynInstPtr &inst);

    /** Completes memory instruction. Currently an empty body (no-op). */
    void completeMemInst(DynInstPtr &inst) { }

    /** Records inst as an outstanding memory operation caused by a dcache
     * miss: inserts its sequence number into waitingMemOps and bumps
     * numWaitingMemOps.
     *
     * NOTE(review): the counter is incremented unconditionally, while
     * std::set::insert ignores a sequence number that is already present,
     * so a double add would leave the counter ahead of the set size.
     */
    void addDcacheMiss(DynInstPtr &inst)
    {
        waitingMemOps.insert(inst->seqNum);
        numWaitingMemOps++;
        DPRINTF(BE, "Adding a Dcache miss mem op [sn:%lli], total %i\n",
                inst->seqNum, numWaitingMemOps);
    }

    /** Removes inst from the outstanding memory operations and decrements
     * numWaitingMemOps. Asserts that the sequence number is present.
     *
     * NOTE(review): when assert() is compiled out, the counter is still
     * decremented even if the sequence number was not in the set.
     */
    void removeDcacheMiss(DynInstPtr &inst)
    {
        assert(waitingMemOps.find(inst->seqNum) != waitingMemOps.end());
        waitingMemOps.erase(inst->seqNum);
        numWaitingMemOps--;
        DPRINTF(BE, "Removing a Dcache miss mem op [sn:%lli], total %i\n",
                inst->seqNum, numWaitingMemOps);
    }

    /** Same bookkeeping as addDcacheMiss() (same set, same counter); only
     * the trace message differs.
     */
    void addWaitingMemOp(DynInstPtr &inst)
    {
        waitingMemOps.insert(inst->seqNum);
        numWaitingMemOps++;
        DPRINTF(BE, "Adding a waiting mem op [sn:%lli], total %i\n",
                inst->seqNum, numWaitingMemOps);
    }

    /** Same bookkeeping as removeDcacheMiss() (same set, same counter);
     * only the trace message differs.
     */
    void removeWaitingMemOp(DynInstPtr &inst)
    {
        assert(waitingMemOps.find(inst->seqNum) != waitingMemOps.end());
        waitingMemOps.erase(inst->seqNum);
        numWaitingMemOps--;
        DPRINTF(BE, "Removing a waiting mem op [sn:%lli], total %i\n",
                inst->seqNum, numWaitingMemOps);
    }

    /** Hands a completed instruction on towards commit (defined out of
     * line; wbNumInst/wbCycle below refer to its scheduling logic).
     */
    void instToCommit(DynInstPtr &inst);

  private:
    // Internal pipeline steps and squash helpers; all defined out of line.
    void generateTrapEvent(Tick latency = 0);
    void handleFault(Fault &fault, Tick latency = 0);
    void updateStructures();
    void dispatchInsts();
    void dispatchStall();
    void checkDispatchStatus();
    void executeInsts();
    void commitInsts();
    void addToLSQ(DynInstPtr &inst);
    void writebackInsts();
    bool commitInst(int inst_num);
    void squash(const InstSeqNum &sn);
    void squashDueToBranch(DynInstPtr &inst);
    void squashDueToMemViolation(DynInstPtr &inst);
    void squashDueToMemBlocked(DynInstPtr &inst);
    void updateExeInstStats(DynInstPtr &inst);
    void updateComInstStats(DynInstPtr &inst);

  public:
    // Non-owning pointers installed through the set*() functions above.
    FullCPU *cpu;

    FrontEnd *frontEnd;

    ExecContext *xc;

    Thread *thread;

    /** State values shared by the three status fields below. */
    enum Status {
        Running,
        Idle,
        DcacheMissStall,
        DcacheMissComplete,
        Blocked,
        TrapPending
    };

    /** Overall back-end status; checked by isBlocked(). */
    Status status;

    /** Dispatch status; checked by isBlocked(). */
    Status dispatchStatus;

    /** Commit status. */
    Status commitStatus;

    Counter funcExeInst;

  private:
//    typedef typename Impl::InstQueue InstQueue;

//    InstQueue IQ;

    typedef typename Impl::LdstQueue LdstQueue;

    /** Load/store queue; read() and write() forward to it. */
    LdstQueue LSQ;
  public:
    // Two rename tables. NOTE(review): by name, one tracks committed
    // state and one speculative state -- confirm in the implementation.
    RenameTable<Impl> commitRenameTable;

    RenameTable<Impl> renameTable;
  private:
    /** Event carrying a pointer back to this back end; intended for data
     * cache completions judging by its name. Behavior is defined out of
     * line.
     */
    class DCacheCompletionEvent : public Event
    {
      private:
        LWBackEnd *be;

      public:
        DCacheCompletionEvent(LWBackEnd *_be);

        virtual void process();
        virtual const char *description();
    };

    friend class DCacheCompletionEvent;

    DCacheCompletionEvent cacheCompletionEvent;

    MemInterface *dcacheInterface;

    MemReqPtr memReq;

    // General back end width. Used if the more specific isn't given.
    int width;

    // Dispatch width.
    int dispatchWidth;
    int numDispatchEntries;
    int dispatchSize;

    int waitingInsts;

    int issueWidth;

    // Writeback width
    int wbWidth;

    // Commit width
    int commitWidth;

    /** Index into queue of instructions being written back. */
    unsigned wbNumInst;

    /** Cycle number within the queue of instructions being written
     * back.  Used in case there are too many instructions writing
     * back at the current cycle and writebacks need to be scheduled
     * for the future. See comments in instToCommit().
     */
    unsigned wbCycle;

    // ROB capacity and current instruction count; compared by isFull().
    int numROBEntries;
    int numInsts;

    // Sequence numbers of outstanding memory operations, maintained by the
    // add*/remove* helpers above together with numWaitingMemOps.
    std::set<InstSeqNum> waitingMemOps;
    typedef std::set<InstSeqNum>::iterator MemIt;
    int numWaitingMemOps;
    unsigned maxOutstandingMemOps;

    // State describing a pending squash, if any.
    bool squashPending;
    InstSeqNum squashSeqNum;
    Addr squashNextPC;

    // Fault handed over by fetch; see fetchFault().
    Fault faultFromFetch;
    bool fetchHasFault;

  private:
    /** Comparator for exeList. Because std::priority_queue keeps the
     * "largest" element under the comparator on top, comparing with '>'
     * puts the instruction with the smallest sequence number on top.
     */
    struct pqCompare {
        bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
        {
            return lhs->seqNum > rhs->seqNum;
        }
    };

    typedef typename std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare> ReadyInstQueue;
    /** Instructions queued for execution, lowest sequence number first. */
    ReadyInstQueue exeList;

    typedef typename std::list<DynInstPtr>::iterator InstListIt;

    // Instruction lists. instList backs robEmpty(); the roles of the
    // others are defined by the out-of-line implementation.
    std::list<DynInstPtr> instList;
    std::list<DynInstPtr> waitingList;
    std::list<DynInstPtr> replayList;
    std::list<DynInstPtr> writeback;

    int latency;

    int squashLatency;

    bool exactFullStall;

    // One flag per hardware thread, sized by Impl::MaxThreads.
    bool fetchRedirect[Impl::MaxThreads];

    // number of cycles stalled for D-cache misses
/*    Stats::Scalar<> dcacheStallCycles;
      Counter lastDcacheStall;
*/
    Stats::Vector<> rob_cap_events;
    Stats::Vector<> rob_cap_inst_count;
    Stats::Vector<> iq_cap_events;
    Stats::Vector<> iq_cap_inst_count;
    // total number of instructions executed
    Stats::Vector<> exe_inst;
    Stats::Vector<> exe_swp;
    Stats::Vector<> exe_nop;
    Stats::Vector<> exe_refs;
    Stats::Vector<> exe_loads;
    Stats::Vector<> exe_branches;

    Stats::Vector<> issued_ops;

    // total number of loads forwarded from LSQ stores
    Stats::Vector<> lsq_forw_loads;

    // total number of loads ignored due to invalid addresses
    Stats::Vector<> inv_addr_loads;

    // total number of software prefetches ignored due to invalid addresses
    Stats::Vector<> inv_addr_swpfs;
    // ready loads blocked due to memory disambiguation
    Stats::Vector<> lsq_blocked_loads;

    Stats::Scalar<> lsqInversion;

    Stats::Vector<> n_issued_dist;
    Stats::VectorDistribution<> issue_delay_dist;

    Stats::VectorDistribution<> queue_res_dist;
/*
    Stats::Vector<> stat_fu_busy;
    Stats::Vector2d<> stat_fuBusy;
    Stats::Vector<> dist_unissued;
    Stats::Vector2d<> stat_issued_inst_type;

    Stats::Formula misspec_cnt;
    Stats::Formula misspec_ipc;
    Stats::Formula issue_rate;
    Stats::Formula issue_stores;
    Stats::Formula issue_op_rate;
    Stats::Formula fu_busy_rate;
    Stats::Formula commit_stores;
    Stats::Formula commit_ipc;
    Stats::Formula commit_ipb;
    Stats::Formula lsq_inv_rate;
*/
    // writeback statistics
    Stats::Vector<> writeback_count;
    Stats::Vector<> producer_inst;
    Stats::Vector<> consumer_inst;
    Stats::Vector<> wb_penalized;

    Stats::Formula wb_rate;
    Stats::Formula wb_fanout;
    Stats::Formula wb_penalized_rate;

    // total number of instructions committed
    Stats::Vector<> stat_com_inst;
    Stats::Vector<> stat_com_swp;
    Stats::Vector<> stat_com_refs;
    Stats::Vector<> stat_com_loads;
    Stats::Vector<> stat_com_membars;
    Stats::Vector<> stat_com_branches;

    Stats::Distribution<> n_committed_dist;

    Stats::Scalar<> commit_eligible_samples;
    Stats::Vector<> commit_eligible;

    // ROB occupancy statistics
    Stats::Scalar<> ROB_fcount;
    Stats::Formula ROB_full_rate;

    Stats::Vector<>  ROB_count;	 // cumulative ROB occupancy
    Stats::Formula ROB_occ_rate;
    Stats::VectorDistribution<> ROB_occ_dist;
  public:
    /** Dumps the in-flight instructions for debugging (defined out of
     * line).
     */
    void dumpInsts();
};

/**
 * Load access entry point of the back end.
 *
 * The back end performs no work of its own here: the request is handed to
 * the load/store queue unchanged and the queue's result is returned.
 *
 * @param req      Memory request for the access, passed through to the LSQ.
 * @param data     Reference passed through to the LSQ to receive the value.
 * @param load_idx Index passed through to LSQ.read() (presumably the load's
 *                 slot in the queue -- confirm against the LSQ interface).
 * @return The Fault produced by LSQ.read().
 */
template <class Impl>
template <class T>
Fault
LWBackEnd<Impl>::read(MemReqPtr &req, T &data, int load_idx)
{
    Fault lsq_result = this->LSQ.read(req, data, load_idx);
    return lsq_result;
}

/**
 * Store access entry point of the back end.
 *
 * The back end performs no work of its own here: the request is handed to
 * the load/store queue unchanged and the queue's result is returned.
 *
 * @param req       Memory request for the access, passed through to the LSQ.
 * @param data      Reference to the value to store, passed through to the
 *                  LSQ.
 * @param store_idx Index passed through to LSQ.write() (presumably the
 *                  store's slot in the queue -- confirm against the LSQ
 *                  interface).
 * @return The Fault produced by LSQ.write().
 */
template <class Impl>
template <class T>
Fault
LWBackEnd<Impl>::write(MemReqPtr &req, T &data, int store_idx)
{
    Fault lsq_result = this->LSQ.write(req, data, store_idx);
    return lsq_result;
}

#endif // __CPU_OZONE_LW_BACK_END_HH__