From 32384b2f6be42016f7bd92c9cfca591314f68e5e Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Fri, 20 Oct 2006 13:00:05 -0400 Subject: [PATCH 01/12] still working on getting past initialization --HG-- extra : convert_revision : 7a5fccb9a19d363e479ef24012a7b8598272eaa9 --- src/dev/i8254xGBe.cc | 154 +++++++++++++++++++- src/dev/i8254xGBe.hh | 7 + src/dev/i8254xGBe_defs.hh | 235 +++++++++++++++++++++++++++++- src/python/m5/objects/Ethernet.py | 2 +- 4 files changed, 387 insertions(+), 11 deletions(-) diff --git a/src/dev/i8254xGBe.cc b/src/dev/i8254xGBe.cc index 77c731899..e95cc15c6 100644 --- a/src/dev/i8254xGBe.cc +++ b/src/dev/i8254xGBe.cc @@ -30,6 +30,9 @@ /* @file * Device model for Intel's 8254x line of gigabit ethernet controllers. + * In particular an 82547 revision 2 (82547GI) MAC because it seems to have the + * fewest workarounds in the driver. It will probably work with most of the + * other MACs with slight modifications. */ #include "base/inet.hh" @@ -39,10 +42,38 @@ #include "sim/stats.hh" #include "sim/system.hh" +using namespace iGbReg; + IGbE::IGbE(Params *p) : PciDev(p), etherInt(NULL) { + // Initialized internal registers per Intel documentation + regs.tctl.reg = 0; + regs.rctl.reg = 0; + regs.ctrl.reg = 0; + regs.ctrl.fd = 1; + regs.ctrl.lrst = 1; + regs.ctrl.speed = 2; + regs.ctrl.frcspd = 1; + regs.sts.reg = 0; + regs.eecd.reg = 0; + regs.eecd.fwe = 1; + regs.eecd.ee_type = 1; + regs.eerd.reg = 0; + regs.icd.reg = 0; + regs.imc.reg = 0; + regs.rctl.reg = 0; + regs.tctl.reg = 0; + regs.manc.reg = 0; + eeOpBits = 0; + eeAddrBits = 0; + eeDataBits = 0; + eeOpcode = 0; + + memset(flash, 0, sizeof(flash)); // zero the whole array: flash holds EEPROM_SIZE 16-bit words, so EEPROM_SIZE bytes covered only half + // Magic happy checksum value + flash[0] = 0xBABA; } @@ -74,15 +105,49 @@ IGbE::read(Packet *pkt) // Only Memory register BAR is allowed assert(bar == 0); - DPRINTF(Ethernet, "Accessed devie register %#X\n", daddr); + // Only 32bit accesses allowed + assert(pkt->getSize() == 4); + + DPRINTF(Ethernet, "Read device register %#X\n", daddr);
pkt->allocate(); - /// /// Handle read of register here /// + switch (daddr) { + case CTRL: + pkt->set(regs.ctrl.reg); + break; + case STATUS: + pkt->set(regs.sts.reg); + break; + case EECD: + pkt->set(regs.eecd.reg); + break; + case EERD: + pkt->set(regs.eerd.reg); + break; + case ICR: + pkt->set(regs.icd.reg); + break; + case IMC: + pkt->set(regs.imc.reg); + break; + case RCTL: + pkt->set(regs.rctl.reg); + break; + case TCTL: + pkt->set(regs.tctl.reg); + break; + case MANC: + pkt->set(regs.manc.reg); + break; + default: + panic("Read request to unknown register number: %#x\n", daddr); + }; + pkt->result = Packet::Success; return pioDelay; } @@ -93,17 +158,100 @@ IGbE::write(Packet *pkt) int bar; Addr daddr; + if (!getBAR(pkt->getAddr(), bar, daddr)) panic("Invalid PCI memory access to unmapped memory.\n"); // Only Memory register BAR is allowed assert(bar == 0); - DPRINTF(Ethernet, "Accessed devie register %#X\n", daddr); + // Only 32bit accesses allowed + assert(pkt->getSize() == sizeof(uint32_t)); + + DPRINTF(Ethernet, "Wrote device register %#X value %#X\n", daddr, pkt->get()); /// /// Handle write of register here /// + uint32_t val = pkt->get(); + + switch (daddr) { + case CTRL: + regs.ctrl.reg = val; + break; + case STATUS: + regs.sts.reg = val; + break; + case EECD: + int oldClk; + oldClk = regs.eecd.sk; + regs.eecd.reg = val; + // See if this is a eeprom access and emulate accordingly + if (!oldClk && regs.eecd.sk) { + if (eeOpBits < 8) { + eeOpcode = eeOpcode << 1 | regs.eecd.din; + eeOpBits++; + } else if (eeAddrBits < 8 && eeOpcode == EEPROM_READ_OPCODE_SPI) { + eeAddr = eeAddr << 1 | regs.eecd.din; + eeAddrBits++; + } else if (eeDataBits < 16 && eeOpcode == EEPROM_READ_OPCODE_SPI) { + assert(eeAddr < EEPROM_SIZE); + DPRINTF(Ethernet, "EEPROM bit read: %d word: %#X\n", + flash[eeAddr] >> eeDataBits & 0x1, flash[eeAddr]); + regs.eecd.dout = (flash[eeAddr] >> eeDataBits) & 0x1; + eeDataBits++; + } else if (eeDataBits < 8 && eeOpcode == 
EEPROM_RDSR_OPCODE_SPI) { + regs.eecd.dout = 0; + eeDataBits++; + } else + panic("What's going on with eeprom interface? opcode:" + " %#x:%d addr: %#x:%d, data: %d\n", (uint32_t)eeOpcode, + (uint32_t)eeOpBits, (uint32_t)eeAddr, + (uint32_t)eeAddrBits, (uint32_t)eeDataBits); + + // Reset everything for the next command + if ((eeDataBits == 16 && eeOpcode == EEPROM_READ_OPCODE_SPI) || + (eeDataBits == 8 && eeOpcode == EEPROM_RDSR_OPCODE_SPI)) { + eeOpBits = 0; + eeAddrBits = 0; + eeDataBits = 0; + eeOpcode = 0; + eeAddr = 0; + } + + DPRINTF(Ethernet, "EEPROM: opcode: %#X:%d\n", + (uint32_t)eeOpcode, (uint32_t) eeOpBits); + if (eeOpBits == 8 && !(eeOpcode == EEPROM_READ_OPCODE_SPI || + eeOpcode == EEPROM_RDSR_OPCODE_SPI )) + panic("Unknown eeprom opcode: %#X:%d\n", (uint32_t)eeOpcode, + (uint32_t)eeOpBits); + + + } + // If driver requests eeprom access, immediately give it to it + regs.eecd.ee_gnt = regs.eecd.ee_req; + break; + case EERD: + regs.eerd.reg = val; + break; + case ICR: + regs.icd.reg = val; + break; + case IMC: + regs.imc.reg = val; + break; + case RCTL: + regs.rctl.reg = val; + break; + case TCTL: + regs.tctl.reg = val; + break; + case MANC: + regs.manc.reg = val; + break; + default: + panic("Write request to unknown register number: %#x\n", daddr); + }; pkt->result = Packet::Success; return pioDelay; diff --git a/src/dev/i8254xGBe.hh b/src/dev/i8254xGBe.hh index 88931eb6d..8e2dd2e9c 100644 --- a/src/dev/i8254xGBe.hh +++ b/src/dev/i8254xGBe.hh @@ -39,6 +39,7 @@ #include "base/statistics.hh" #include "dev/etherint.hh" #include "dev/etherpkt.hh" +#include "dev/i8254xGBe_defs.hh" #include "dev/pcidev.hh" #include "dev/pktfifo.hh" #include "sim/eventq.hh" @@ -49,6 +50,12 @@ class IGbE : public PciDev { private: IGbEInt *etherInt; + iGbReg::Regs regs; + int eeOpBits, eeAddrBits, eeDataBits; + uint8_t eeOpcode, eeAddr; + + uint16_t flash[iGbReg::EEPROM_SIZE]; + public: struct Params : public PciDev::Params diff --git a/src/dev/i8254xGBe_defs.hh 
b/src/dev/i8254xGBe_defs.hh index 81d7d0d80..ae0925356 100644 --- a/src/dev/i8254xGBe_defs.hh +++ b/src/dev/i8254xGBe_defs.hh @@ -34,17 +34,18 @@ namespace iGbReg { -const uint32_t CTRL = 0x00000; -const uint32_t STATUS = 0x00008; -const uint32_t EECD = 0x00010; +const uint32_t CTRL = 0x00000; //* +const uint32_t STATUS = 0x00008; //* +const uint32_t EECD = 0x00010; //* +const uint32_t EERD = 0x00014; //* const uint32_t CTRL_EXT = 0x00018; const uint32_t PBA = 0x01000; -const uint32_t ICR = 0x000C0; +const uint32_t ICR = 0x000C0; //* const uint32_t ITR = 0x000C4; const uint32_t ICS = 0x000C8; const uint32_t IMS = 0x000D0; -const uint32_t IMC = 0x000D8; -const uint32_t RCTL = 0x00100; +const uint32_t IMC = 0x000D8; //* +const uint32_t RCTL = 0x00100; //* const uint32_t RDBAL = 0x02800; const uint32_t RDBAH = 0x02804; const uint32_t RDLEN = 0x02808; @@ -53,7 +54,7 @@ const uint32_t RDT = 0x02818; const uint32_t RDTR = 0x02820; const uint32_t RADV = 0x0282C; const uint32_t RSRPD = 0x02C00; -const uint32_t TCTL = 0x00400; +const uint32_t TCTL = 0x00400; //* const uint32_t TDBAL = 0x03800; const uint32_t TDBAH = 0x03804; const uint32_t TDLEN = 0x03808; @@ -66,6 +67,11 @@ const uint32_t TADV = 0x0282C; const uint32_t TSPMT = 0x03830; const uint32_t RXDCTL = 0x02828; const uint32_t RXCSUM = 0x05000; +const uint32_t MANC = 0x05820;//* + +const uint8_t EEPROM_READ_OPCODE_SPI = 0x03; +const uint8_t EEPROM_RDSR_OPCODE_SPI = 0x05; +const uint8_t EEPROM_SIZE = 64; struct RxDesc { Addr buf; @@ -239,4 +245,219 @@ union TxDesc { } type; }; +struct Regs { + union { // 0x0000 CTRL Register + uint32_t reg; + struct { + uint8_t fd:1; // full duplex + uint8_t bem:1; // big endian mode + uint8_t pcipr:1; // PCI priority + uint8_t lrst:1; // link reset + uint8_t tme:1; // test mode enable + uint8_t asde:1; // Auto-speed detection + uint8_t slu:1; // Set link up + uint8_t ilos:1; // invert los-of-signal + uint8_t speed:2; // speed selection bits + uint8_t be32:1; // big endian mode 32 + 
uint8_t frcspd:1; // force speed + uint8_t frcdpx:1; // force duplex + uint8_t duden:1; // dock/undock enable + uint8_t dudpol:1; // dock/undock polarity + uint8_t fphyrst:1; // force phy reset + uint8_t extlen:1; // external link status enable + uint8_t rsvd:1; // reserved + uint8_t sdp0d:1; // software controlled pin data + uint8_t sdp1d:1; // software controlled pin data + uint8_t sdp2d:1; // software controlled pin data + uint8_t sdp3d:1; // software controlled pin data + uint8_t sdp0i:1; // software controlled pin dir + uint8_t sdp1i:1; // software controlled pin dir + uint8_t sdp2i:1; // software controlled pin dir + uint8_t sdp3i:1; // software controlled pin dir + uint8_t rst:1; // reset + uint8_t rfce:1; // receive flow control enable + uint8_t tfce:1; // transmit flow control enable + uint8_t rte:1; // routing tag enable + uint8_t vme:1; // vlan enable + uint8_t phyrst:1; // phy reset + } ; + } ctrl; + + union { // 0x0008 STATUS + uint32_t reg; + struct { + uint8_t fd:1; // full duplex + uint8_t lu:1; // link up + uint8_t func:2; // function id + uint8_t txoff:1; // transmission paused + uint8_t tbimode:1; // tbi mode + uint8_t speed:2; // link speed + uint8_t asdv:2; // auto speed detection value + uint8_t mtxckok:1; // mtx clock running ok + uint8_t pci66:1; // In 66Mhz pci slot + uint8_t bus64:1; // in 64 bit slot + uint8_t pcix:1; // Pci mode + uint8_t pcixspd:1; // pci x speed + uint8_t reserved; // reserved + } ; + } sts; + + union { // 0x0010 EECD + uint32_t reg; + struct { + uint8_t sk:1; // clock input to the eeprom + uint8_t cs:1; // chip select to eeprom + uint8_t din:1; // data input to eeprom + uint8_t dout:1; // data output bit + uint8_t fwe:2; // flash write enable + uint8_t ee_req:1; // request eeprom access + uint8_t ee_gnt:1; // grant eeprom access + uint8_t ee_pres:1; // eeprom present + uint8_t ee_size:1; // eeprom size + uint8_t ee_sz1:1; // eeprom size + uint8_t rsvd:2; // reserved + uint8_t ee_type:1; // type of eeprom + } ; + }
eecd; + + union { // 0x0014 EERD + uint32_t reg; + struct { + uint8_t start:1; // start read + uint8_t done:1; // done read + uint16_t addr:14; // address + uint16_t data; // data + }; + } eerd; + + union { // 0x00C0 ICR + uint32_t reg; + struct { + uint8_t txdw:1; // tx descr written back + uint8_t txqe:1; // tx queue empty + uint8_t lsc:1; // link status change + uint8_t rxseq:1; // rcv sequence error + uint8_t rxdmt0:1; // rcv descriptor min thresh + uint8_t rsvd1:1; // reserved + uint8_t rxo:1; // receive overrun + uint8_t rxt0:1; // receiver timer interrupt + uint8_t rsvd2:1; // reserved + uint8_t mdac:1; // mdi/o access complete + uint8_t rxcfg:1; // recv /c/ ordered sets + uint8_t rsvd3:1; // reserved + uint8_t phyint:1; // phy interrupt + uint8_t gpi1:1; // gpi int 1 + uint8_t gpi2:1; // gpi int 2 + uint8_t txdlow:1; // transmit desc low thresh + uint8_t srpd:1; // small receive packet detected + uint16_t rsvd4:15; // reserved + } ; + } icd; + + union { // 0x00D8 IMC + uint32_t reg; + struct { + uint8_t txdw:1; // tx descr written back + uint8_t txqe:1; // tx queue empty + uint8_t lsc:1; // link status change + uint8_t rxseq:1; // rcv sequence error + uint8_t rxdmt0:1; // rcv descriptor min thresh + uint8_t rsvd1:1; // reserved + uint8_t rxo:1; // receive overrun + uint8_t rxt0:1; // receiver timer interrupt + uint8_t rsvd2:1; // reserved + uint8_t mdac:1; // mdi/o access complete + uint8_t rxcfg:1; // recv /c/ ordered sets + uint8_t rsvd3:1; // reserved + uint8_t phyint:1; // phy interrupt + uint8_t gpi1:1; // gpi int 1 + uint8_t gpi2:1; // gpi int 2 + uint8_t txdlow:1; // transmit desc low thresh + uint8_t srpd:1; // small receive packet detected + uint16_t rsvd4:15; // reserved + } ; + } imc; + + union { // 0x0100 RCTL + uint32_t reg; + struct { + uint8_t rst:1; // Reset + uint8_t en:1; // Enable + uint8_t sbp:1; // Store bad packets + uint8_t upe:1; // Unicast Promiscuous enabled + uint8_t mpe:1; // Multicast promiscuous enabled + uint8_t lpe:1; // long
packet reception enabled + uint8_t lbm:2; // + uint8_t rdmts:2; // + uint8_t rsvd:2; // + uint8_t mo:2; // + uint8_t mdr:1; // + uint8_t bam:1; // + uint8_t bsize:2; // + uint8_t vpe:1; // + uint8_t cfien:1; // + uint8_t cfi:1; // + uint8_t rsvd2:1; // + uint8_t dpf:1; // discard pause frames + uint8_t pmcf:1; // pass mac control frames + uint8_t rsvd3:1; // reserved + uint8_t bsex:1; // buffer size extension + uint8_t secrc:1; // strip ethernet crc from incoming packet + uint8_t rsvd1:5; // reserved + } ; + } rctl; + + union { // 0x0400 TCTL + uint32_t reg; + struct { + uint8_t rst:1; // Reset + uint8_t en:1; // Enable + uint8_t bce:1; // busy check enable + uint8_t psp:1; // pad short packets + uint8_t ct:8; // collision threshold + uint16_t cold:10; // collision distance + uint8_t swxoff:1; // software xoff transmission + uint8_t pbe:1; // packet burst enable + uint8_t rtlc:1; // retransmit late collisions + uint8_t nrtu:1; // on underrun no TX + uint8_t mulr:1; // multiple request + uint8_t rsvd:5; // reserved + } ; + } tctl; + + union { // 0x5820 MANC + uint32_t reg; + struct { + uint8_t smbus:1; // SMBus enabled ##### + uint8_t asf:1; // ASF enabled ##### + uint8_t ronforce:1; // reset of force + uint8_t rsvd:5; // reserved + uint8_t rmcp1:1; // rcmp1 filtering + uint8_t rmcp2:1; // rcmp2 filtering + uint8_t ipv4:1; // enable ipv4 + uint8_t ipv6:1; // enable ipv6 + uint8_t snap:1; // accept snap + uint8_t arp:1; // filter arp ##### + uint8_t neighbor:1; // neighbor discovery + uint8_t arp_resp:1; // arp response + uint8_t tcorst:1; // tco reset happened + uint8_t rcvtco:1; // receive tco enabled ###### + uint8_t blkphyrst:1;// block phy resets ######## + uint8_t rcvall:1; // receive all + uint8_t macaddrfltr:1; // mac address filtering ###### + uint8_t mng2host:1; // mng2 host packets ####### + uint8_t ipaddrfltr:1; // ip address filtering + uint8_t xsumfilter:1; // checksum filtering + uint8_t brfilter:1; // broadcast filtering + uint8_t smbreq:1; // smb 
request + uint8_t smbgnt:1; // smb grant + uint8_t smbclkin:1; // smbclkin + uint8_t smbdatain:1; // smbdatain + uint8_t smbdataout:1; // smb data out + uint8_t smbclkout:1; // smb clock out + uint8_t rsvd2:2; + }; + } manc; +}; + }; // iGbReg namespace diff --git a/src/python/m5/objects/Ethernet.py b/src/python/m5/objects/Ethernet.py index f17a6c888..a52e35511 100644 --- a/src/python/m5/objects/Ethernet.py +++ b/src/python/m5/objects/Ethernet.py @@ -71,7 +71,7 @@ class IGbE(PciDevice): class IGbEPciData(PciConfigData): VendorID = 0x8086 - DeviceID = 0x1026 + DeviceID = 0x1075 SubsystemID = 0x1008 SubsystemVendorID = 0x8086 Status = 0x0000 From 199084b33923c9ea606ac50cbdc0cadb8ece01fd Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sun, 22 Oct 2006 16:22:45 -0700 Subject: [PATCH 02/12] Add DPRINTF for non-timed quiesce. --HG-- extra : convert_revision : 5487f4fc07dbea6e5a651c104ea1d2fe864fb057 --- src/sim/pseudo_inst.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/sim/pseudo_inst.cc b/src/sim/pseudo_inst.cc index 4eb0866a5..d913e159b 100644 --- a/src/sim/pseudo_inst.cc +++ b/src/sim/pseudo_inst.cc @@ -74,6 +74,8 @@ namespace AlphaPseudo if (!doQuiesce) return; + DPRINTF(Quiesce, "%s: quiesce()\n", tc->getCpuPtr()->name()); + tc->suspend(); if (tc->getKernelStats()) tc->getKernelStats()->quiesce(); From 1b21d9ba5eacaaac96b731ef1abd51de274995e5 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sun, 22 Oct 2006 20:38:34 -0700 Subject: [PATCH 03/12] s/pktuest/request/ (all in comments) --HG-- extra : convert_revision : 7ce779242a15245a20322c0b6c40d02c8ddd15ad --- src/mem/cache/cache_impl.hh | 2 +- src/mem/cache/miss/blocking_buffer.hh | 18 +++++++++--------- src/mem/cache/miss/miss_queue.hh | 22 +++++++++++----------- src/mem/cache/miss/mshr.hh | 20 ++++++++++---------- src/mem/cache/miss/mshr_queue.hh | 26 +++++++++++++------------- src/mem/cache/tags/split.hh | 8 ++++---- src/mem/cache/tags/split_blk.hh | 2 +- 7 files changed, 49 insertions(+), 49 
deletions(-) diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 3c47762f6..3b75884c9 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -596,7 +596,7 @@ Cache::probe(PacketPtr &pkt, bool update, } else if (!blk && !(pkt->flags & SATISFIED)) { // update the cache state and statistics if (mshr || !writes.empty()){ - // Can't handle it, return pktuest unsatisfied. + // Can't handle it, return request unsatisfied. panic("Atomic access ran into outstanding MSHR's or WB's!"); } if (!pkt->req->isUncacheable()) { diff --git a/src/mem/cache/miss/blocking_buffer.hh b/src/mem/cache/miss/blocking_buffer.hh index 4408cfc4f..934a843a6 100644 --- a/src/mem/cache/miss/blocking_buffer.hh +++ b/src/mem/cache/miss/blocking_buffer.hh @@ -128,8 +128,8 @@ public: } /** - * Selects a outstanding pktuest to service. - * @return The pktuest to service, NULL if none found. + * Selects a outstanding request to service. + * @return The request to service, NULL if none found. */ PacketPtr getPacket(); @@ -147,7 +147,7 @@ public: void restoreOrigCmd(PacketPtr &pkt); /** - * Marks a pktuest as in service (sent on the bus). This can have side + * Marks a request as in service (sent on the bus). This can have side * effect since storage for no response commands is deallocated once they * are successfully sent. * @param pkt The request that was sent on the bus. @@ -155,14 +155,14 @@ public: void markInService(PacketPtr &pkt, MSHR* mshr); /** - * Frees the resources of the pktuest and unblock the cache. + * Frees the resources of the request and unblock the cache. * @param pkt The request that has been satisfied. - * @param time The time when the pktuest is satisfied. + * @param time The time when the request is satisfied. */ void handleResponse(PacketPtr &pkt, Tick time); /** - * Removes all outstanding pktuests for a given thread number. If a request + * Removes all outstanding requests for a given thread number. 
If a request * has been sent to the bus, this function removes all of its targets. * @param threadNum The thread number of the requests to squash. */ @@ -220,14 +220,14 @@ public: int size, uint8_t *data, bool compressed); /** - * Perform a writeback pktuest. + * Perform a writeback request. * @param pkt The writeback request. */ void doWriteback(PacketPtr &pkt); /** - * Returns true if there are outstanding pktuests. - * @return True if there are outstanding pktuests. + * Returns true if there are outstanding requests. + * @return True if there are outstanding requests. */ bool havePending() { diff --git a/src/mem/cache/miss/miss_queue.hh b/src/mem/cache/miss/miss_queue.hh index 2e04802fb..b67a896f4 100644 --- a/src/mem/cache/miss/miss_queue.hh +++ b/src/mem/cache/miss/miss_queue.hh @@ -77,7 +77,7 @@ class MissQueue /** The block size of the parent cache. */ int blkSize; - /** Increasing order number assigned to each incoming pktuest. */ + /** Increasing order number assigned to each incoming request. */ uint64_t order; bool prefetchMiss; @@ -212,7 +212,7 @@ class MissQueue void setPrefetcher(BasePrefetcher *_prefetcher); /** - * Handle a cache miss properly. Either allocate an MSHR for the pktuest, + * Handle a cache miss properly. Either allocate an MSHR for the request, * or forward it through the write buffer. * @param pkt The request that missed in the cache. * @param blk_size The block size of the cache. @@ -232,8 +232,8 @@ class MissQueue PacketPtr &target); /** - * Selects a outstanding pktuest to service. - * @return The pktuest to service, NULL if none found. + * Selects a outstanding request to service. + * @return The request to service, NULL if none found. */ PacketPtr getPacket(); @@ -251,7 +251,7 @@ class MissQueue void restoreOrigCmd(PacketPtr &pkt); /** - * Marks a pktuest as in service (sent on the bus). This can have side + * Marks a request as in service (sent on the bus). 
This can have side * effect since storage for no response commands is deallocated once they * are successfully sent. * @param pkt The request that was sent on the bus. @@ -259,14 +259,14 @@ class MissQueue void markInService(PacketPtr &pkt, MSHR* mshr); /** - * Collect statistics and free resources of a satisfied pktuest. + * Collect statistics and free resources of a satisfied request. * @param pkt The request that has been satisfied. - * @param time The time when the pktuest is satisfied. + * @param time The time when the request is satisfied. */ void handleResponse(PacketPtr &pkt, Tick time); /** - * Removes all outstanding pktuests for a given thread number. If a request + * Removes all outstanding requests for a given thread number. If a request * has been sent to the bus, this function removes all of its targets. * @param threadNum The thread number of the requests to squash. */ @@ -313,14 +313,14 @@ class MissQueue int size, uint8_t *data, bool compressed); /** - * Perform the given writeback pktuest. + * Perform the given writeback request. * @param pkt The writeback request. */ void doWriteback(PacketPtr &pkt); /** - * Returns true if there are outstanding pktuests. - * @return True if there are outstanding pktuests. + * Returns true if there are outstanding requests. + * @return True if there are outstanding requests. */ bool havePending(); diff --git a/src/mem/cache/miss/mshr.hh b/src/mem/cache/miss/mshr.hh index d92aa8a85..281ea9d49 100644 --- a/src/mem/cache/miss/mshr.hh +++ b/src/mem/cache/miss/mshr.hh @@ -44,7 +44,7 @@ class MSHR; /** * Miss Status and handling Register. This class keeps all the information - * needed to handle a cache miss including a list of target pktuests. + * needed to handle a cache miss including a list of target requests. */ class MSHR { public: @@ -63,15 +63,15 @@ class MSHR { Addr addr; /** Adress space id of the miss. */ short asid; - /** True if the pktuest has been sent to the bus. 
*/ + /** True if the request has been sent to the bus. */ bool inService; /** Thread number of the miss. */ int threadNum; - /** The pktuest that is forwarded to the next level of the hierarchy. */ + /** The request that is forwarded to the next level of the hierarchy. */ PacketPtr pkt; /** The number of currently allocated targets. */ short ntargets; - /** The original pktuesting command. */ + /** The original requesting command. */ Packet::Command originalCmd; /** Order number of assigned by the miss queue. */ uint64_t order; @@ -88,24 +88,24 @@ class MSHR { Iterator allocIter; private: - /** List of all pktuests that match the address */ + /** List of all requests that match the address */ TargetList targets; public: /** * Allocate a miss to this MSHR. - * @param cmd The pktuesting command. + * @param cmd The requesting command. * @param addr The address of the miss. * @param asid The address space id of the miss. - * @param size The number of bytes to pktuest. + * @param size The number of bytes to request. * @param pkt The original miss. */ void allocate(Packet::Command cmd, Addr addr, int size, PacketPtr &pkt); /** - * Allocate this MSHR as a buffer for the given pktuest. - * @param target The memory pktuest to buffer. + * Allocate this MSHR as a buffer for the given request. + * @param target The memory request to buffer. */ void allocateAsBuffer(PacketPtr &target); @@ -115,7 +115,7 @@ public: void deallocate(); /** - * Add a pktuest to the list of targets. + * Add a request to the list of targets. * @param target The target. */ void allocateTarget(PacketPtr &target); diff --git a/src/mem/cache/miss/mshr_queue.hh b/src/mem/cache/miss/mshr_queue.hh index 30397d9a0..ec2ddae8a 100644 --- a/src/mem/cache/miss/mshr_queue.hh +++ b/src/mem/cache/miss/mshr_queue.hh @@ -39,7 +39,7 @@ #include "mem/cache/miss/mshr.hh" /** - * A Class for maintaining a list of pending and allocated memory pktuests. 
+ * A Class for maintaining a list of pending and allocated memory requests. */ class MSHRQueue { private: @@ -55,7 +55,7 @@ class MSHRQueue { // Parameters /** * The total number of MSHRs in this queue. This number is set as the - * number of MSHRs pktuested plus (numReserve - 1). This allows for + * number of MSHRs requested plus (numReserve - 1). This allows for * the same number of effective MSHRs while still maintaining the reserve. */ const int numMSHRs; @@ -103,14 +103,14 @@ class MSHRQueue { bool findMatches(Addr addr, std::vector& matches) const; /** - * Find any pending pktuests that overlap the given request. + * Find any pending requests that overlap the given request. * @param pkt The request to find. * @return A pointer to the earliest matching MSHR. */ MSHR* findPending(PacketPtr &pkt) const; /** - * Allocates a new MSHR for the pktuest and size. This places the request + * Allocates a new MSHR for the request and size. This places the request * as the first target in the MSHR. * @param pkt The request to handle. * @param size The number in bytes to fetch from memory. @@ -121,12 +121,12 @@ class MSHRQueue { MSHR* allocate(PacketPtr &pkt, int size = 0); /** - * Allocate a read pktuest for the given address, and places the given + * Allocate a read request for the given address, and places the given * target on the target list. * @param addr The address to fetch. * @param asid The address space for the fetch. - * @param size The number of bytes to pktuest. - * @param target The first target for the pktuest. + * @param size The number of bytes to request. + * @param target The first target for the request. * @return Pointer to the new MSHR. */ MSHR* allocateFetch(Addr addr, int size, PacketPtr &target); @@ -135,7 +135,7 @@ class MSHRQueue { * Allocate a target list for the given address. * @param addr The address to fetch. * @param asid The address space for the fetch. - * @param size The number of bytes to pktuest. 
+ * @param size The number of bytes to request. * @return Pointer to the new MSHR. */ MSHR* allocateTargetList(Addr addr, int size); @@ -181,14 +181,14 @@ class MSHRQueue { void markInService(MSHR* mshr); /** - * Mark an in service mshr as pending, used to resend a pktuest. + * Mark an in service mshr as pending, used to resend a request. * @param mshr The MSHR to resend. * @param cmd The command to resend. */ void markPending(MSHR* mshr, Packet::Command cmd); /** - * Squash outstanding pktuests with the given thread number. If a request + * Squash outstanding requests with the given thread number. If a request * is in service, just squashes the targets. * @param threadNum The thread to squash. */ @@ -196,7 +196,7 @@ class MSHRQueue { /** * Returns true if the pending list is not empty. - * @return True if there are outstanding pktuests. + * @return True if there are outstanding requests. */ bool havePending() const { @@ -213,8 +213,8 @@ class MSHRQueue { } /** - * Returns the pktuest at the head of the pendingList. - * @return The next pktuest to service. + * Returns the request at the head of the pendingList. + * @return The next request to service. 
*/ PacketPtr getReq() const { diff --git a/src/mem/cache/tags/split.hh b/src/mem/cache/tags/split.hh index 748f6fb25..898d3c7a0 100644 --- a/src/mem/cache/tags/split.hh +++ b/src/mem/cache/tags/split.hh @@ -71,13 +71,13 @@ class Split : public BaseTags Addr blkMask; - /** Number of NIC pktuests that hit in the NIC partition */ + /** Number of NIC requests that hit in the NIC partition */ Stats::Scalar<> NR_NP_hits; - /** Number of NIC pktuests that hit in the CPU partition */ + /** Number of NIC requests that hit in the CPU partition */ Stats::Scalar<> NR_CP_hits; - /** Number of CPU pktuests that hit in the NIC partition */ + /** Number of CPU requests that hit in the NIC partition */ Stats::Scalar<> CR_NP_hits; - /** Number of CPU pktuests that hit in the CPU partition */ + /** Number of CPU requests that hit in the CPU partition */ Stats::Scalar<> CR_CP_hits; /** The number of nic replacements (i.e. misses) */ Stats::Scalar<> nic_repl; diff --git a/src/mem/cache/tags/split_blk.hh b/src/mem/cache/tags/split_blk.hh index 64d903579..f38516180 100644 --- a/src/mem/cache/tags/split_blk.hh +++ b/src/mem/cache/tags/split_blk.hh @@ -47,7 +47,7 @@ class SplitBlk : public CacheBlk { bool isTouched; /** Has this block been used after being brought in? (for LIFO partition) */ bool isUsed; - /** is this blk a NIC block? (i.e. pktuested by the NIC) */ + /** is this blk a NIC block? (i.e. 
requested by the NIC) */ bool isNIC; /** timestamp of the arrival of this block into the cache */ Tick ts; From e321a21e27b5957395679e49c8d5d0783b852fad Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Sun, 22 Oct 2006 21:07:38 -0700 Subject: [PATCH 04/12] Clean up cache DPRINTFs --HG-- extra : convert_revision : f836e77efd40e25259d7794dd148696586b79a09 --- src/mem/cache/base_cache.cc | 4 +-- src/mem/cache/cache_impl.hh | 60 ++++++++++++++----------------------- 2 files changed, 24 insertions(+), 40 deletions(-) diff --git a/src/mem/cache/base_cache.cc b/src/mem/cache/base_cache.cc index 0694aae6e..599958222 100644 --- a/src/mem/cache/base_cache.cc +++ b/src/mem/cache/base_cache.cc @@ -79,9 +79,7 @@ BaseCache::CachePort::recvTiming(PacketPtr pkt) && !pkt->isRead() && !pkt->isWrite()) { //Upgrade or Invalidate //Look into what happens if two slave caches on bus - DPRINTF(Cache, "%s %x ? blk_addr: %x\n", pkt->cmdString(), - pkt->getAddr() & (((ULL(1))<<48)-1), - pkt->getAddr() & ~((Addr)cache->blkSize - 1)); + DPRINTF(Cache, "%s %x ?\n", pkt->cmdString(), pkt->getAddr()); assert(!(pkt->flags & SATISFIED)); pkt->flags |= SATISFIED; diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index 3b75884c9..9bb72e85c 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -205,9 +205,10 @@ Cache::access(PacketPtr &pkt) missQueue->doWriteback(writebacks.front()); writebacks.pop_front(); } - DPRINTF(Cache, "%s %x %s blk_addr: %x\n", pkt->cmdString(), - pkt->getAddr() & (((ULL(1))<<48)-1), (blk) ? "hit" : "miss", - pkt->getAddr() & ~((Addr)blkSize - 1)); + + DPRINTF(Cache, "%s %x %s\n", pkt->cmdString(), pkt->getAddr(), + (blk) ? "hit" : "miss"); + if (blk) { // Hit hits[pkt->cmdToIndex()][0/*pkt->req->getThreadNum()*/]++; @@ -288,10 +289,8 @@ Cache::sendResult(PacketPtr &pkt, MSHR* mshr, CacheBlk::State old_state = (blk) ? 
blk->status : 0; CacheBlk::State new_state = coherence->getNewState(pkt,old_state); if (old_state != new_state) - DPRINTF(Cache, "Block for blk addr %x moving from " - "state %i to %i\n", - pkt->getAddr() & (((ULL(1))<<48)-1), - old_state, new_state); + DPRINTF(Cache, "Block for blk addr %x moving from state " + "%i to %i\n", pkt->getAddr(), old_state, new_state); //Set the state on the upgrade memcpy(pkt->getPtr(), blk->data, blkSize); PacketList writebacks; @@ -331,8 +330,7 @@ Cache::handleResponse(PacketPtr &pkt) //Make the response a Bad address and send it } // MemDebug::cacheResponse(pkt); - DPRINTF(Cache, "Handling reponse to %x, blk addr: %x\n",pkt->getAddr(), - pkt->getAddr() & (((ULL(1))<<48)-1)); + DPRINTF(Cache, "Handling reponse to %x\n", pkt->getAddr()); if (pkt->isCacheFill() && !pkt->isNoAllocate()) { blk = tags->findBlock(pkt); @@ -342,7 +340,7 @@ Cache::handleResponse(PacketPtr &pkt) if (old_state != new_state) DPRINTF(Cache, "Block for blk addr %x moving from " "state %i to %i\n", - pkt->getAddr() & (((ULL(1))<<48)-1), + pkt->getAddr(), old_state, new_state); blk = tags->handleFill(blk, (MSHR*)pkt->senderState, new_state, writebacks, pkt); @@ -427,8 +425,8 @@ Cache::snoop(PacketPtr &pkt) //Append the invalidate on missQueue->addTarget(mshr,invalidatePkt); - DPRINTF(Cache, "Appending Invalidate to blk_addr: %x\n", - pkt->getAddr() & (((ULL(1))<<48)-1)); + DPRINTF(Cache, "Appending Invalidate to addr: %x\n", + pkt->getAddr()); return; } } @@ -436,8 +434,8 @@ Cache::snoop(PacketPtr &pkt) //We also need to check the writeback buffers and handle those std::vector writebacks; if (missQueue->findWrites(blk_addr, writebacks)) { - DPRINTF(Cache, "Snoop hit in writeback to blk_addr: %x\n", - pkt->getAddr() & (((ULL(1))<<48)-1)); + DPRINTF(Cache, "Snoop hit in writeback to addr: %x\n", + pkt->getAddr()); //Look through writebacks for any non-uncachable writes, use that for (int i=0; i::probe(PacketPtr &pkt, bool update, { // MemDebug::cacheProbe(pkt); if 
(!pkt->req->isUncacheable()) { - if (pkt->isInvalidate() && !pkt->isRead() - && !pkt->isWrite()) { + if (pkt->isInvalidate() && !pkt->isRead() && !pkt->isWrite()) { //Upgrade or Invalidate, satisfy it, don't forward - DPRINTF(Cache, "%s %x ? blk_addr: %x\n", pkt->cmdString(), - pkt->getAddr() & (((ULL(1))<<48)-1), - pkt->getAddr() & ~((Addr)blkSize - 1)); + DPRINTF(Cache, "%s %x ?\n", pkt->cmdString(), pkt->getAddr()); pkt->flags |= SATISFIED; return 0; } @@ -550,9 +545,8 @@ Cache::probe(PacketPtr &pkt, bool update, int lat; BlkType *blk = tags->handleAccess(pkt, lat, writebacks, update); - DPRINTF(Cache, "%s %x %s blk_addr: %x\n", pkt->cmdString(), - pkt->getAddr() & (((ULL(1))<<48)-1), (blk) ? "hit" : "miss", - pkt->getAddr() & ~((Addr)blkSize - 1)); + DPRINTF(Cache, "%s %x %s\n", pkt->cmdString(), + pkt->getAddr(), (blk) ? "hit" : "miss"); // Need to check for outstanding misses and writes @@ -611,10 +605,8 @@ Cache::probe(PacketPtr &pkt, bool update, busPkt->time = curTick; - DPRINTF(Cache, "Sending a atomic %s for %x blk_addr: %x\n", - busPkt->cmdString(), - busPkt->getAddr() & (((ULL(1))<<48)-1), - busPkt->getAddr() & ~((Addr)blkSize - 1)); + DPRINTF(Cache, "Sending a atomic %s for %x\n", + busPkt->cmdString(), busPkt->getAddr()); lat = memSidePort->sendAtomic(busPkt); @@ -633,19 +625,13 @@ return 0; CacheBlk::State old_state = (blk) ? 
blk->status : 0; CacheBlk::State new_state = coherence->getNewState(busPkt, old_state); - DPRINTF(Cache, - "Receive response:%s for blk addr %x in state %i\n", - busPkt->cmdString(), - busPkt->getAddr() & (((ULL(1))<<48)-1), old_state); + DPRINTF(Cache, "Receive response: %s for addr %x in state %i\n", + busPkt->cmdString(), busPkt->getAddr(), old_state); if (old_state != new_state) - DPRINTF(Cache, "Block for blk addr %x moving from " - "state %i to %i\n", - busPkt->getAddr() & (((ULL(1))<<48)-1), - old_state, new_state); + DPRINTF(Cache, "Block for blk addr %x moving from state " + "%i to %i\n", busPkt->getAddr(), old_state, new_state); - tags->handleFill(blk, busPkt, - new_state, - writebacks, pkt); + tags->handleFill(blk, busPkt, new_state, writebacks, pkt); //Free the packet delete busPkt; From 40a04f2f4022a970b343ea7bf997febbb83acee5 Mon Sep 17 00:00:00 2001 From: Lisa Hsu Date: Mon, 23 Oct 2006 18:07:51 -0400 Subject: [PATCH 05/12] changes regarding fs.py 1) rearrange the options to be in a nice logical order 2) add an option for what i call "standard switch", which is from simple->timing->detailed 3) change the client/server naming system to testsys/drivesys 4) make checkpointing code such that checkpoints taken from the command line override checkpoint instructions compiled into binaries. 5) add an option for maximum number of checkpoints - simulation will stop at max or maxtick, whichever is first doesn't fully work because of a caching issue, but the python side of things i think should work - the counterpart of se.py does work. i think i should factor out a lot of the common code in both, but i'll do that after this checkin, just to get this in the tree. 
configs/example/fs.py: 1) rearrange the options to be in a nice logical order 2) add an option for what i call "standard switch", which is from simple->timing->detailed 3) change the client/server naming system to testsys/drivesys 4) make checkpointing code such that checkpoints taken from the command line override checkpoint instructions compiled into binaries. 5) add an option for maximum number of checkpoints - simulation will stop at max or maxtick, whichever is first --HG-- extra : convert_revision : 078e22800ff83f6e950bf5cc6fb16a98320e7c51 --- configs/example/fs.py | 198 +++++++++++++++++++++++++++++++----------- 1 file changed, 149 insertions(+), 49 deletions(-) diff --git a/configs/example/fs.py b/configs/example/fs.py index f0e32e97f..3573c47ac 100644 --- a/configs/example/fs.py +++ b/configs/example/fs.py @@ -40,26 +40,49 @@ if not m5.build_env['FULL_SYSTEM']: parser = optparse.OptionParser() -parser.add_option("-d", "--detailed", action="store_true") -parser.add_option("-t", "--timing", action="store_true") -parser.add_option("-n", "--num_cpus", type="int", default=1) -parser.add_option("--caches", action="store_true") -parser.add_option("-m", "--maxtick", type="int") -parser.add_option("--maxtime", type="float") +# Benchmark options parser.add_option("--dual", action="store_true", help="Simulate two systems attached with an ethernet link") parser.add_option("-b", "--benchmark", action="store", type="string", dest="benchmark", help="Specify the benchmark to run. 
Available benchmarks: %s"\ % DefinedBenchmarks) + +# system options +parser.add_option("-d", "--detailed", action="store_true") +parser.add_option("-t", "--timing", action="store_true") +parser.add_option("-n", "--num_cpus", type="int", default=1) +parser.add_option("--caches", action="store_true") + +# Run duration options +parser.add_option("-m", "--maxtick", type="int") +parser.add_option("--maxtime", type="float") + +# Metafile options parser.add_option("--etherdump", action="store", type="string", dest="etherdump", help="Specify the filename to dump a pcap capture of the" \ "ethernet traffic") + +# Checkpointing options +###Note that performing checkpointing via python script files will override +###checkpoint instructions built into binaries. +parser.add_option("--take_checkpoints", action="store", type="string", + help=" will take checkpoint at cycle M and every N cycles \ + thereafter") +parser.add_option("--max_checkpoints", action="store", type="int", + help="the maximum number of checkpoints to drop", + default=5) parser.add_option("--checkpoint_dir", action="store", type="string", help="Place all checkpoints in this absolute directory") -parser.add_option("-c", "--checkpoint", action="store", type="int", +parser.add_option("-r", "--checkpoint_restore", action="store", type="int", help="restore from checkpoint ") +# CPU Switching - default switch model goes from a checkpoint +# to a timing simple CPU with caches to warm up, then to detailed CPU for +# data measurement +parser.add_option("-s", "--standard_switch", action="store_true", + help="switch from one cpu mode to another") + (options, args) = parser.parse_args() if args: @@ -73,23 +96,24 @@ class MyCache(BaseCache): mshrs = 10 tgts_per_mshr = 5 -# client system CPU is always simple... note this is an assignment of +# driver system CPU is always simple... note this is an assignment of # a class, not an instance. 
-ClientCPUClass = AtomicSimpleCPU -client_mem_mode = 'atomic' +DriveCPUClass = AtomicSimpleCPU +drive_mem_mode = 'atomic' +# system under test can be any of these CPUs if options.detailed: - ServerCPUClass = DerivO3CPU - server_mem_mode = 'timing' + TestCPUClass = DerivO3CPU + test_mem_mode = 'timing' elif options.timing: - ServerCPUClass = TimingSimpleCPU - server_mem_mode = 'timing' + TestCPUClass = TimingSimpleCPU + test_mem_mode = 'timing' else: - ServerCPUClass = AtomicSimpleCPU - server_mem_mode = 'atomic' + TestCPUClass = AtomicSimpleCPU + test_mem_mode = 'atomic' -ServerCPUClass.clock = '2GHz' -ClientCPUClass.clock = '2GHz' +TestCPUClass.clock = '2GHz' +DriveCPUClass.clock = '2GHz' if options.benchmark: try: @@ -104,38 +128,59 @@ else: else: bm = [SysConfig()] -server_sys = makeLinuxAlphaSystem(server_mem_mode, bm[0]) +test_sys = makeLinuxAlphaSystem(test_mem_mode, bm[0]) np = options.num_cpus -server_sys.cpu = [ServerCPUClass(cpu_id=i) for i in xrange(np)] +test_sys.cpu = [TestCPUClass(cpu_id=i) for i in xrange(np)] for i in xrange(np): - if options.caches: - server_sys.cpu[i].addPrivateSplitL1Caches(MyCache(size = '32kB'), + if options.caches and not options.standard_switch: + test_sys.cpu[i].addPrivateSplitL1Caches(MyCache(size = '32kB'), MyCache(size = '64kB')) - server_sys.cpu[i].connectMemPorts(server_sys.membus) - server_sys.cpu[i].mem = server_sys.physmem + test_sys.cpu[i].connectMemPorts(test_sys.membus) + test_sys.cpu[i].mem = test_sys.physmem if len(bm) == 2: - client_sys = makeLinuxAlphaSystem(client_mem_mode, bm[1]) - client_sys.cpu = ClientCPUClass(cpu_id=0) - client_sys.cpu.connectMemPorts(client_sys.membus) - client_sys.cpu.mem = client_sys.physmem - root = makeDualRoot(server_sys, client_sys, options.etherdump) + drive_sys = makeLinuxAlphaSystem(drive_mem_mode, bm[1]) + drive_sys.cpu = DriveCPUClass(cpu_id=0) + drive_sys.cpu.connectMemPorts(drive_sys.membus) + drive_sys.cpu.mem = drive_sys.physmem + root = makeDualRoot(test_sys, drive_sys, 
options.etherdump) elif len(bm) == 1: - root = Root(clock = '1THz', system = server_sys) + root = Root(clock = '1THz', system = test_sys) else: print "Error I don't know how to create more than 2 systems." sys.exit(1) +if options.standard_switch: + switch_cpus = [TimingSimpleCPU(defer_registration=True, cpu_id=(np+i) for i in xrange(np))] + switch_cpus1 = [DerivO3CPU(defer_registration=True, cpu_id=(2*np+i) for i in xrange(np))] + for i in xrange(np): + switch_cpus[i].system = test_sys + switch_cpus1[i].system = test_sys + switch_cpus[i].clock = TestCPUClass.clock + switch_cpus1[i].clock = TestCPUClass.clock + if options.caches: + switch_cpus[i].addPrivateSplitL1Caches(MyCache(size = '32kB'), + MyCache(size = '64kB')) + + switch_cpus[i].mem = test_sys.physmem + switch_cpus1[i].mem = test_sys.physmem + switch_cpus[i].connectMemPorts(test_sys.membus) + root.switch_cpus = switch_cpus + root.switch_cpus1 = switch_cpus1 + switch_cpu_list = [(test_sys.cpu[i], switch_cpus[i]) for i in xrange(np)] + switch_cpu_list1 = [(switch_cpus[i], switch_cpus1[i]) for i in xrange(np)] + m5.instantiate(root) -if options.checkpoint: +if options.checkpoint_dir: + cptdir = options.checkpoint_dir +else: + cptdir = getcwd() + +if options.checkpoint_restore: from os.path import isdir from os import listdir, getcwd import re - if options.checkpoint_dir: - cptdir = options.checkpoint_dir - else: - cptdir = getcwd() if not isdir(cptdir): m5.panic("checkpoint dir %s does not exist!" 
% cptdir) @@ -148,10 +193,26 @@ if options.checkpoint: if match: cpts.append(match.group(1)) - if options.checkpoint > len(cpts): - m5.panic('Checkpoint %d not found' % options.checkpoint) + cpts.sort(lambda a,b: cmp(long(a), long(b))) - m5.restoreCheckpoint(root, "/".join([cptdir, "cpt.%s" % cpts[options.checkpoint - 1]])) + if options.checkpoint_restore > len(cpts): + m5.panic('Checkpoint %d not found' % options.checkpoint_restore) + + m5.restoreCheckpoint(root, "/".join([cptdir, "cpt.%s" % cpts[options.checkpoint_restore - 1]])) + +if options.standard_switch: + exit_event = m5.simulate(1000) + ## when you change to Timing (or Atomic), you halt the system given + ## as argument. When you are finished with the system changes + ## (including switchCpus), you must resume the system manually. + ## You DON'T need to resume after just switching CPUs if you haven't + ## changed anything on the system level. + m5.changeToTiming(test_sys) + m5.switchCpus(switch_cpu_list) + m5.resume(test_sys) + + exit_event = m5.simulate(300000000000) + m5.switchCpus(switch_cpu_list1) if options.maxtick: maxtick = options.maxtick @@ -162,17 +223,56 @@ elif options.maxtime: else: maxtick = -1 -exit_event = m5.simulate(maxtick) +num_checkpoints = 0 -while exit_event.getCause() == "checkpoint": - if options.checkpoint_dir: - m5.checkpoint(root, "/".join([options.checkpoint_dir, "cpt.%d"])) - else: - m5.checkpoint(root, "cpt.%d") +exit_cause = '' - if maxtick == -1: - exit_event = m5.simulate(maxtick) - else: - exit_event = m5.simulate(maxtick - m5.curTick()) +if options.take_checkpoints: + [when, period] = options.take_checkpoints.split(",", 1) + when = int(when) + period = int(period) -print 'Exiting @ cycle', m5.curTick(), 'because', exit_event.getCause() + exit_event = m5.simulate(when) + while exit_event.getCause() == "checkpoint": + exit_event = m5.simulate(when - m5.curTick()) + + if exit_event.getCause() == "simulate() limit reached": + m5.checkpoint(root, cptdir + "cpt.%d") + 
num_checkpoints += 1 + + sim_ticks = when + exit_cause = "maximum %d checkpoints dropped" % options.max_checkpoints + while num_checkpoints < options.max_checkpoints: + if (sim_ticks + period) > maxtick and maxtick != -1: + exit_event = m5.simulate(maxtick - sim_ticks) + exit_cause = exit_event.getCause() + break + else: + exit_event = m5.simulate(period) + sim_ticks += period + while exit_event.getCause() == "checkpoint": + exit_event = m5.simulate(period - m5.curTick()) + if exit_event.getCause() == "simulate() limit reached": + m5.checkpoint(root, cptdir + "cpt.%d") + num_checkpoints += 1 + +else: #no checkpoints being taken via this script + exit_event = m5.simulate(maxtick) + + while exit_event.getCause() == "checkpoint": + m5.checkpoint(root, cptdir + "cpt.%d") + num_checkpoints += 1 + if num_checkpoints == options.max_checkpoints: + exit_cause = "maximum %d checkpoints dropped" % options.max_checkpoints + break + + if maxtick == -1: + exit_event = m5.simulate(maxtick) + else: + exit_event = m5.simulate(maxtick - m5.curTick()) + + exit_cause = exit_event.getCause() + +if exit_cause == '': + exit_cause = exit_event.getCause() +print 'Exiting @ cycle', m5.curTick(), 'because ', exit_cause From 049f8d53a92590233db82937aecd5ba2cdbe7d0d Mon Sep 17 00:00:00 2001 From: Lisa Hsu Date: Mon, 23 Oct 2006 18:42:46 -0400 Subject: [PATCH 06/12] make a lot of the same changes as to fs.py for checkpointing. 1) rearrange the options to be in a nice logical order 2) add an option for what i call "standard switch", which is from simple->timing->detailed 3) make checkpointing code such that checkpoints taken from the command line override checkpoint instructions compiled into binaries. 
4) add an option for maximum number of checkpoints - simulation will stop at max or maxtick, whichever is first --HG-- extra : convert_revision : 8d905e1b297ae664d60f8c8ba48b2aac25437fc6 --- configs/example/se.py | 170 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 164 insertions(+), 6 deletions(-) diff --git a/configs/example/se.py b/configs/example/se.py index 6a941b9da..a053ff8df 100644 --- a/configs/example/se.py +++ b/configs/example/se.py @@ -37,6 +37,7 @@ m5.AddToPath('../common') parser = optparse.OptionParser() +# Benchmark options parser.add_option("-c", "--cmd", default="../../tests/test-progs/hello/bin/alpha/linux/hello", help="The binary to run in syscall emulation mode.") @@ -45,9 +46,35 @@ parser.add_option("-o", "--options", default="", string.") parser.add_option("-i", "--input", default="", help="A file of input to give to the binary.") + +# System options parser.add_option("-d", "--detailed", action="store_true") parser.add_option("-t", "--timing", action="store_true") +parser.add_option("--caches", action="store_true") + +# Run duration options parser.add_option("-m", "--maxtick", type="int") +parser.add_option("--maxtime", type="float") + +#Checkpointing options +###Note that performing checkpointing via python script files will override +###checkpoint instructions built into binaries. 
+parser.add_option("--take_checkpoints", action="store", type="string", + help=" will take checkpoint at cycle M and every N cycles \ + thereafter") +parser.add_option("--max_checkpoints", action="store", type="int", + help="the maximum number of checkpoints to drop", + default=5) +parser.add_option("--checkpoint_dir", action="store", type="string", + help="Place all checkpoints in this absolute directory") +parser.add_option("-r", "--checkpoint_restore", action="store", type="int", + help="restore from checkpoint ") + +#CPU Switching - default switch model generally goes from a checkpoint +#to a timing simple CPU with caches to warm up, then to detailed CPU for +#data measurement +parser.add_option("-s", "--standard_switch", action="store_true", + help="switch from one cpu mode to another") (options, args) = parser.parse_args() @@ -55,6 +82,13 @@ if args: print "Error: script doesn't take any positional arguments" sys.exit(1) +class MyCache(BaseCache): + assoc = 2 + block_size = 64 + latency = 1 + mshrs = 10 + tgts_per_mshr = 5 + process = LiveProcess() process.executable = options.cmd process.cmd = options.cmd + " " + options.options @@ -93,25 +127,149 @@ cpu.workload = process cpu.cpu_id = 0 system = System(cpu = cpu, - physmem = PhysicalMemory(), + physmem = PhysicalMemory(range=AddrRange("512MB")), membus = Bus()) system.physmem.port = system.membus.port system.cpu.connectMemPorts(system.membus) system.cpu.mem = system.physmem +system.cpu.clock = '2GHz' +if options.caches and not options.standard_switch: + system.cpu.addPrivateSplitL1Caches(MyCache(size = '32kB'), + MyCache(size = '64kB')) root = Root(system = system) if options.timing or options.detailed: root.system.mem_mode = 'timing' +if options.standard_switch: + switch_cpu = TimingSimpleCPU(defer_registration=True, cpu_id=1) + switch_cpu1 = DerivO3CPU(defer_registration=True, cpu_id=2) + switch_cpu.system = system + switch_cpu1.system = system + switch_cpu.clock = cpu.clock + switch_cpu1.clock = 
cpu.clock + if options.caches: + switch_cpu.addPrivateSplitL1Caches(MyCache(size = '32kB'), + MyCache(size = '64kB')) + + switch_cpu.workload = process + switch_cpu1.workload = process + switch_cpu.mem = system.physmem + switch_cpu1.mem = system.physmem + switch_cpu.connectMemPorts(system.membus) + root.switch_cpu = switch_cpu + root.switch_cpu1 = switch_cpu1 + switch_cpu_list = [(system.cpu, switch_cpu)] + switch_cpu_list1 = [(switch_cpu, switch_cpu1)] + # instantiate configuration m5.instantiate(root) -# simulate until program terminates -if options.maxtick: - exit_event = m5.simulate(options.maxtick) +if options.checkpoint_dir: + cptdir = options.checkpoint_dir else: - exit_event = m5.simulate() + cptdir = getcwd() + +if options.checkpoint_restore: + from os.path import isdir + from os import listdir, getcwd + import re + + if not isdir(cptdir): + m5.panic("checkpoint dir %s does not exist!" % cptdir) + + dirs = listdir(cptdir) + expr = re.compile('cpt.([0-9]*)') + cpts = [] + for dir in dirs: + match = expr.match(dir) + if match: + cpts.append(match.group(1)) + + cpts.sort(lambda a,b: cmp(long(a), long(b))) + + if options.checkpoint_restore > len(cpts): + m5.panic('Checkpoint %d not found' % options.checkpoint_restore) + + print "restoring checkpoint from ","/".join([cptdir, "cpt.%s" % cpts[options.checkpoint_restore - 1]]) + m5.restoreCheckpoint(root, "/".join([cptdir, "cpt.%s" % cpts[options.checkpoint_restore - 1]])) + +if options.standard_switch: + exit_event = m5.simulate(10000) + ## when you change to Timing (or Atomic), you halt the system given + ## as argument. When you are finished with the system changes + ## (including switchCpus), you must resume the system manually. + ## You DON'T need to resume after just switching CPUs if you haven't + ## changed anything on the system level. 
+ m5.changeToTiming(system) + m5.switchCpus(switch_cpu_list) + m5.resume(system) + + exit_event = m5.simulate(3000000) + m5.switchCpus(switch_cpu_list1) + +if options.maxtick: + maxtick = options.maxtick +elif options.maxtime: + simtime = int(options.maxtime * root.clock.value) + print "simulating for: ", simtime + maxtick = simtime +else: + maxtick = -1 + +num_checkpoints = 0 + +exit_cause = '' + +if options.take_checkpoints: + [when, period] = options.take_checkpoints.split(",", 1) + when = int(when) + period = int(period) + + exit_event = m5.simulate(when) + while exit_event.getCause() == "checkpoint": + exit_event = m5.simulate(when - m5.curTick()) + + if exit_event.getCause() == "simulate() limit reached": + m5.checkpoint(root, cptdir + "cpt.%d") + num_checkpoints += 1 + + sim_ticks = when + exit_cause = "maximum %d checkpoints dropped" % options.max_checkpoints + while num_checkpoints < options.max_checkpoints: + if (sim_ticks + period) > maxtick and maxtick != -1: + exit_event = m5.simulate(maxtick - sim_ticks) + exit_cause = exit_event.getCause() + break + else: + exit_event = m5.simulate(period) + sim_ticks += period + while exit_event.getCause() == "checkpoint": + exit_event = m5.simulate(period - m5.curTick()) + if exit_event.getCause() == "simulate() limit reached": + m5.checkpoint(root, cptdir + "cpt.%d") + num_checkpoints += 1 + +else: #no checkpoints being taken via this script + exit_event = m5.simulate(maxtick) + + while exit_event.getCause() == "checkpoint": + m5.checkpoint(root, cptdir + "cpt.%d") + num_checkpoints += 1 + if num_checkpoints == options.max_checkpoints: + exit_cause = "maximum %d checkpoints dropped" % options.max_checkpoints + break + + if maxtick == -1: + exit_event = m5.simulate(maxtick) + else: + exit_event = m5.simulate(maxtick - m5.curTick()) + + exit_cause = exit_event.getCause() + +if exit_cause == '': + exit_cause = exit_event.getCause() +print 'Exiting @ cycle', m5.curTick(), 'because ', exit_cause -print 'Exiting @ 
tick', m5.curTick(), 'because', exit_event.getCause() From 0a2387f38c6887f688144a18e0d7ff50e80bd04c Mon Sep 17 00:00:00 2001 From: Lisa Hsu Date: Mon, 23 Oct 2006 18:43:56 -0400 Subject: [PATCH 07/12] make this parallel to the other cpu types so that resume works correctly. --HG-- extra : convert_revision : 3c165af27ea0e6c7f2a17819c1717d8900f54cc1 --- src/cpu/simple/atomic.cc | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc index fe421ae6c..87ecafd69 100644 --- a/src/cpu/simple/atomic.cc +++ b/src/cpu/simple/atomic.cc @@ -182,11 +182,14 @@ AtomicSimpleCPU::unserialize(Checkpoint *cp, const string &section) void AtomicSimpleCPU::resume() { - changeState(SimObject::Running); - if (thread->status() == ThreadContext::Active) { + if (_status != SwitchedOut && _status != Idle) { assert(system->getMemoryMode() == System::Atomic); - if (!tickEvent.scheduled()) - tickEvent.schedule(curTick); + + changeState(SimObject::Running); + if (thread->status() == ThreadContext::Active) { + if (!tickEvent.scheduled()) + tickEvent.schedule(curTick); + } } } From 4da3938ed99e3691cfb16c275eea659cbaaa6c30 Mon Sep 17 00:00:00 2001 From: Lisa Hsu Date: Mon, 23 Oct 2006 18:45:30 -0400 Subject: [PATCH 08/12] get rid of the "resume" step at the end of changeToTiming/Atomic because this will cause an assertion when you do the CPU switch. instead, push the responsibility of the resume upwards towards the user - documented in se.py and fs.py so it should be ok.
--HG-- extra : convert_revision : 7530cf140844e18cc26df80057f8760f29ec952b --- src/python/m5/__init__.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/python/m5/__init__.py b/src/python/m5/__init__.py index 03e0508fb..d41fd5a61 100644 --- a/src/python/m5/__init__.py +++ b/src/python/m5/__init__.py @@ -150,7 +150,6 @@ def changeToAtomic(system): doDrain(system) print "Changing memory mode to atomic" system.changeTiming(cc_main.SimObject.Atomic) - resume(system) def changeToTiming(system): if not isinstance(system, objects.Root) and not isinstance(system, objects.System): @@ -159,7 +158,6 @@ def changeToTiming(system): doDrain(system) print "Changing memory mode to timing" system.changeTiming(cc_main.SimObject.Timing) - resume(system) def switchCpus(cpuList): print "switching cpus" @@ -190,7 +188,6 @@ def switchCpus(cpuList): cc_main.cleanupCountedDrain(drain_event) # Now all of the CPUs are ready to be switched out for old_cpu in old_cpus: - print "switching" old_cpu._ccObject.switchOut() index = 0 for new_cpu in new_cpus: From 3922b2e076e50a624e129d19a7e7811341c9e4fd Mon Sep 17 00:00:00 2001 From: Lisa Hsu Date: Mon, 23 Oct 2006 19:32:57 -0400 Subject: [PATCH 09/12] warmup of 1B cpu cycles. 
configs/example/fs.py: configs/example/se.py: warm up of 1B CPU cycles --HG-- extra : convert_revision : 0f3263f466fde4cd86e0663930e83617a6b3faad --- configs/example/fs.py | 2 +- configs/example/se.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/example/fs.py b/configs/example/fs.py index 11b82178b..a9daf63be 100644 --- a/configs/example/fs.py +++ b/configs/example/fs.py @@ -212,7 +212,7 @@ if options.standard_switch: m5.switchCpus(switch_cpu_list) m5.resume(test_sys) - exit_event = m5.simulate(300000000000) + exit_event = m5.simulate(500000000000) m5.switchCpus(switch_cpu_list1) if options.maxtick: diff --git a/configs/example/se.py b/configs/example/se.py index a053ff8df..2e63e27da 100644 --- a/configs/example/se.py +++ b/configs/example/se.py @@ -206,7 +206,7 @@ if options.standard_switch: m5.switchCpus(switch_cpu_list) m5.resume(system) - exit_event = m5.simulate(3000000) + exit_event = m5.simulate(500000000000) m5.switchCpus(switch_cpu_list1) if options.maxtick: From 650ebe4ec309e5bf4e5ae7acf6e928601c44e548 Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Tue, 24 Oct 2006 12:59:07 -0400 Subject: [PATCH 10/12] Add more traceflags for ethernet --HG-- extra : convert_revision : a5025f501d72626d1bcb4dcc24ee353ceb160ce7 --- src/base/traceflags.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/base/traceflags.py b/src/base/traceflags.py index 298d22c2b..640e7e165 100644 --- a/src/base/traceflags.py +++ b/src/base/traceflags.py @@ -84,6 +84,7 @@ baseFlags = [ 'EthernetDMA', 'EthernetData', 'EthernetDesc', + 'EthernetEEPROM', 'EthernetIntr', 'EthernetPIO', 'EthernetSM', From 86bd01dfc99e4bb40b5d7266e12b843285847c7c Mon Sep 17 00:00:00 2001 From: Ali Saidi Date: Tue, 24 Oct 2006 13:10:31 -0400 Subject: [PATCH 11/12] Fix fs.py. Lisa did you test this? Is there some weird python version thing?
--HG-- extra : convert_revision : 6df5f90d5b66e7af27d4f524744b9dc3c703a588 --- configs/example/fs.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/configs/example/fs.py b/configs/example/fs.py index a9daf63be..76b62a066 100644 --- a/configs/example/fs.py +++ b/configs/example/fs.py @@ -152,8 +152,8 @@ else: sys.exit(1) if options.standard_switch: - switch_cpus = [TimingSimpleCPU(defer_registration=True, cpu_id=(np+i) for i in xrange(np))] - switch_cpus1 = [DerivO3CPU(defer_registration=True, cpu_id=(2*np+i) for i in xrange(np))] + switch_cpus = [TimingSimpleCPU(defer_registration=True, cpu_id=(np+i)) for i in xrange(np)] + switch_cpus1 = [DerivO3CPU(defer_registration=True, cpu_id=(2*np+i)) for i in xrange(np)] for i in xrange(np): switch_cpus[i].system = test_sys switch_cpus1[i].system = test_sys @@ -176,7 +176,7 @@ m5.instantiate(root) if options.checkpoint_dir: cptdir = options.checkpoint_dir else: - cptdir = getcwd() + cptdir = os.getcwd() if options.checkpoint_restore: from os.path import isdir From eda7148af25e8e106e8983fb37952263dcae5275 Mon Sep 17 00:00:00 2001 From: Ron Dreslinski Date: Wed, 25 Oct 2006 14:14:37 -0400 Subject: [PATCH 12/12] Fix fixPacket functionality to calculate sizes properly src/mem/packet.cc: Copy size is calculated by END-BEGIN not BEGIN-END --HG-- extra : convert_revision : 0e2725c5551f8f70ff05cb285e0822afc0bb3f87 --- src/mem/packet.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mem/packet.cc b/src/mem/packet.cc index fa8d82c46..a342af634 100644 --- a/src/mem/packet.cc +++ b/src/mem/packet.cc @@ -182,11 +182,11 @@ fixPacket(PacketPtr func, PacketPtr timing) if (funcStart >= timingStart) { memcpy(timing->getPtr() + (funcStart - timingStart), func->getPtr(), - funcStart - std::min(funcEnd, timingEnd)); + std::min(funcEnd, timingEnd) - funcStart); } else { // timingStart > funcStart memcpy(timing->getPtr(), func->getPtr() + (timingStart - funcStart), - timingStart - 
std::min(funcEnd, timingEnd)); + std::min(funcEnd, timingEnd) - timingStart); } // we always want to keep going with a write return true;