atomic.cc revision 5914
/*
 * Copyright (c) 2002-2005 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Steve Reinhardt
 */

#include "arch/locked_mem.hh"
#include "arch/mmaped_ipr.hh"
#include "arch/utility.hh"
#include "base/bigint.hh"
#include "cpu/exetrace.hh"
#include "cpu/simple/atomic.hh"
#include "mem/packet.hh"
#include "mem/packet_access.hh"
#include "params/AtomicSimpleCPU.hh"
#include "sim/system.hh"

using namespace std;
using namespace TheISA;

AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c)
    : Event(CPU_Tick_Pri), cpu(c)
{
}


void
AtomicSimpleCPU::TickEvent::process()
{
    cpu->tick();
}

const char *
AtomicSimpleCPU::TickEvent::description() const
{
    return "AtomicSimpleCPU tick";
}

Port *
AtomicSimpleCPU::getPort(const std::string &if_name, int idx)
{
    if (if_name == "dcache_port")
        return &dcachePort;
    else if (if_name == "icache_port")
        return &icachePort;
    else if (if_name == "physmem_port") {
        hasPhysMemPort = true;
        return &physmemPort;
    }
    else
        panic("No Such Port\n");
}

void
AtomicSimpleCPU::init()
{
    BaseCPU::init();
#if FULL_SYSTEM
    for (int i = 0; i < threadContexts.size(); ++i) {
        ThreadContext *tc = threadContexts[i];

        // initialize CPU, including PC
        TheISA::initCPU(tc, tc->contextId());
    }
#endif
    if (hasPhysMemPort) {
        bool snoop = false;
        AddrRangeList pmAddrList;
        physmemPort.getPeerAddressRanges(pmAddrList, snoop);
        physMemAddr = *pmAddrList.begin();
    }
    // Atomic doesn't do MT right now, so contextId == threadId
    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}

bool
AtomicSimpleCPU::CpuPort::recvTiming(PacketPtr pkt)
{
    panic("AtomicSimpleCPU doesn't expect recvTiming callback!");
    return true;
}
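// Illustrative sketch (assumed from the standard atomic-mode Port
// interface, not code from this file): in atomic mode a master port
// calls sendAtomic() and receives the access latency directly as the
// return value; there is no split request/response pair. Roughly:
//
//     Packet pkt(req, MemCmd::ReadReq, Packet::Broadcast);
//     pkt.dataStatic(buf);
//     Tick latency = dcachePort.sendAtomic(&pkt); // peer's recvAtomic()
//
// The CPU itself only ever receives snoops on this path, which is why
// recvAtomic() below just returns 0 latency.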
Tick
AtomicSimpleCPU::CpuPort::recvAtomic(PacketPtr pkt)
{
    //Snooping a coherence request, just return
    return 0;
}

void
AtomicSimpleCPU::CpuPort::recvFunctional(PacketPtr pkt)
{
    //No internal storage to update, just return
    return;
}

void
AtomicSimpleCPU::CpuPort::recvStatusChange(Status status)
{
    if (status == RangeChange) {
        if (!snoopRangeSent) {
            snoopRangeSent = true;
            sendStatusChange(Port::RangeChange);
        }
        return;
    }

    panic("AtomicSimpleCPU doesn't expect recvStatusChange callback!");
}

void
AtomicSimpleCPU::CpuPort::recvRetry()
{
    panic("AtomicSimpleCPU doesn't expect recvRetry callback!");
}

void
AtomicSimpleCPU::DcachePort::setPeer(Port *port)
{
    Port::setPeer(port);

#if FULL_SYSTEM
    // Update the ThreadContext's memory ports (Functional/Virtual
    // Ports)
    cpu->tcBase()->connectMemPorts(cpu->tcBase());
#endif
}

AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
    : BaseSimpleCPU(p), tickEvent(this), width(p->width),
      simulate_data_stalls(p->simulate_data_stalls),
      simulate_inst_stalls(p->simulate_inst_stalls),
      icachePort(name() + "-iport", this), dcachePort(name() + "-dport", this),
      physmemPort(name() + "-pport", this), hasPhysMemPort(false)
{
    _status = Idle;

    icachePort.snoopRangeSent = false;
    dcachePort.snoopRangeSent = false;
}


AtomicSimpleCPU::~AtomicSimpleCPU()
{
}

void
AtomicSimpleCPU::serialize(ostream &os)
{
    SimObject::State so_state = SimObject::getState();
    SERIALIZE_ENUM(so_state);
    BaseSimpleCPU::serialize(os);
    nameOut(os, csprintf("%s.tickEvent", name()));
    tickEvent.serialize(os);
}

void
AtomicSimpleCPU::unserialize(Checkpoint *cp, const string &section)
{
    SimObject::State so_state;
    UNSERIALIZE_ENUM(so_state);
    BaseSimpleCPU::unserialize(cp, section);
    tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
}

void
AtomicSimpleCPU::resume()
{
    if (_status == Idle || _status == SwitchedOut)
        return;

    DPRINTF(SimpleCPU, "Resume\n");
    assert(system->getMemoryMode() == Enums::atomic);

    changeState(SimObject::Running);
    if (thread->status() == ThreadContext::Active) {
        if (!tickEvent.scheduled())
            schedule(tickEvent, nextCycle());
    }
}

void
AtomicSimpleCPU::switchOut()
{
    assert(_status == Running || _status == Idle);
    _status = SwitchedOut;

    tickEvent.squash();
}
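// Illustrative context (assumed typical usage, not code from this
// file): a switchOut()/takeOverFrom() pair is driven from the top level
// when the simulation swaps CPU models over the same thread contexts,
// roughly:
//
//     oldCPU->switchOut();
//     newCPU->takeOverFrom(oldCPU);  // rewires ports, inherits contexts
//
// takeOverFrom() below then restarts the tick event if any inherited
// context is already active.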
void
AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
{
    BaseCPU::takeOverFrom(oldCPU, &icachePort, &dcachePort);

    assert(!tickEvent.scheduled());

    // if any of this CPU's ThreadContexts are active, mark the CPU as
    // running and schedule its tick event.
    for (int i = 0; i < threadContexts.size(); ++i) {
        ThreadContext *tc = threadContexts[i];
        if (tc->status() == ThreadContext::Active && _status != Running) {
            _status = Running;
            schedule(tickEvent, nextCycle());
            break;
        }
    }
    if (_status != Running) {
        _status = Idle;
    }
    assert(threadContexts.size() == 1);
    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}


void
AtomicSimpleCPU::activateContext(int thread_num, int delay)
{
    DPRINTF(SimpleCPU, "ActivateContext %d (%d cycles)\n", thread_num, delay);

    assert(thread_num == 0);
    assert(thread);

    assert(_status == Idle);
    assert(!tickEvent.scheduled());

    notIdleFraction++;
    numCycles += tickToCycles(thread->lastActivate - thread->lastSuspend);

    //Make sure ticks are still on multiples of cycles
    schedule(tickEvent, nextCycle(curTick + ticks(delay)));
    _status = Running;
}


void
AtomicSimpleCPU::suspendContext(int thread_num)
{
    DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num);

    assert(thread_num == 0);
    assert(thread);

    assert(_status == Running);

    // tick event may not be scheduled if this gets called from inside
    // an instruction's execution, e.g. "quiesce"
    if (tickEvent.scheduled())
        deschedule(tickEvent);

    notIdleFraction--;
    _status = Idle;
}


template <class T>
Fault
AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags)
{
    // use the CPU's statically allocated read request and packet objects
    Request *req = &data_read_req;

    if (traceData) {
        traceData->setAddr(addr);
    }

    //The block size of our peer.
    int blockSize = dcachePort.peerBlockSize();
    //The size of the data we're trying to read.
    int dataSize = sizeof(T);

    uint8_t *dataPtr = (uint8_t *)&data;

    //The address of the second part of this access if it needs to be split
    //across a cache line boundary.
    Addr secondAddr = roundDown(addr + dataSize - 1, blockSize);

    if (secondAddr > addr)
        dataSize = secondAddr - addr;

    dcache_latency = 0;

    while (1) {
        req->setVirt(0, addr, dataSize, flags, thread->readPC());

        // translate to physical address
        Fault fault = thread->dtb->translateAtomic(req, tc, false);

        // Now do the access.
        if (fault == NoFault) {
            Packet pkt = Packet(req,
                    req->isLocked() ? MemCmd::LoadLockedReq : MemCmd::ReadReq,
                    Packet::Broadcast);
            pkt.dataStatic(dataPtr);

            if (req->isMmapedIpr())
                dcache_latency += TheISA::handleIprRead(thread->getTC(), &pkt);
            else {
                if (hasPhysMemPort && pkt.getAddr() == physMemAddr)
                    dcache_latency += physmemPort.sendAtomic(&pkt);
                else
                    dcache_latency += dcachePort.sendAtomic(&pkt);
            }
            dcache_access = true;

            assert(!pkt.isError());

            if (req->isLocked()) {
                TheISA::handleLockedRead(thread, req);
            }
        }

        // This will need a new way to tell if it has a dcache attached.
        if (req->isUncacheable())
            recordEvent("Uncached Read");

        //If there's a fault, return it
        if (fault != NoFault)
            return fault;
        //If we don't need to access a second cache line, stop now.
        if (secondAddr <= addr) {
            data = gtoh(data);
            if (traceData) {
                traceData->setData(data);
            }
            return fault;
        }

        /*
         * Set up for accessing the second cache line.
         */

        //Move the pointer we're reading into to the correct location.
        dataPtr += dataSize;
        //Adjust the size to get the remaining bytes.
        dataSize = addr + sizeof(T) - secondAddr;
        //And access the right address.
        addr = secondAddr;
    }
}
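// Worked example of the split-access arithmetic in read() above
// (illustrative values only): with blockSize = 64 and a 4-byte read at
// addr = 0x3e,
//     secondAddr = roundDown(0x3e + 4 - 1, 64) = 0x40 > addr,
// so the first iteration reads the 2 bytes at 0x3e..0x3f; the loop then
// advances dataPtr by 2, recomputes dataSize = 0x3e + 4 - 0x40 = 2, and
// reads the remaining 2 bytes at 0x40..0x41 on the second iteration.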
#ifndef DOXYGEN_SHOULD_SKIP_THIS

template
Fault
AtomicSimpleCPU::read(Addr addr, Twin32_t &data, unsigned flags);

template
Fault
AtomicSimpleCPU::read(Addr addr, Twin64_t &data, unsigned flags);

template
Fault
AtomicSimpleCPU::read(Addr addr, uint64_t &data, unsigned flags);

template
Fault
AtomicSimpleCPU::read(Addr addr, uint32_t &data, unsigned flags);

template
Fault
AtomicSimpleCPU::read(Addr addr, uint16_t &data, unsigned flags);

template
Fault
AtomicSimpleCPU::read(Addr addr, uint8_t &data, unsigned flags);

#endif //DOXYGEN_SHOULD_SKIP_THIS

template<>
Fault
AtomicSimpleCPU::read(Addr addr, double &data, unsigned flags)
{
    return read(addr, *(uint64_t*)&data, flags);
}

template<>
Fault
AtomicSimpleCPU::read(Addr addr, float &data, unsigned flags)
{
    return read(addr, *(uint32_t*)&data, flags);
}


template<>
Fault
AtomicSimpleCPU::read(Addr addr, int32_t &data, unsigned flags)
{
    return read(addr, (uint32_t&)data, flags);
}
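// How the locked (LL/SC) path threads through read() above and write()
// below (a sketch of the assumed flow via the ISA's locked_mem.hh
// hooks): a load-locked issues MemCmd::LoadLockedReq and records the
// reservation through handleLockedRead(); the paired store-conditional
// issues MemCmd::StoreCondReq, and handleLockedWrite() returns false to
// suppress the memory access when the reservation has been lost, with
// the success flag coming back through req->getExtraData() into *res.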
template <class T>
Fault
AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res)
{
    // use the CPU's statically allocated write request and packet objects
    Request *req = &data_write_req;

    if (traceData) {
        traceData->setAddr(addr);
    }

    //The block size of our peer.
    int blockSize = dcachePort.peerBlockSize();
    //The size of the data we're trying to write.
    int dataSize = sizeof(T);

    uint8_t *dataPtr = (uint8_t *)&data;

    //The address of the second part of this access if it needs to be split
    //across a cache line boundary.
    Addr secondAddr = roundDown(addr + dataSize - 1, blockSize);

    if (secondAddr > addr)
        dataSize = secondAddr - addr;

    dcache_latency = 0;

    while (1) {
        req->setVirt(0, addr, dataSize, flags, thread->readPC());

        // translate to physical address
        Fault fault = thread->dtb->translateAtomic(req, tc, true);

        // Now do the access.
        if (fault == NoFault) {
            MemCmd cmd = MemCmd::WriteReq; // default
            bool do_access = true;  // flag to suppress cache access

            if (req->isLocked()) {
                cmd = MemCmd::StoreCondReq;
                do_access = TheISA::handleLockedWrite(thread, req);
            } else if (req->isSwap()) {
                cmd = MemCmd::SwapReq;
                if (req->isCondSwap()) {
                    assert(res);
                    req->setExtraData(*res);
                }
            }

            if (do_access) {
                Packet pkt = Packet(req, cmd, Packet::Broadcast);
                pkt.dataStatic(dataPtr);

                if (req->isMmapedIpr()) {
                    dcache_latency +=
                        TheISA::handleIprWrite(thread->getTC(), &pkt);
                } else {
                    //XXX This needs to be outside of the loop in order to
                    //work properly for cache line boundary crossing
                    //accesses in trans-endian simulations.
                    data = htog(data);
                    if (hasPhysMemPort && pkt.getAddr() == physMemAddr)
                        dcache_latency += physmemPort.sendAtomic(&pkt);
                    else
                        dcache_latency += dcachePort.sendAtomic(&pkt);
                }
                dcache_access = true;
                assert(!pkt.isError());

                if (req->isSwap()) {
                    assert(res);
                    *res = pkt.get<T>();
                }
            }

            if (res && !req->isSwap()) {
                *res = req->getExtraData();
            }
        }

        // This will need a new way to tell if it's hooked up to a cache or not.
        if (req->isUncacheable())
            recordEvent("Uncached Write");

        //If there's a fault or we don't need to access a second cache line,
        //stop now.
        if (fault != NoFault || secondAddr <= addr) {
            // If the write needs to have a fault on the access, consider
            // calling changeStatus() and changing it to "bad addr write"
            // or something.
            if (traceData) {
                traceData->setData(data);
            }
            return fault;
        }

        /*
         * Set up for accessing the second cache line.
         */

        //Move the pointer we're writing from to the correct location.
        dataPtr += dataSize;
        //Adjust the size to get the remaining bytes.
        dataSize = addr + sizeof(T) - secondAddr;
        //And access the right address.
        addr = secondAddr;
    }
}


#ifndef DOXYGEN_SHOULD_SKIP_THIS

template
Fault
AtomicSimpleCPU::write(Twin32_t data, Addr addr,
                       unsigned flags, uint64_t *res);

template
Fault
AtomicSimpleCPU::write(Twin64_t data, Addr addr,
                       unsigned flags, uint64_t *res);

template
Fault
AtomicSimpleCPU::write(uint64_t data, Addr addr,
                       unsigned flags, uint64_t *res);

template
Fault
AtomicSimpleCPU::write(uint32_t data, Addr addr,
                       unsigned flags, uint64_t *res);

template
Fault
AtomicSimpleCPU::write(uint16_t data, Addr addr,
                       unsigned flags, uint64_t *res);

template
Fault
AtomicSimpleCPU::write(uint8_t data, Addr addr,
                       unsigned flags, uint64_t *res);

#endif //DOXYGEN_SHOULD_SKIP_THIS

template<>
Fault
AtomicSimpleCPU::write(double data, Addr addr, unsigned flags, uint64_t *res)
{
    return write(*(uint64_t*)&data, addr, flags, res);
}

template<>
Fault
AtomicSimpleCPU::write(float data, Addr addr, unsigned flags, uint64_t *res)
{
    return write(*(uint32_t*)&data, addr, flags, res);
}


template<>
Fault
AtomicSimpleCPU::write(int32_t data, Addr addr, unsigned flags, uint64_t *res)
{
    return write((uint32_t)data, addr, flags, res);
}
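// Worked example of the stall accounting in tick() below (illustrative
// numbers): with ticks(1) = 500 and stall_ticks = 1300, stall_cycles =
// 1300 / 500 = 2, so aligned_stall_ticks starts at ticks(2) = 1000;
// because that truncated value falls short of the real 1300-tick stall,
// a single extra tick is added before it is folded into the latency
// charged for this iteration.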
void
AtomicSimpleCPU::tick()
{
    DPRINTF(SimpleCPU, "Tick\n");

    Tick latency = 0;

    for (int i = 0; i < width; ++i) {
        numCycles++;

        if (!curStaticInst || !curStaticInst->isDelayedCommit())
            checkForInterrupts();

        checkPcEventQueue();

        Fault fault = NoFault;

        bool fromRom = isRomMicroPC(thread->readMicroPC());
        if (!fromRom && !curMacroStaticInst) {
            setupFetchRequest(&ifetch_req);
            fault = thread->itb->translateAtomic(&ifetch_req, tc);
        }

        if (fault == NoFault) {
            Tick icache_latency = 0;
            bool icache_access = false;
            dcache_access = false; // assume no dcache access

            if (!fromRom && !curMacroStaticInst) {
                // This is commented out because the predecoder would act like
                // a tiny cache otherwise. It wouldn't be flushed when needed
                // like the I cache. It should be flushed, and when that works
                // this code should be uncommented.
                //Fetch more instruction memory if necessary
                //if (predecoder.needMoreBytes())
                //{
                icache_access = true;
                Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq,
                                           Packet::Broadcast);
                ifetch_pkt.dataStatic(&inst);

                if (hasPhysMemPort && ifetch_pkt.getAddr() == physMemAddr)
                    icache_latency = physmemPort.sendAtomic(&ifetch_pkt);
                else
                    icache_latency = icachePort.sendAtomic(&ifetch_pkt);

                assert(!ifetch_pkt.isError());

                // ifetch_req is initialized to read the instruction directly
                // into the CPU object's inst field.
                //}
            }

            preExecute();

            if (curStaticInst) {
                fault = curStaticInst->execute(this, traceData);

                // keep an instruction count
                if (fault == NoFault)
                    countInst();
                else if (traceData) {
                    // If there was a fault, we shouldn't trace this
                    // instruction.
                    delete traceData;
                    traceData = NULL;
                }

                postExecute();
            }

            // @todo remove me after debugging with legion done
            if (curStaticInst && (!curStaticInst->isMicroop() ||
                        curStaticInst->isFirstMicroop()))
                instCnt++;

            Tick stall_ticks = 0;
            if (simulate_inst_stalls && icache_access)
                stall_ticks += icache_latency;

            if (simulate_data_stalls && dcache_access)
                stall_ticks += dcache_latency;

            if (stall_ticks) {
                Tick stall_cycles = stall_ticks / ticks(1);
                Tick aligned_stall_ticks = ticks(stall_cycles);

                if (aligned_stall_ticks < stall_ticks)
                    aligned_stall_ticks += 1;

                latency += aligned_stall_ticks;
            }

        }
        if (fault != NoFault || !stayAtPC)
            advancePC(fault);
    }

    // instruction takes at least one cycle
    if (latency < ticks(1))
        latency = ticks(1);

    if (_status != Idle)
        schedule(tickEvent, curTick + latency);
}


void
AtomicSimpleCPU::printAddr(Addr a)
{
    dcachePort.printAddr(a);
}


////////////////////////////////////////////////////////////////////////
//
//  AtomicSimpleCPU Simulation Object
//
AtomicSimpleCPU *
AtomicSimpleCPUParams::create()
{
    numThreads = 1;
#if !FULL_SYSTEM
    if (workload.size() != 1)
        panic("only one workload allowed");
#endif
    return new AtomicSimpleCPU(this);
}
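// Context for the factory above (assumed from the params framework, not
// shown in this file): AtomicSimpleCPUParams is generated from the
// AtomicSimpleCPU Python SimObject description, and create() runs when a
// configuration script instantiates the CPU, e.g. something along the
// lines of:
//
//     system.cpu = AtomicSimpleCPU(width=1)  # Python config, illustrative
//
// which is why the single-thread and single-workload constraints are
// enforced here rather than in the constructor.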