// atomic.cc revision 5100
1/* 2 * Copyright (c) 2002-2005 The Regents of The University of Michigan 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 * 28 * Authors: Steve Reinhardt 29 */ 30 31#include "arch/locked_mem.hh" 32#include "arch/mmaped_ipr.hh" 33#include "arch/utility.hh" 34#include "base/bigint.hh" 35#include "cpu/exetrace.hh" 36#include "cpu/simple/atomic.hh" 37#include "mem/packet.hh" 38#include "mem/packet_access.hh" 39#include "params/AtomicSimpleCPU.hh" 40#include "sim/system.hh" 41 42using namespace std; 43using namespace TheISA; 44 45AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c) 46 : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c) 47{ 48} 49 50 51void 52AtomicSimpleCPU::TickEvent::process() 53{ 54 cpu->tick(); 55} 56 57const char * 58AtomicSimpleCPU::TickEvent::description() 59{ 60 return "AtomicSimpleCPU tick"; 61} 62 63Port * 64AtomicSimpleCPU::getPort(const std::string &if_name, int idx) 65{ 66 if (if_name == "dcache_port") 67 return &dcachePort; 68 else if (if_name == "icache_port") 69 return &icachePort; 70 else if (if_name == "physmem_port") { 71 hasPhysMemPort = true; 72 return &physmemPort; 73 } 74 else 75 panic("No Such Port\n"); 76} 77 78void 79AtomicSimpleCPU::init() 80{ 81 BaseCPU::init(); 82#if FULL_SYSTEM 83 for (int i = 0; i < threadContexts.size(); ++i) { 84 ThreadContext *tc = threadContexts[i]; 85 86 // initialize CPU, including PC 87 TheISA::initCPU(tc, tc->readCpuId()); 88 } 89#endif 90 if (hasPhysMemPort) { 91 bool snoop = false; 92 AddrRangeList pmAddrList; 93 physmemPort.getPeerAddressRanges(pmAddrList, snoop); 94 physMemAddr = *pmAddrList.begin(); 95 } 96} 97 98bool 99AtomicSimpleCPU::CpuPort::recvTiming(PacketPtr pkt) 100{ 101 panic("AtomicSimpleCPU doesn't expect recvTiming callback!"); 102 return true; 103} 104 105Tick 106AtomicSimpleCPU::CpuPort::recvAtomic(PacketPtr pkt) 107{ 108 //Snooping a coherence request, just return 109 return 0; 110} 111 112void 113AtomicSimpleCPU::CpuPort::recvFunctional(PacketPtr pkt) 114{ 115 //No internal storage to update, just return 116 return; 117} 118 119void 120AtomicSimpleCPU::CpuPort::recvStatusChange(Status status) 
121{ 122 if (status == RangeChange) { 123 if (!snoopRangeSent) { 124 snoopRangeSent = true; 125 sendStatusChange(Port::RangeChange); 126 } 127 return; 128 } 129 130 panic("AtomicSimpleCPU doesn't expect recvStatusChange callback!"); 131} 132 133void 134AtomicSimpleCPU::CpuPort::recvRetry() 135{ 136 panic("AtomicSimpleCPU doesn't expect recvRetry callback!"); 137} 138 139void 140AtomicSimpleCPU::DcachePort::setPeer(Port *port) 141{ 142 Port::setPeer(port); 143 144#if FULL_SYSTEM 145 // Update the ThreadContext's memory ports (Functional/Virtual 146 // Ports) 147 cpu->tcBase()->connectMemPorts(); 148#endif 149} 150 151AtomicSimpleCPU::AtomicSimpleCPU(Params *p) 152 : BaseSimpleCPU(p), tickEvent(this), 153 width(p->width), simulate_stalls(p->simulate_stalls), 154 icachePort(name() + "-iport", this), dcachePort(name() + "-iport", this), 155 physmemPort(name() + "-iport", this), hasPhysMemPort(false) 156{ 157 _status = Idle; 158 159 icachePort.snoopRangeSent = false; 160 dcachePort.snoopRangeSent = false; 161 162 ifetch_req.setThreadContext(p->cpu_id, 0); // Add thread ID if we add MT 163 data_read_req.setThreadContext(p->cpu_id, 0); // Add thread ID here too 164 data_write_req.setThreadContext(p->cpu_id, 0); // Add thread ID here too 165} 166 167 168AtomicSimpleCPU::~AtomicSimpleCPU() 169{ 170} 171 172void 173AtomicSimpleCPU::serialize(ostream &os) 174{ 175 SimObject::State so_state = SimObject::getState(); 176 SERIALIZE_ENUM(so_state); 177 Status _status = status(); 178 SERIALIZE_ENUM(_status); 179 BaseSimpleCPU::serialize(os); 180 nameOut(os, csprintf("%s.tickEvent", name())); 181 tickEvent.serialize(os); 182} 183 184void 185AtomicSimpleCPU::unserialize(Checkpoint *cp, const string §ion) 186{ 187 SimObject::State so_state; 188 UNSERIALIZE_ENUM(so_state); 189 UNSERIALIZE_ENUM(_status); 190 BaseSimpleCPU::unserialize(cp, section); 191 tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); 192} 193 194void 195AtomicSimpleCPU::resume() 196{ 197 DPRINTF(SimpleCPU, 
"Resume\n"); 198 if (_status != SwitchedOut && _status != Idle) { 199 assert(system->getMemoryMode() == Enums::atomic); 200 201 changeState(SimObject::Running); 202 if (thread->status() == ThreadContext::Active) { 203 if (!tickEvent.scheduled()) { 204 tickEvent.schedule(nextCycle()); 205 } 206 } 207 } 208} 209 210void 211AtomicSimpleCPU::switchOut() 212{ 213 assert(status() == Running || status() == Idle); 214 _status = SwitchedOut; 215 216 tickEvent.squash(); 217} 218 219 220void 221AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU) 222{ 223 BaseCPU::takeOverFrom(oldCPU, &icachePort, &dcachePort); 224 225 assert(!tickEvent.scheduled()); 226 227 // if any of this CPU's ThreadContexts are active, mark the CPU as 228 // running and schedule its tick event. 229 for (int i = 0; i < threadContexts.size(); ++i) { 230 ThreadContext *tc = threadContexts[i]; 231 if (tc->status() == ThreadContext::Active && _status != Running) { 232 _status = Running; 233 tickEvent.schedule(nextCycle()); 234 break; 235 } 236 } 237 if (_status != Running) { 238 _status = Idle; 239 } 240} 241 242 243void 244AtomicSimpleCPU::activateContext(int thread_num, int delay) 245{ 246 DPRINTF(SimpleCPU, "ActivateContext %d (%d cycles)\n", thread_num, delay); 247 248 assert(thread_num == 0); 249 assert(thread); 250 251 assert(_status == Idle); 252 assert(!tickEvent.scheduled()); 253 254 notIdleFraction++; 255 256 //Make sure ticks are still on multiples of cycles 257 tickEvent.schedule(nextCycle(curTick + ticks(delay))); 258 _status = Running; 259} 260 261 262void 263AtomicSimpleCPU::suspendContext(int thread_num) 264{ 265 DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num); 266 267 assert(thread_num == 0); 268 assert(thread); 269 270 assert(_status == Running); 271 272 // tick event may not be scheduled if this gets called from inside 273 // an instruction's execution, e.g. 
"quiesce" 274 if (tickEvent.scheduled()) 275 tickEvent.deschedule(); 276 277 notIdleFraction--; 278 _status = Idle; 279} 280 281 282template <class T> 283Fault 284AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) 285{ 286 // use the CPU's statically allocated read request and packet objects 287 Request *req = &data_read_req; 288 289 if (traceData) { 290 traceData->setAddr(addr); 291 } 292 293 //The block size of our peer. 294 int blockSize = dcachePort.peerBlockSize(); 295 //The size of the data we're trying to read. 296 int dataSize = sizeof(T); 297 298 uint8_t * dataPtr = (uint8_t *)&data; 299 300 //The address of the second part of this access if it needs to be split 301 //across a cache line boundary. 302 Addr secondAddr = roundDown(addr + dataSize - 1, blockSize); 303 304 if(secondAddr > addr) 305 dataSize = secondAddr - addr; 306 307 dcache_latency = 0; 308 309 while(1) { 310 req->setVirt(0, addr, dataSize, flags, thread->readPC()); 311 312 // translate to physical address 313 Fault fault = thread->translateDataReadReq(req); 314 315 // Now do the access. 316 if (fault == NoFault) { 317 Packet pkt = Packet(req, 318 req->isLocked() ? MemCmd::LoadLockedReq : MemCmd::ReadReq, 319 Packet::Broadcast); 320 pkt.dataStatic(dataPtr); 321 322 if (req->isMmapedIpr()) 323 dcache_latency += TheISA::handleIprRead(thread->getTC(), &pkt); 324 else { 325 if (hasPhysMemPort && pkt.getAddr() == physMemAddr) 326 dcache_latency += physmemPort.sendAtomic(&pkt); 327 else 328 dcache_latency += dcachePort.sendAtomic(&pkt); 329 } 330 dcache_access = true; 331 332 assert(!pkt.isError()); 333 334 if (req->isLocked()) { 335 TheISA::handleLockedRead(thread, req); 336 } 337 } 338 339 // This will need a new way to tell if it has a dcache attached. 340 if (req->isUncacheable()) 341 recordEvent("Uncached Read"); 342 343 //If there's a fault, return it 344 if (fault != NoFault) 345 return fault; 346 //If we don't need to access a second cache line, stop now. 
347 if (secondAddr <= addr) 348 { 349 data = gtoh(data); 350 return fault; 351 } 352 353 /* 354 * Set up for accessing the second cache line. 355 */ 356 357 //Move the pointer we're reading into to the correct location. 358 dataPtr += dataSize; 359 //Adjust the size to get the remaining bytes. 360 dataSize = addr + sizeof(T) - secondAddr; 361 //And access the right address. 362 addr = secondAddr; 363 } 364} 365 366#ifndef DOXYGEN_SHOULD_SKIP_THIS 367 368template 369Fault 370AtomicSimpleCPU::read(Addr addr, Twin32_t &data, unsigned flags); 371 372template 373Fault 374AtomicSimpleCPU::read(Addr addr, Twin64_t &data, unsigned flags); 375 376template 377Fault 378AtomicSimpleCPU::read(Addr addr, uint64_t &data, unsigned flags); 379 380template 381Fault 382AtomicSimpleCPU::read(Addr addr, uint32_t &data, unsigned flags); 383 384template 385Fault 386AtomicSimpleCPU::read(Addr addr, uint16_t &data, unsigned flags); 387 388template 389Fault 390AtomicSimpleCPU::read(Addr addr, uint8_t &data, unsigned flags); 391 392#endif //DOXYGEN_SHOULD_SKIP_THIS 393 394template<> 395Fault 396AtomicSimpleCPU::read(Addr addr, double &data, unsigned flags) 397{ 398 return read(addr, *(uint64_t*)&data, flags); 399} 400 401template<> 402Fault 403AtomicSimpleCPU::read(Addr addr, float &data, unsigned flags) 404{ 405 return read(addr, *(uint32_t*)&data, flags); 406} 407 408 409template<> 410Fault 411AtomicSimpleCPU::read(Addr addr, int32_t &data, unsigned flags) 412{ 413 return read(addr, (uint32_t&)data, flags); 414} 415 416 417template <class T> 418Fault 419AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) 420{ 421 // use the CPU's statically allocated write request and packet objects 422 Request *req = &data_write_req; 423 424 if (traceData) { 425 traceData->setAddr(addr); 426 } 427 428 //The block size of our peer. 429 int blockSize = dcachePort.peerBlockSize(); 430 //The size of the data we're trying to read. 
431 int dataSize = sizeof(T); 432 433 uint8_t * dataPtr = (uint8_t *)&data; 434 435 //The address of the second part of this access if it needs to be split 436 //across a cache line boundary. 437 Addr secondAddr = roundDown(addr + dataSize - 1, blockSize); 438 439 if(secondAddr > addr) 440 dataSize = secondAddr - addr; 441 442 dcache_latency = 0; 443 444 while(1) { 445 req->setVirt(0, addr, dataSize, flags, thread->readPC()); 446 447 // translate to physical address 448 Fault fault = thread->translateDataWriteReq(req); 449 450 // Now do the access. 451 if (fault == NoFault) { 452 MemCmd cmd = MemCmd::WriteReq; // default 453 bool do_access = true; // flag to suppress cache access 454 455 if (req->isLocked()) { 456 cmd = MemCmd::StoreCondReq; 457 do_access = TheISA::handleLockedWrite(thread, req); 458 } else if (req->isSwap()) { 459 cmd = MemCmd::SwapReq; 460 if (req->isCondSwap()) { 461 assert(res); 462 req->setExtraData(*res); 463 } 464 } 465 466 if (do_access) { 467 Packet pkt = Packet(req, cmd, Packet::Broadcast); 468 pkt.dataStatic(dataPtr); 469 470 if (req->isMmapedIpr()) { 471 dcache_latency += 472 TheISA::handleIprWrite(thread->getTC(), &pkt); 473 } else { 474 //XXX This needs to be outside of the loop in order to 475 //work properly for cache line boundary crossing 476 //accesses in transendian simulations. 477 data = htog(data); 478 if (hasPhysMemPort && pkt.getAddr() == physMemAddr) 479 dcache_latency += physmemPort.sendAtomic(&pkt); 480 else 481 dcache_latency += dcachePort.sendAtomic(&pkt); 482 } 483 dcache_access = true; 484 assert(!pkt.isError()); 485 486 if (req->isSwap()) { 487 assert(res); 488 *res = pkt.get<T>(); 489 } 490 } 491 492 if (res && !req->isSwap()) { 493 *res = req->getExtraData(); 494 } 495 } 496 497 // This will need a new way to tell if it's hooked up to a cache or not. 498 if (req->isUncacheable()) 499 recordEvent("Uncached Write"); 500 501 //If there's a fault or we don't need to access a second cache line, 502 //stop now. 
503 if (fault != NoFault || secondAddr <= addr) 504 { 505 // If the write needs to have a fault on the access, consider 506 // calling changeStatus() and changing it to "bad addr write" 507 // or something. 508 return fault; 509 } 510 511 /* 512 * Set up for accessing the second cache line. 513 */ 514 515 //Move the pointer we're reading into to the correct location. 516 dataPtr += dataSize; 517 //Adjust the size to get the remaining bytes. 518 dataSize = addr + sizeof(T) - secondAddr; 519 //And access the right address. 520 addr = secondAddr; 521 } 522} 523 524 525#ifndef DOXYGEN_SHOULD_SKIP_THIS 526 527template 528Fault 529AtomicSimpleCPU::write(Twin32_t data, Addr addr, 530 unsigned flags, uint64_t *res); 531 532template 533Fault 534AtomicSimpleCPU::write(Twin64_t data, Addr addr, 535 unsigned flags, uint64_t *res); 536 537template 538Fault 539AtomicSimpleCPU::write(uint64_t data, Addr addr, 540 unsigned flags, uint64_t *res); 541 542template 543Fault 544AtomicSimpleCPU::write(uint32_t data, Addr addr, 545 unsigned flags, uint64_t *res); 546 547template 548Fault 549AtomicSimpleCPU::write(uint16_t data, Addr addr, 550 unsigned flags, uint64_t *res); 551 552template 553Fault 554AtomicSimpleCPU::write(uint8_t data, Addr addr, 555 unsigned flags, uint64_t *res); 556 557#endif //DOXYGEN_SHOULD_SKIP_THIS 558 559template<> 560Fault 561AtomicSimpleCPU::write(double data, Addr addr, unsigned flags, uint64_t *res) 562{ 563 return write(*(uint64_t*)&data, addr, flags, res); 564} 565 566template<> 567Fault 568AtomicSimpleCPU::write(float data, Addr addr, unsigned flags, uint64_t *res) 569{ 570 return write(*(uint32_t*)&data, addr, flags, res); 571} 572 573 574template<> 575Fault 576AtomicSimpleCPU::write(int32_t data, Addr addr, unsigned flags, uint64_t *res) 577{ 578 return write((uint32_t)data, addr, flags, res); 579} 580 581 582void 583AtomicSimpleCPU::tick() 584{ 585 DPRINTF(SimpleCPU, "Tick\n"); 586 587 Tick latency = ticks(1); // instruction takes one cycle by default 
588 589 for (int i = 0; i < width; ++i) { 590 numCycles++; 591 592 if (!curStaticInst || !curStaticInst->isDelayedCommit()) 593 checkForInterrupts(); 594 595 Fault fault = setupFetchRequest(&ifetch_req); 596 597 if (fault == NoFault) { 598 Tick icache_latency = 0; 599 bool icache_access = false; 600 dcache_access = false; // assume no dcache access 601 602 //Fetch more instruction memory if necessary 603 //if(predecoder.needMoreBytes()) 604 //{ 605 icache_access = true; 606 Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq, 607 Packet::Broadcast); 608 ifetch_pkt.dataStatic(&inst); 609 610 if (hasPhysMemPort && ifetch_pkt.getAddr() == physMemAddr) 611 icache_latency = physmemPort.sendAtomic(&ifetch_pkt); 612 else 613 icache_latency = icachePort.sendAtomic(&ifetch_pkt); 614 615 assert(!ifetch_pkt.isError()); 616 617 // ifetch_req is initialized to read the instruction directly 618 // into the CPU object's inst field. 619 //} 620 621 preExecute(); 622 623 if (curStaticInst) { 624 fault = curStaticInst->execute(this, traceData); 625 626 // keep an instruction count 627 if (fault == NoFault) 628 countInst(); 629 else if (traceData) { 630 // If there was a fault, we should trace this instruction. 631 delete traceData; 632 traceData = NULL; 633 } 634 635 postExecute(); 636 } 637 638 // @todo remove me after debugging with legion done 639 if (curStaticInst && (!curStaticInst->isMicroop() || 640 curStaticInst->isFirstMicroop())) 641 instCnt++; 642 643 if (simulate_stalls) { 644 Tick icache_stall = 645 icache_access ? icache_latency - ticks(1) : 0; 646 Tick dcache_stall = 647 dcache_access ? 
dcache_latency - ticks(1) : 0; 648 Tick stall_cycles = (icache_stall + dcache_stall) / ticks(1); 649 if (ticks(stall_cycles) < (icache_stall + dcache_stall)) 650 latency += ticks(stall_cycles+1); 651 else 652 latency += ticks(stall_cycles); 653 } 654 655 } 656 if(fault != NoFault || !stayAtPC) 657 advancePC(fault); 658 } 659 660 if (_status != Idle) 661 tickEvent.schedule(curTick + latency); 662} 663 664 665//////////////////////////////////////////////////////////////////////// 666// 667// AtomicSimpleCPU Simulation Object 668// 669AtomicSimpleCPU * 670AtomicSimpleCPUParams::create() 671{ 672 AtomicSimpleCPU::Params *params = new AtomicSimpleCPU::Params(); 673 params->name = name; 674 params->numberOfThreads = 1; 675 params->max_insts_any_thread = max_insts_any_thread; 676 params->max_insts_all_threads = max_insts_all_threads; 677 params->max_loads_any_thread = max_loads_any_thread; 678 params->max_loads_all_threads = max_loads_all_threads; 679 params->progress_interval = progress_interval; 680 params->deferRegistration = defer_registration; 681 params->phase = phase; 682 params->clock = clock; 683 params->functionTrace = function_trace; 684 params->functionTraceStart = function_trace_start; 685 params->width = width; 686 params->simulate_stalls = simulate_stalls; 687 params->system = system; 688 params->cpu_id = cpu_id; 689 params->tracer = tracer; 690 691 params->itb = itb; 692 params->dtb = dtb; 693#if FULL_SYSTEM 694 params->profile = profile; 695 params->do_quiesce = do_quiesce; 696 params->do_checkpoint_insts = do_checkpoint_insts; 697 params->do_statistics_insts = do_statistics_insts; 698#else 699 if (workload.size() != 1) 700 panic("only one workload allowed"); 701 params->process = workload[0]; 702#endif 703 704 AtomicSimpleCPU *cpu = new AtomicSimpleCPU(params); 705 return cpu; 706} 707