// atomic.cc revision 6076
1/* 2 * Copyright (c) 2002-2005 The Regents of The University of Michigan 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 * 28 * Authors: Steve Reinhardt 29 */ 30 31#include "arch/locked_mem.hh" 32#include "arch/mmaped_ipr.hh" 33#include "arch/utility.hh" 34#include "base/bigint.hh" 35#include "cpu/exetrace.hh" 36#include "cpu/simple/atomic.hh" 37#include "mem/packet.hh" 38#include "mem/packet_access.hh" 39#include "params/AtomicSimpleCPU.hh" 40#include "sim/system.hh" 41 42using namespace std; 43using namespace TheISA; 44 45AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c) 46 : Event(CPU_Tick_Pri), cpu(c) 47{ 48} 49 50 51void 52AtomicSimpleCPU::TickEvent::process() 53{ 54 cpu->tick(); 55} 56 57const char * 58AtomicSimpleCPU::TickEvent::description() const 59{ 60 return "AtomicSimpleCPU tick"; 61} 62 63Port * 64AtomicSimpleCPU::getPort(const std::string &if_name, int idx) 65{ 66 if (if_name == "dcache_port") 67 return &dcachePort; 68 else if (if_name == "icache_port") 69 return &icachePort; 70 else if (if_name == "physmem_port") { 71 hasPhysMemPort = true; 72 return &physmemPort; 73 } 74 else 75 panic("No Such Port\n"); 76} 77 78void 79AtomicSimpleCPU::init() 80{ 81 BaseCPU::init(); 82#if FULL_SYSTEM 83 for (int i = 0; i < threadContexts.size(); ++i) { 84 ThreadContext *tc = threadContexts[i]; 85 86 // initialize CPU, including PC 87 TheISA::initCPU(tc, tc->contextId()); 88 } 89#endif 90 if (hasPhysMemPort) { 91 bool snoop = false; 92 AddrRangeList pmAddrList; 93 physmemPort.getPeerAddressRanges(pmAddrList, snoop); 94 physMemAddr = *pmAddrList.begin(); 95 } 96 // Atomic doesn't do MT right now, so contextId == threadId 97 ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT 98 data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too 99 data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too 100} 101 102bool 103AtomicSimpleCPU::CpuPort::recvTiming(PacketPtr pkt) 104{ 105 panic("AtomicSimpleCPU doesn't expect recvTiming callback!"); 106 return true; 107} 108 109Tick 110AtomicSimpleCPU::CpuPort::recvAtomic(PacketPtr pkt) 111{ 112 
//Snooping a coherence request, just return 113 return 0; 114} 115 116void 117AtomicSimpleCPU::CpuPort::recvFunctional(PacketPtr pkt) 118{ 119 //No internal storage to update, just return 120 return; 121} 122 123void 124AtomicSimpleCPU::CpuPort::recvStatusChange(Status status) 125{ 126 if (status == RangeChange) { 127 if (!snoopRangeSent) { 128 snoopRangeSent = true; 129 sendStatusChange(Port::RangeChange); 130 } 131 return; 132 } 133 134 panic("AtomicSimpleCPU doesn't expect recvStatusChange callback!"); 135} 136 137void 138AtomicSimpleCPU::CpuPort::recvRetry() 139{ 140 panic("AtomicSimpleCPU doesn't expect recvRetry callback!"); 141} 142 143void 144AtomicSimpleCPU::DcachePort::setPeer(Port *port) 145{ 146 Port::setPeer(port); 147 148#if FULL_SYSTEM 149 // Update the ThreadContext's memory ports (Functional/Virtual 150 // Ports) 151 cpu->tcBase()->connectMemPorts(cpu->tcBase()); 152#endif 153} 154 155AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p) 156 : BaseSimpleCPU(p), tickEvent(this), width(p->width), 157 simulate_data_stalls(p->simulate_data_stalls), 158 simulate_inst_stalls(p->simulate_inst_stalls), 159 icachePort(name() + "-iport", this), dcachePort(name() + "-iport", this), 160 physmemPort(name() + "-iport", this), hasPhysMemPort(false) 161{ 162 _status = Idle; 163 164 icachePort.snoopRangeSent = false; 165 dcachePort.snoopRangeSent = false; 166 167} 168 169 170AtomicSimpleCPU::~AtomicSimpleCPU() 171{ 172} 173 174void 175AtomicSimpleCPU::serialize(ostream &os) 176{ 177 SimObject::State so_state = SimObject::getState(); 178 SERIALIZE_ENUM(so_state); 179 BaseSimpleCPU::serialize(os); 180 nameOut(os, csprintf("%s.tickEvent", name())); 181 tickEvent.serialize(os); 182} 183 184void 185AtomicSimpleCPU::unserialize(Checkpoint *cp, const string §ion) 186{ 187 SimObject::State so_state; 188 UNSERIALIZE_ENUM(so_state); 189 BaseSimpleCPU::unserialize(cp, section); 190 tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); 191} 192 193void 
194AtomicSimpleCPU::resume() 195{ 196 if (_status == Idle || _status == SwitchedOut) 197 return; 198 199 DPRINTF(SimpleCPU, "Resume\n"); 200 assert(system->getMemoryMode() == Enums::atomic); 201 202 changeState(SimObject::Running); 203 if (thread->status() == ThreadContext::Active) { 204 if (!tickEvent.scheduled()) 205 schedule(tickEvent, nextCycle()); 206 } 207} 208 209void 210AtomicSimpleCPU::switchOut() 211{ 212 assert(_status == Running || _status == Idle); 213 _status = SwitchedOut; 214 215 tickEvent.squash(); 216} 217 218 219void 220AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU) 221{ 222 BaseCPU::takeOverFrom(oldCPU, &icachePort, &dcachePort); 223 224 assert(!tickEvent.scheduled()); 225 226 // if any of this CPU's ThreadContexts are active, mark the CPU as 227 // running and schedule its tick event. 228 for (int i = 0; i < threadContexts.size(); ++i) { 229 ThreadContext *tc = threadContexts[i]; 230 if (tc->status() == ThreadContext::Active && _status != Running) { 231 _status = Running; 232 schedule(tickEvent, nextCycle()); 233 break; 234 } 235 } 236 if (_status != Running) { 237 _status = Idle; 238 } 239 assert(threadContexts.size() == 1); 240 ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT 241 data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too 242 data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too 243} 244 245 246void 247AtomicSimpleCPU::activateContext(int thread_num, int delay) 248{ 249 DPRINTF(SimpleCPU, "ActivateContext %d (%d cycles)\n", thread_num, delay); 250 251 assert(thread_num == 0); 252 assert(thread); 253 254 assert(_status == Idle); 255 assert(!tickEvent.scheduled()); 256 257 notIdleFraction++; 258 numCycles += tickToCycles(thread->lastActivate - thread->lastSuspend); 259 260 //Make sure ticks are still on multiples of cycles 261 schedule(tickEvent, nextCycle(curTick + ticks(delay))); 262 _status = Running; 263} 264 265 266void 267AtomicSimpleCPU::suspendContext(int thread_num) 268{ 
269 DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num); 270 271 assert(thread_num == 0); 272 assert(thread); 273 274 if (_status == Idle) 275 return; 276 277 assert(_status == Running); 278 279 // tick event may not be scheduled if this gets called from inside 280 // an instruction's execution, e.g. "quiesce" 281 if (tickEvent.scheduled()) 282 deschedule(tickEvent); 283 284 notIdleFraction--; 285 _status = Idle; 286} 287 288 289template <class T> 290Fault 291AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) 292{ 293 // use the CPU's statically allocated read request and packet objects 294 Request *req = &data_read_req; 295 296 if (traceData) { 297 traceData->setAddr(addr); 298 } 299 300 //The block size of our peer. 301 int blockSize = dcachePort.peerBlockSize(); 302 //The size of the data we're trying to read. 303 int dataSize = sizeof(T); 304 305 uint8_t * dataPtr = (uint8_t *)&data; 306 307 //The address of the second part of this access if it needs to be split 308 //across a cache line boundary. 309 Addr secondAddr = roundDown(addr + dataSize - 1, blockSize); 310 311 if(secondAddr > addr) 312 dataSize = secondAddr - addr; 313 314 dcache_latency = 0; 315 316 while(1) { 317 req->setVirt(0, addr, dataSize, flags, thread->readPC()); 318 319 // translate to physical address 320 Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Read); 321 322 // Now do the access. 323 if (fault == NoFault) { 324 Packet pkt = Packet(req, 325 req->isLlsc() ? 
MemCmd::LoadLockedReq : MemCmd::ReadReq, 326 Packet::Broadcast); 327 pkt.dataStatic(dataPtr); 328 329 if (req->isMmapedIpr()) 330 dcache_latency += TheISA::handleIprRead(thread->getTC(), &pkt); 331 else { 332 if (hasPhysMemPort && pkt.getAddr() == physMemAddr) 333 dcache_latency += physmemPort.sendAtomic(&pkt); 334 else 335 dcache_latency += dcachePort.sendAtomic(&pkt); 336 } 337 dcache_access = true; 338 339 assert(!pkt.isError()); 340 341 if (req->isLlsc()) { 342 TheISA::handleLockedRead(thread, req); 343 } 344 } 345 346 // This will need a new way to tell if it has a dcache attached. 347 if (req->isUncacheable()) 348 recordEvent("Uncached Read"); 349 350 //If there's a fault, return it 351 if (fault != NoFault) 352 return fault; 353 //If we don't need to access a second cache line, stop now. 354 if (secondAddr <= addr) 355 { 356 data = gtoh(data); 357 if (traceData) { 358 traceData->setData(data); 359 } 360 return fault; 361 } 362 363 /* 364 * Set up for accessing the second cache line. 365 */ 366 367 //Move the pointer we're reading into to the correct location. 368 dataPtr += dataSize; 369 //Adjust the size to get the remaining bytes. 370 dataSize = addr + sizeof(T) - secondAddr; 371 //And access the right address. 
372 addr = secondAddr; 373 } 374} 375 376#ifndef DOXYGEN_SHOULD_SKIP_THIS 377 378template 379Fault 380AtomicSimpleCPU::read(Addr addr, Twin32_t &data, unsigned flags); 381 382template 383Fault 384AtomicSimpleCPU::read(Addr addr, Twin64_t &data, unsigned flags); 385 386template 387Fault 388AtomicSimpleCPU::read(Addr addr, uint64_t &data, unsigned flags); 389 390template 391Fault 392AtomicSimpleCPU::read(Addr addr, uint32_t &data, unsigned flags); 393 394template 395Fault 396AtomicSimpleCPU::read(Addr addr, uint16_t &data, unsigned flags); 397 398template 399Fault 400AtomicSimpleCPU::read(Addr addr, uint8_t &data, unsigned flags); 401 402#endif //DOXYGEN_SHOULD_SKIP_THIS 403 404template<> 405Fault 406AtomicSimpleCPU::read(Addr addr, double &data, unsigned flags) 407{ 408 return read(addr, *(uint64_t*)&data, flags); 409} 410 411template<> 412Fault 413AtomicSimpleCPU::read(Addr addr, float &data, unsigned flags) 414{ 415 return read(addr, *(uint32_t*)&data, flags); 416} 417 418 419template<> 420Fault 421AtomicSimpleCPU::read(Addr addr, int32_t &data, unsigned flags) 422{ 423 return read(addr, (uint32_t&)data, flags); 424} 425 426 427template <class T> 428Fault 429AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) 430{ 431 // use the CPU's statically allocated write request and packet objects 432 Request *req = &data_write_req; 433 434 if (traceData) { 435 traceData->setAddr(addr); 436 } 437 438 //The block size of our peer. 439 int blockSize = dcachePort.peerBlockSize(); 440 //The size of the data we're trying to read. 441 int dataSize = sizeof(T); 442 443 uint8_t * dataPtr = (uint8_t *)&data; 444 445 //The address of the second part of this access if it needs to be split 446 //across a cache line boundary. 
447 Addr secondAddr = roundDown(addr + dataSize - 1, blockSize); 448 449 if(secondAddr > addr) 450 dataSize = secondAddr - addr; 451 452 dcache_latency = 0; 453 454 while(1) { 455 req->setVirt(0, addr, dataSize, flags, thread->readPC()); 456 457 // translate to physical address 458 Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Write); 459 460 // Now do the access. 461 if (fault == NoFault) { 462 MemCmd cmd = MemCmd::WriteReq; // default 463 bool do_access = true; // flag to suppress cache access 464 465 if (req->isLlsc()) { 466 cmd = MemCmd::StoreCondReq; 467 do_access = TheISA::handleLockedWrite(thread, req); 468 } else if (req->isSwap()) { 469 cmd = MemCmd::SwapReq; 470 if (req->isCondSwap()) { 471 assert(res); 472 req->setExtraData(*res); 473 } 474 } 475 476 if (do_access) { 477 Packet pkt = Packet(req, cmd, Packet::Broadcast); 478 pkt.dataStatic(dataPtr); 479 480 if (req->isMmapedIpr()) { 481 dcache_latency += 482 TheISA::handleIprWrite(thread->getTC(), &pkt); 483 } else { 484 //XXX This needs to be outside of the loop in order to 485 //work properly for cache line boundary crossing 486 //accesses in transendian simulations. 487 data = htog(data); 488 if (hasPhysMemPort && pkt.getAddr() == physMemAddr) 489 dcache_latency += physmemPort.sendAtomic(&pkt); 490 else 491 dcache_latency += dcachePort.sendAtomic(&pkt); 492 } 493 dcache_access = true; 494 assert(!pkt.isError()); 495 496 if (req->isSwap()) { 497 assert(res); 498 *res = pkt.get<T>(); 499 } 500 } 501 502 if (res && !req->isSwap()) { 503 *res = req->getExtraData(); 504 } 505 } 506 507 // This will need a new way to tell if it's hooked up to a cache or not. 508 if (req->isUncacheable()) 509 recordEvent("Uncached Write"); 510 511 //If there's a fault or we don't need to access a second cache line, 512 //stop now. 
513 if (fault != NoFault || secondAddr <= addr) 514 { 515 // If the write needs to have a fault on the access, consider 516 // calling changeStatus() and changing it to "bad addr write" 517 // or something. 518 if (traceData) { 519 traceData->setData(gtoh(data)); 520 } 521 return fault; 522 } 523 524 /* 525 * Set up for accessing the second cache line. 526 */ 527 528 //Move the pointer we're reading into to the correct location. 529 dataPtr += dataSize; 530 //Adjust the size to get the remaining bytes. 531 dataSize = addr + sizeof(T) - secondAddr; 532 //And access the right address. 533 addr = secondAddr; 534 } 535} 536 537 538#ifndef DOXYGEN_SHOULD_SKIP_THIS 539 540template 541Fault 542AtomicSimpleCPU::write(Twin32_t data, Addr addr, 543 unsigned flags, uint64_t *res); 544 545template 546Fault 547AtomicSimpleCPU::write(Twin64_t data, Addr addr, 548 unsigned flags, uint64_t *res); 549 550template 551Fault 552AtomicSimpleCPU::write(uint64_t data, Addr addr, 553 unsigned flags, uint64_t *res); 554 555template 556Fault 557AtomicSimpleCPU::write(uint32_t data, Addr addr, 558 unsigned flags, uint64_t *res); 559 560template 561Fault 562AtomicSimpleCPU::write(uint16_t data, Addr addr, 563 unsigned flags, uint64_t *res); 564 565template 566Fault 567AtomicSimpleCPU::write(uint8_t data, Addr addr, 568 unsigned flags, uint64_t *res); 569 570#endif //DOXYGEN_SHOULD_SKIP_THIS 571 572template<> 573Fault 574AtomicSimpleCPU::write(double data, Addr addr, unsigned flags, uint64_t *res) 575{ 576 return write(*(uint64_t*)&data, addr, flags, res); 577} 578 579template<> 580Fault 581AtomicSimpleCPU::write(float data, Addr addr, unsigned flags, uint64_t *res) 582{ 583 return write(*(uint32_t*)&data, addr, flags, res); 584} 585 586 587template<> 588Fault 589AtomicSimpleCPU::write(int32_t data, Addr addr, unsigned flags, uint64_t *res) 590{ 591 return write((uint32_t)data, addr, flags, res); 592} 593 594 595void 596AtomicSimpleCPU::tick() 597{ 598 DPRINTF(SimpleCPU, "Tick\n"); 599 600 
Tick latency = 0; 601 602 for (int i = 0; i < width; ++i) { 603 numCycles++; 604 605 if (!curStaticInst || !curStaticInst->isDelayedCommit()) 606 checkForInterrupts(); 607 608 checkPcEventQueue(); 609 610 Fault fault = NoFault; 611 612 bool fromRom = isRomMicroPC(thread->readMicroPC()); 613 if (!fromRom && !curMacroStaticInst) { 614 setupFetchRequest(&ifetch_req); 615 fault = thread->itb->translateAtomic(&ifetch_req, tc, 616 BaseTLB::Execute); 617 } 618 619 if (fault == NoFault) { 620 Tick icache_latency = 0; 621 bool icache_access = false; 622 dcache_access = false; // assume no dcache access 623 624 if (!fromRom && !curMacroStaticInst) { 625 // This is commented out because the predecoder would act like 626 // a tiny cache otherwise. It wouldn't be flushed when needed 627 // like the I cache. It should be flushed, and when that works 628 // this code should be uncommented. 629 //Fetch more instruction memory if necessary 630 //if(predecoder.needMoreBytes()) 631 //{ 632 icache_access = true; 633 Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq, 634 Packet::Broadcast); 635 ifetch_pkt.dataStatic(&inst); 636 637 if (hasPhysMemPort && ifetch_pkt.getAddr() == physMemAddr) 638 icache_latency = physmemPort.sendAtomic(&ifetch_pkt); 639 else 640 icache_latency = icachePort.sendAtomic(&ifetch_pkt); 641 642 assert(!ifetch_pkt.isError()); 643 644 // ifetch_req is initialized to read the instruction directly 645 // into the CPU object's inst field. 646 //} 647 } 648 649 preExecute(); 650 651 if (curStaticInst) { 652 fault = curStaticInst->execute(this, traceData); 653 654 // keep an instruction count 655 if (fault == NoFault) 656 countInst(); 657 else if (traceData) { 658 // If there was a fault, we should trace this instruction. 
659 delete traceData; 660 traceData = NULL; 661 } 662 663 postExecute(); 664 } 665 666 // @todo remove me after debugging with legion done 667 if (curStaticInst && (!curStaticInst->isMicroop() || 668 curStaticInst->isFirstMicroop())) 669 instCnt++; 670 671 Tick stall_ticks = 0; 672 if (simulate_inst_stalls && icache_access) 673 stall_ticks += icache_latency; 674 675 if (simulate_data_stalls && dcache_access) 676 stall_ticks += dcache_latency; 677 678 if (stall_ticks) { 679 Tick stall_cycles = stall_ticks / ticks(1); 680 Tick aligned_stall_ticks = ticks(stall_cycles); 681 682 if (aligned_stall_ticks < stall_ticks) 683 aligned_stall_ticks += 1; 684 685 latency += aligned_stall_ticks; 686 } 687 688 } 689 if(fault != NoFault || !stayAtPC) 690 advancePC(fault); 691 } 692 693 // instruction takes at least one cycle 694 if (latency < ticks(1)) 695 latency = ticks(1); 696 697 if (_status != Idle) 698 schedule(tickEvent, curTick + latency); 699} 700 701 702void 703AtomicSimpleCPU::printAddr(Addr a) 704{ 705 dcachePort.printAddr(a); 706} 707 708 709//////////////////////////////////////////////////////////////////////// 710// 711// AtomicSimpleCPU Simulation Object 712// 713AtomicSimpleCPU * 714AtomicSimpleCPUParams::create() 715{ 716 numThreads = 1; 717#if !FULL_SYSTEM 718 if (workload.size() != 1) 719 panic("only one workload allowed"); 720#endif 721 return new AtomicSimpleCPU(this); 722} 723