atomic.cc revision 4182
1/* 2 * Copyright (c) 2002-2005 The Regents of The University of Michigan 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * Authors: Steve Reinhardt 29 */ 30 31#include "arch/locked_mem.hh" 32#include "arch/mmaped_ipr.hh" 33#include "arch/utility.hh" 34#include "base/bigint.hh" 35#include "cpu/exetrace.hh" 36#include "cpu/simple/atomic.hh" 37#include "mem/packet.hh" 38#include "mem/packet_access.hh" 39#include "sim/builder.hh" 40#include "sim/system.hh" 41 42using namespace std; 43using namespace TheISA; 44 45AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c) 46 : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c) 47{ 48} 49 50 51void 52AtomicSimpleCPU::TickEvent::process() 53{ 54 cpu->tick(); 55} 56 57const char * 58AtomicSimpleCPU::TickEvent::description() 59{ 60 return "AtomicSimpleCPU tick event"; 61} 62 63Port * 64AtomicSimpleCPU::getPort(const std::string &if_name, int idx) 65{ 66 if (if_name == "dcache_port") 67 return &dcachePort; 68 else if (if_name == "icache_port") 69 return &icachePort; 70 else 71 panic("No Such Port\n"); 72} 73 74void 75AtomicSimpleCPU::init() 76{ 77 BaseCPU::init(); 78#if FULL_SYSTEM 79 for (int i = 0; i < threadContexts.size(); ++i) { 80 ThreadContext *tc = threadContexts[i]; 81 82 // initialize CPU, including PC 83 TheISA::initCPU(tc, tc->readCpuId()); 84 } 85#endif 86} 87 88bool 89AtomicSimpleCPU::CpuPort::recvTiming(PacketPtr pkt) 90{ 91 panic("AtomicSimpleCPU doesn't expect recvTiming callback!"); 92 return true; 93} 94 95Tick 96AtomicSimpleCPU::CpuPort::recvAtomic(PacketPtr pkt) 97{ 98 //Snooping a coherence request, just return 99 return 0; 100} 101 102void 103AtomicSimpleCPU::CpuPort::recvFunctional(PacketPtr pkt) 104{ 105 //No internal storage to update, just return 106 return; 107} 108 109void 110AtomicSimpleCPU::CpuPort::recvStatusChange(Status status) 111{ 112 if (status == RangeChange) { 113 if (!snoopRangeSent) { 114 snoopRangeSent = true; 115 sendStatusChange(Port::RangeChange); 116 } 117 return; 118 } 119 120 panic("AtomicSimpleCPU doesn't expect recvStatusChange callback!"); 121} 122 123void 124AtomicSimpleCPU::CpuPort::recvRetry() 125{ 126 panic("AtomicSimpleCPU doesn't expect recvRetry callback!"); 127} 128 129 130AtomicSimpleCPU::AtomicSimpleCPU(Params *p) 131 : BaseSimpleCPU(p), tickEvent(this), 132 width(p->width), simulate_stalls(p->simulate_stalls), 133 icachePort(name() + "-iport", this), dcachePort(name() + "-iport", this) 134{ 135 _status = Idle; 136 137 icachePort.snoopRangeSent = false; 138 dcachePort.snoopRangeSent = false; 139 140 ifetch_req = new Request(); 141 ifetch_req->setThreadContext(p->cpu_id, 0); // Add thread ID if we add MT 142 ifetch_pkt = new Packet(ifetch_req, MemCmd::ReadReq, Packet::Broadcast); 143 ifetch_pkt->dataStatic(&inst); 144 145 data_read_req = new Request(); 146 data_read_req->setThreadContext(p->cpu_id, 0); // Add thread ID here too 147 data_read_pkt = new Packet(data_read_req, MemCmd::ReadReq, 148 Packet::Broadcast); 149 data_read_pkt->dataStatic(&dataReg); 150 151 data_write_req = new Request(); 152 data_write_req->setThreadContext(p->cpu_id, 0); // Add thread ID here too 153 data_write_pkt = new Packet(data_write_req, MemCmd::WriteReq, 154 Packet::Broadcast); 155 data_swap_pkt = new Packet(data_write_req, MemCmd::SwapReq, 156 Packet::Broadcast); 157} 158 159 160AtomicSimpleCPU::~AtomicSimpleCPU() 161{ 162} 163 164void 165AtomicSimpleCPU::serialize(ostream &os) 166{ 167 SimObject::State so_state = SimObject::getState(); 168 SERIALIZE_ENUM(so_state); 169 Status _status = status(); 170 SERIALIZE_ENUM(_status); 171 BaseSimpleCPU::serialize(os); 172 nameOut(os, csprintf("%s.tickEvent", name())); 173 tickEvent.serialize(os); 174} 175 176void 177AtomicSimpleCPU::unserialize(Checkpoint *cp, const string §ion) 178{ 179 SimObject::State so_state; 180 UNSERIALIZE_ENUM(so_state); 181 UNSERIALIZE_ENUM(_status); 182 BaseSimpleCPU::unserialize(cp, section); 183 tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); 184} 185 186void 187AtomicSimpleCPU::resume() 188{ 189 if (_status != SwitchedOut && _status != Idle) { 190 assert(system->getMemoryMode() == System::Atomic); 191 192 changeState(SimObject::Running); 193 if (thread->status() == ThreadContext::Active) { 194 if (!tickEvent.scheduled()) { 195 tickEvent.schedule(nextCycle()); 196 } 197 } 198 } 199} 200 201void 202AtomicSimpleCPU::switchOut() 203{ 204 assert(status() == Running || status() == Idle); 205 _status = SwitchedOut; 206 207 tickEvent.squash(); 208} 209 210 211void 212AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU) 213{ 214 BaseCPU::takeOverFrom(oldCPU); 215 216 assert(!tickEvent.scheduled()); 217 218 // if any of this CPU's ThreadContexts are active, mark the CPU as 219 // running and schedule its tick event. 220 for (int i = 0; i < threadContexts.size(); ++i) { 221 ThreadContext *tc = threadContexts[i]; 222 if (tc->status() == ThreadContext::Active && _status != Running) { 223 _status = Running; 224 tickEvent.schedule(nextCycle()); 225 break; 226 } 227 } 228 if (_status != Running) { 229 _status = Idle; 230 } 231} 232 233 234void 235AtomicSimpleCPU::activateContext(int thread_num, int delay) 236{ 237 assert(thread_num == 0); 238 assert(thread); 239 240 assert(_status == Idle); 241 assert(!tickEvent.scheduled()); 242 243 notIdleFraction++; 244 245#if FULL_SYSTEM 246 // Connect the ThreadContext's memory ports (Functional/Virtual 247 // Ports) 248 tc->connectMemPorts(); 249#endif 250 251 //Make sure ticks are still on multiples of cycles 252 tickEvent.schedule(nextCycle(curTick + cycles(delay))); 253 _status = Running; 254} 255 256 257void 258AtomicSimpleCPU::suspendContext(int thread_num) 259{ 260 assert(thread_num == 0); 261 assert(thread); 262 263 assert(_status == Running); 264 265 // tick event may not be scheduled if this gets called from inside 266 // an instruction's execution, e.g. "quiesce" 267 if (tickEvent.scheduled()) 268 tickEvent.deschedule(); 269 270 notIdleFraction--; 271 _status = Idle; 272} 273 274 275template <class T> 276Fault 277AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) 278{ 279 // use the CPU's statically allocated read request and packet objects 280 Request *req = data_read_req; 281 PacketPtr pkt = data_read_pkt; 282 283 req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); 284 285 if (traceData) { 286 traceData->setAddr(addr); 287 } 288 289 // translate to physical address 290 Fault fault = thread->translateDataReadReq(req); 291 292 // Now do the access. 293 if (fault == NoFault) { 294 pkt->reinitFromRequest(); 295 296 if (req->isMmapedIpr()) 297 dcache_latency = TheISA::handleIprRead(thread->getTC(),pkt); 298 else 299 dcache_latency = dcachePort.sendAtomic(pkt); 300 dcache_access = true; 301#if !defined(NDEBUG) 302 if (pkt->result != Packet::Success) 303 panic("Unable to find responder for address pa = %#X va = %#X\n", 304 pkt->req->getPaddr(), pkt->req->getVaddr()); 305#endif 306 data = pkt->get<T>(); 307 308 if (req->isLocked()) { 309 TheISA::handleLockedRead(thread, req); 310 } 311 } 312 313 // This will need a new way to tell if it has a dcache attached. 314 if (req->isUncacheable()) 315 recordEvent("Uncached Read"); 316 317 return fault; 318} 319 320#ifndef DOXYGEN_SHOULD_SKIP_THIS 321 322template 323Fault 324AtomicSimpleCPU::read(Addr addr, Twin32_t &data, unsigned flags); 325 326template 327Fault 328AtomicSimpleCPU::read(Addr addr, Twin64_t &data, unsigned flags); 329 330template 331Fault 332AtomicSimpleCPU::read(Addr addr, uint64_t &data, unsigned flags); 333 334template 335Fault 336AtomicSimpleCPU::read(Addr addr, uint32_t &data, unsigned flags); 337 338template 339Fault 340AtomicSimpleCPU::read(Addr addr, uint16_t &data, unsigned flags); 341 342template 343Fault 344AtomicSimpleCPU::read(Addr addr, uint8_t &data, unsigned flags); 345 346#endif //DOXYGEN_SHOULD_SKIP_THIS 347 348template<> 349Fault 350AtomicSimpleCPU::read(Addr addr, double &data, unsigned flags) 351{ 352 return read(addr, *(uint64_t*)&data, flags); 353} 354 355template<> 356Fault 357AtomicSimpleCPU::read(Addr addr, float &data, unsigned flags) 358{ 359 return read(addr, *(uint32_t*)&data, flags); 360} 361 362 363template<> 364Fault 365AtomicSimpleCPU::read(Addr addr, int32_t &data, unsigned flags) 366{ 367 return read(addr, (uint32_t&)data, flags); 368} 369 370 371template <class T> 372Fault 373AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) 374{ 375 // use the CPU's statically allocated write request and packet objects 376 Request *req = data_write_req; 377 PacketPtr pkt; 378 379 req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); 380 381 if (req->isSwap()) 382 pkt = data_swap_pkt; 383 else 384 pkt = data_write_pkt; 385 386 if (traceData) { 387 traceData->setAddr(addr); 388 } 389 390 // translate to physical address 391 Fault fault = thread->translateDataWriteReq(req); 392 393 // Now do the access. 394 if (fault == NoFault) { 395 bool do_access = true; // flag to suppress cache access 396 397 if (req->isLocked()) { 398 do_access = TheISA::handleLockedWrite(thread, req); 399 } 400 if (req->isCondSwap()) { 401 assert(res); 402 req->setExtraData(*res); 403 } 404 405 406 if (do_access) { 407 pkt->reinitFromRequest(); 408 pkt->dataStatic(&data); 409 410 if (req->isMmapedIpr()) { 411 dcache_latency = TheISA::handleIprWrite(thread->getTC(), pkt); 412 } else { 413 data = htog(data); 414 dcache_latency = dcachePort.sendAtomic(pkt); 415 } 416 dcache_access = true; 417 418#if !defined(NDEBUG) 419 if (pkt->result != Packet::Success) 420 panic("Unable to find responder for address pa = %#X va = %#X\n", 421 pkt->req->getPaddr(), pkt->req->getVaddr()); 422#endif 423 } 424 425 if (req->isSwap()) { 426 assert(res); 427 *res = pkt->get<T>(); 428 } else if (res) { 429 *res = req->getExtraData(); 430 } 431 } 432 433 // This will need a new way to tell if it's hooked up to a cache or not. 434 if (req->isUncacheable()) 435 recordEvent("Uncached Write"); 436 437 // If the write needs to have a fault on the access, consider calling 438 // changeStatus() and changing it to "bad addr write" or something. 439 return fault; 440} 441 442 443#ifndef DOXYGEN_SHOULD_SKIP_THIS 444template 445Fault 446AtomicSimpleCPU::write(uint64_t data, Addr addr, 447 unsigned flags, uint64_t *res); 448 449template 450Fault 451AtomicSimpleCPU::write(uint32_t data, Addr addr, 452 unsigned flags, uint64_t *res); 453 454template 455Fault 456AtomicSimpleCPU::write(uint16_t data, Addr addr, 457 unsigned flags, uint64_t *res); 458 459template 460Fault 461AtomicSimpleCPU::write(uint8_t data, Addr addr, 462 unsigned flags, uint64_t *res); 463 464#endif //DOXYGEN_SHOULD_SKIP_THIS 465 466template<> 467Fault 468AtomicSimpleCPU::write(double data, Addr addr, unsigned flags, uint64_t *res) 469{ 470 return write(*(uint64_t*)&data, addr, flags, res); 471} 472 473template<> 474Fault 475AtomicSimpleCPU::write(float data, Addr addr, unsigned flags, uint64_t *res) 476{ 477 return write(*(uint32_t*)&data, addr, flags, res); 478} 479 480 481template<> 482Fault 483AtomicSimpleCPU::write(int32_t data, Addr addr, unsigned flags, uint64_t *res) 484{ 485 return write((uint32_t)data, addr, flags, res); 486} 487 488 489void 490AtomicSimpleCPU::tick() 491{ 492 Tick latency = cycles(1); // instruction takes one cycle by default 493 494 for (int i = 0; i < width; ++i) { 495 numCycles++; 496 497 if (!curStaticInst || !curStaticInst->isDelayedCommit()) 498 checkForInterrupts(); 499 500 Fault fault = setupFetchRequest(ifetch_req); 501 502 if (fault == NoFault) { 503 Tick icache_latency = 0; 504 bool icache_access = false; 505 dcache_access = false; // assume no dcache access 506 507 //Fetch more instruction memory if necessary 508 if(predecoder.needMoreBytes()) 509 { 510 icache_access = true; 511 ifetch_pkt->reinitFromRequest(); 512 513 icache_latency = icachePort.sendAtomic(ifetch_pkt); 514 // ifetch_req is initialized to read the instruction directly 515 // into the CPU object's inst field. 516 } 517 518 preExecute(); 519 520 if(curStaticInst) 521 { 522 fault = curStaticInst->execute(this, traceData); 523 postExecute(); 524 } 525 526 // @todo remove me after debugging with legion done 527 if (curStaticInst && (!curStaticInst->isMicroOp() || 528 curStaticInst->isFirstMicroOp())) 529 instCnt++; 530 531 if (simulate_stalls) { 532 Tick icache_stall = 533 icache_access ? icache_latency - cycles(1) : 0; 534 Tick dcache_stall = 535 dcache_access ? dcache_latency - cycles(1) : 0; 536 Tick stall_cycles = (icache_stall + dcache_stall) / cycles(1); 537 if (cycles(stall_cycles) < (icache_stall + dcache_stall)) 538 latency += cycles(stall_cycles+1); 539 else 540 latency += cycles(stall_cycles); 541 } 542 543 } 544 if(predecoder.needMoreBytes()) 545 advancePC(fault); 546 } 547 548 if (_status != Idle) 549 tickEvent.schedule(curTick + latency); 550} 551 552 553//////////////////////////////////////////////////////////////////////// 554// 555// AtomicSimpleCPU Simulation Object 556// 557BEGIN_DECLARE_SIM_OBJECT_PARAMS(AtomicSimpleCPU) 558 559 Param<Counter> max_insts_any_thread; 560 Param<Counter> max_insts_all_threads; 561 Param<Counter> max_loads_any_thread; 562 Param<Counter> max_loads_all_threads; 563 Param<Tick> progress_interval; 564 SimObjectParam<System *> system; 565 Param<int> cpu_id; 566 567#if FULL_SYSTEM 568 SimObjectParam<TheISA::ITB *> itb; 569 SimObjectParam<TheISA::DTB *> dtb; 570 Param<Tick> profile; 571 572 Param<bool> do_quiesce; 573 Param<bool> do_checkpoint_insts; 574 Param<bool> do_statistics_insts; 575#else 576 SimObjectParam<Process *> workload; 577#endif // FULL_SYSTEM 578 579 Param<int> clock; 580 Param<int> phase; 581 582 Param<bool> defer_registration; 583 Param<int> width; 584 Param<bool> function_trace; 585 Param<Tick> function_trace_start; 586 Param<bool> simulate_stalls; 587 588END_DECLARE_SIM_OBJECT_PARAMS(AtomicSimpleCPU) 589 590BEGIN_INIT_SIM_OBJECT_PARAMS(AtomicSimpleCPU) 591 592 INIT_PARAM(max_insts_any_thread, 593 "terminate when any thread reaches this inst count"), 594 INIT_PARAM(max_insts_all_threads, 595 "terminate when all threads have reached this inst count"), 596 INIT_PARAM(max_loads_any_thread, 597 "terminate when any thread reaches this load count"), 598 INIT_PARAM(max_loads_all_threads, 599 "terminate when all threads have reached this load count"), 600 INIT_PARAM(progress_interval, "Progress interval"), 601 INIT_PARAM(system, "system object"), 602 INIT_PARAM(cpu_id, "processor ID"), 603 604#if FULL_SYSTEM 605 INIT_PARAM(itb, "Instruction TLB"), 606 INIT_PARAM(dtb, "Data TLB"), 607 INIT_PARAM(profile, ""), 608 INIT_PARAM(do_quiesce, ""), 609 INIT_PARAM(do_checkpoint_insts, ""), 610 INIT_PARAM(do_statistics_insts, ""), 611#else 612 INIT_PARAM(workload, "processes to run"), 613#endif // FULL_SYSTEM 614 615 INIT_PARAM(clock, "clock speed"), 616 INIT_PARAM_DFLT(phase, "clock phase", 0), 617 INIT_PARAM(defer_registration, "defer system registration (for sampling)"), 618 INIT_PARAM(width, "cpu width"), 619 INIT_PARAM(function_trace, "Enable function trace"), 620 INIT_PARAM(function_trace_start, "Cycle to start function trace"), 621 INIT_PARAM(simulate_stalls, "Simulate cache stall cycles") 622 623END_INIT_SIM_OBJECT_PARAMS(AtomicSimpleCPU) 624 625 626CREATE_SIM_OBJECT(AtomicSimpleCPU) 627{ 628 AtomicSimpleCPU::Params *params = new AtomicSimpleCPU::Params(); 629 params->name = getInstanceName(); 630 params->numberOfThreads = 1; 631 params->max_insts_any_thread = max_insts_any_thread; 632 params->max_insts_all_threads = max_insts_all_threads; 633 params->max_loads_any_thread = max_loads_any_thread; 634 params->max_loads_all_threads = max_loads_all_threads; 635 params->progress_interval = progress_interval; 636 params->deferRegistration = defer_registration; 637 params->phase = phase; 638 params->clock = clock; 639 params->functionTrace = function_trace; 640 params->functionTraceStart = function_trace_start; 641 params->width = width; 642 params->simulate_stalls = simulate_stalls; 643 params->system = system; 644 params->cpu_id = cpu_id; 645 646#if FULL_SYSTEM 647 params->itb = itb; 648 params->dtb = dtb; 649 params->profile = profile; 650 params->do_quiesce = do_quiesce; 651 params->do_checkpoint_insts = do_checkpoint_insts; 652 params->do_statistics_insts = do_statistics_insts; 653#else 654 params->process = workload; 655#endif 656 657 AtomicSimpleCPU *cpu = new AtomicSimpleCPU(params); 658 return cpu; 659} 660 661REGISTER_SIM_OBJECT("AtomicSimpleCPU", AtomicSimpleCPU) 662 663