atomic.cc revision 3901
1/* 2 * Copyright (c) 2002-2005 The Regents of The University of Michigan 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * Authors: Steve Reinhardt 29 */ 30 31#include "arch/locked_mem.hh" 32#include "arch/mmaped_ipr.hh" 33#include "arch/utility.hh" 34#include "cpu/exetrace.hh" 35#include "cpu/simple/atomic.hh" 36#include "mem/packet.hh" 37#include "mem/packet_access.hh" 38#include "sim/builder.hh" 39#include "sim/system.hh" 40 41using namespace std; 42using namespace TheISA; 43 44AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c) 45 : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c) 46{ 47} 48 49 50void 51AtomicSimpleCPU::TickEvent::process() 52{ 53 cpu->tick(); 54} 55 56const char * 57AtomicSimpleCPU::TickEvent::description() 58{ 59 return "AtomicSimpleCPU tick event"; 60} 61 62Port * 63AtomicSimpleCPU::getPort(const std::string &if_name, int idx) 64{ 65 if (if_name == "dcache_port") 66 return &dcachePort; 67 else if (if_name == "icache_port") 68 return &icachePort; 69 else 70 panic("No Such Port\n"); 71} 72 73void 74AtomicSimpleCPU::init() 75{ 76 BaseCPU::init(); 77#if FULL_SYSTEM 78 for (int i = 0; i < threadContexts.size(); ++i) { 79 ThreadContext *tc = threadContexts[i]; 80 81 // initialize CPU, including PC 82 TheISA::initCPU(tc, tc->readCpuId()); 83 } 84#endif 85} 86 87bool 88AtomicSimpleCPU::CpuPort::recvTiming(PacketPtr pkt) 89{ 90 panic("AtomicSimpleCPU doesn't expect recvTiming callback!"); 91 return true; 92} 93 94Tick 95AtomicSimpleCPU::CpuPort::recvAtomic(PacketPtr pkt) 96{ 97 //Snooping a coherence request, just return 98 return 0; 99} 100 101void 102AtomicSimpleCPU::CpuPort::recvFunctional(PacketPtr pkt) 103{ 104 //No internal storage to update, just return 105 return; 106} 107 108void 109AtomicSimpleCPU::CpuPort::recvStatusChange(Status status) 110{ 111 if (status == RangeChange) { 112 if (!snoopRangeSent) { 113 snoopRangeSent = true; 114 sendStatusChange(Port::RangeChange); 115 } 116 return; 117 } 118 119 panic("AtomicSimpleCPU doesn't expect recvStatusChange callback!"); 120} 121 122void 123AtomicSimpleCPU::CpuPort::recvRetry() 124{ 125 panic("AtomicSimpleCPU doesn't expect recvRetry callback!"); 126} 127 128 129AtomicSimpleCPU::AtomicSimpleCPU(Params *p) 130 : BaseSimpleCPU(p), tickEvent(this), 131 width(p->width), simulate_stalls(p->simulate_stalls), 132 icachePort(name() + "-iport", this), dcachePort(name() + "-iport", this) 133{ 134 _status = Idle; 135 136 icachePort.snoopRangeSent = false; 137 dcachePort.snoopRangeSent = false; 138 139 ifetch_req = new Request(); 140 ifetch_req->setThreadContext(p->cpu_id, 0); // Add thread ID if we add MT 141 ifetch_pkt = new Packet(ifetch_req, Packet::ReadReq, Packet::Broadcast); 142 ifetch_pkt->dataStatic(&inst); 143 144 data_read_req = new Request(); 145 data_read_req->setThreadContext(p->cpu_id, 0); // Add thread ID here too 146 data_read_pkt = new Packet(data_read_req, Packet::ReadReq, 147 Packet::Broadcast); 148 data_read_pkt->dataStatic(&dataReg); 149 150 data_write_req = new Request(); 151 data_write_req->setThreadContext(p->cpu_id, 0); // Add thread ID here too 152 data_write_pkt = new Packet(data_write_req, Packet::WriteReq, 153 Packet::Broadcast); 154} 155 156 157AtomicSimpleCPU::~AtomicSimpleCPU() 158{ 159} 160 161void 162AtomicSimpleCPU::serialize(ostream &os) 163{ 164 SimObject::State so_state = SimObject::getState(); 165 SERIALIZE_ENUM(so_state); 166 Status _status = status(); 167 SERIALIZE_ENUM(_status); 168 BaseSimpleCPU::serialize(os); 169 nameOut(os, csprintf("%s.tickEvent", name())); 170 tickEvent.serialize(os); 171} 172 173void 174AtomicSimpleCPU::unserialize(Checkpoint *cp, const string §ion) 175{ 176 SimObject::State so_state; 177 UNSERIALIZE_ENUM(so_state); 178 UNSERIALIZE_ENUM(_status); 179 BaseSimpleCPU::unserialize(cp, section); 180 tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); 181} 182 183void 184AtomicSimpleCPU::resume() 185{ 186 if (_status != SwitchedOut && _status != Idle) { 187 assert(system->getMemoryMode() == System::Atomic); 188 189 changeState(SimObject::Running); 190 if (thread->status() == ThreadContext::Active) { 191 if (!tickEvent.scheduled()) { 192 tickEvent.schedule(nextCycle()); 193 } 194 } 195 } 196} 197 198void 199AtomicSimpleCPU::switchOut() 200{ 201 assert(status() == Running || status() == Idle); 202 _status = SwitchedOut; 203 204 tickEvent.squash(); 205} 206 207 208void 209AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU) 210{ 211 BaseCPU::takeOverFrom(oldCPU); 212 213 assert(!tickEvent.scheduled()); 214 215 // if any of this CPU's ThreadContexts are active, mark the CPU as 216 // running and schedule its tick event. 217 for (int i = 0; i < threadContexts.size(); ++i) { 218 ThreadContext *tc = threadContexts[i]; 219 if (tc->status() == ThreadContext::Active && _status != Running) { 220 _status = Running; 221 tickEvent.schedule(nextCycle()); 222 break; 223 } 224 } 225 if (_status != Running) { 226 _status = Idle; 227 } 228} 229 230 231void 232AtomicSimpleCPU::activateContext(int thread_num, int delay) 233{ 234 assert(thread_num == 0); 235 assert(thread); 236 237 assert(_status == Idle); 238 assert(!tickEvent.scheduled()); 239 240 notIdleFraction++; 241 242#if FULL_SYSTEM 243 // Connect the ThreadContext's memory ports (Functional/Virtual 244 // Ports) 245 tc->connectMemPorts(); 246#endif 247 248 //Make sure ticks are still on multiples of cycles 249 tickEvent.schedule(nextCycle(curTick + cycles(delay))); 250 _status = Running; 251} 252 253 254void 255AtomicSimpleCPU::suspendContext(int thread_num) 256{ 257 assert(thread_num == 0); 258 assert(thread); 259 260 assert(_status == Running); 261 262 // tick event may not be scheduled if this gets called from inside 263 // an instruction's execution, e.g. "quiesce" 264 if (tickEvent.scheduled()) 265 tickEvent.deschedule(); 266 267 notIdleFraction--; 268 _status = Idle; 269} 270 271 272template <class T> 273Fault 274AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) 275{ 276 // use the CPU's statically allocated read request and packet objects 277 Request *req = data_read_req; 278 PacketPtr pkt = data_read_pkt; 279 280 req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); 281 282 if (traceData) { 283 traceData->setAddr(addr); 284 } 285 286 // translate to physical address 287 Fault fault = thread->translateDataReadReq(req); 288 289 // Now do the access. 290 if (fault == NoFault) { 291 pkt->reinitFromRequest(); 292 293 if (req->isMmapedIpr()) 294 dcache_latency = TheISA::handleIprRead(thread->getTC(),pkt); 295 else 296 dcache_latency = dcachePort.sendAtomic(pkt); 297 dcache_access = true; 298#if !defined(NDEBUG) 299 if (pkt->result != Packet::Success) 300 panic("Unable to find responder for address pa = %#X va = %#X\n", 301 pkt->req->getPaddr(), pkt->req->getVaddr()); 302#endif 303 data = pkt->get<T>(); 304 305 if (req->isLocked()) { 306 TheISA::handleLockedRead(thread, req); 307 } 308 } 309 310 // This will need a new way to tell if it has a dcache attached. 311 if (req->isUncacheable()) 312 recordEvent("Uncached Read"); 313 314 return fault; 315} 316 317#ifndef DOXYGEN_SHOULD_SKIP_THIS 318 319template 320Fault 321AtomicSimpleCPU::read(Addr addr, uint64_t &data, unsigned flags); 322 323template 324Fault 325AtomicSimpleCPU::read(Addr addr, uint32_t &data, unsigned flags); 326 327template 328Fault 329AtomicSimpleCPU::read(Addr addr, uint16_t &data, unsigned flags); 330 331template 332Fault 333AtomicSimpleCPU::read(Addr addr, uint8_t &data, unsigned flags); 334 335#endif //DOXYGEN_SHOULD_SKIP_THIS 336 337template<> 338Fault 339AtomicSimpleCPU::read(Addr addr, double &data, unsigned flags) 340{ 341 return read(addr, *(uint64_t*)&data, flags); 342} 343 344template<> 345Fault 346AtomicSimpleCPU::read(Addr addr, float &data, unsigned flags) 347{ 348 return read(addr, *(uint32_t*)&data, flags); 349} 350 351 352template<> 353Fault 354AtomicSimpleCPU::read(Addr addr, int32_t &data, unsigned flags) 355{ 356 return read(addr, (uint32_t&)data, flags); 357} 358 359 360template <class T> 361Fault 362AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) 363{ 364 // use the CPU's statically allocated write request and packet objects 365 Request *req = data_write_req; 366 PacketPtr pkt = data_write_pkt; 367 368 req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); 369 370 if (traceData) { 371 traceData->setAddr(addr); 372 } 373 374 // translate to physical address 375 Fault fault = thread->translateDataWriteReq(req); 376 377 // Now do the access. 378 if (fault == NoFault) { 379 bool do_access = true; // flag to suppress cache access 380 381 if (req->isLocked()) { 382 do_access = TheISA::handleLockedWrite(thread, req); 383 } 384 385 if (do_access) { 386 pkt->reinitFromRequest(); 387 pkt->dataStatic(&data); 388 389 if (req->isMmapedIpr()) { 390 dcache_latency = TheISA::handleIprWrite(thread->getTC(), pkt); 391 } else { 392 data = htog(data); 393 dcache_latency = dcachePort.sendAtomic(pkt); 394 } 395 dcache_access = true; 396 397#if !defined(NDEBUG) 398 if (pkt->result != Packet::Success) 399 panic("Unable to find responder for address pa = %#X va = %#X\n", 400 pkt->req->getPaddr(), pkt->req->getVaddr()); 401#endif 402 } 403 404 if (req->isLocked()) { 405 uint64_t scResult = req->getScResult(); 406 if (scResult != 0) { 407 // clear failure counter 408 thread->setStCondFailures(0); 409 } 410 if (res) { 411 *res = req->getScResult(); 412 } 413 } 414 } 415 416 // This will need a new way to tell if it's hooked up to a cache or not. 417 if (req->isUncacheable()) 418 recordEvent("Uncached Write"); 419 420 // If the write needs to have a fault on the access, consider calling 421 // changeStatus() and changing it to "bad addr write" or something. 422 return fault; 423} 424 425 426#ifndef DOXYGEN_SHOULD_SKIP_THIS 427template 428Fault 429AtomicSimpleCPU::write(uint64_t data, Addr addr, 430 unsigned flags, uint64_t *res); 431 432template 433Fault 434AtomicSimpleCPU::write(uint32_t data, Addr addr, 435 unsigned flags, uint64_t *res); 436 437template 438Fault 439AtomicSimpleCPU::write(uint16_t data, Addr addr, 440 unsigned flags, uint64_t *res); 441 442template 443Fault 444AtomicSimpleCPU::write(uint8_t data, Addr addr, 445 unsigned flags, uint64_t *res); 446 447#endif //DOXYGEN_SHOULD_SKIP_THIS 448 449template<> 450Fault 451AtomicSimpleCPU::write(double data, Addr addr, unsigned flags, uint64_t *res) 452{ 453 return write(*(uint64_t*)&data, addr, flags, res); 454} 455 456template<> 457Fault 458AtomicSimpleCPU::write(float data, Addr addr, unsigned flags, uint64_t *res) 459{ 460 return write(*(uint32_t*)&data, addr, flags, res); 461} 462 463 464template<> 465Fault 466AtomicSimpleCPU::write(int32_t data, Addr addr, unsigned flags, uint64_t *res) 467{ 468 return write((uint32_t)data, addr, flags, res); 469} 470 471 472void 473AtomicSimpleCPU::tick() 474{ 475 Tick latency = cycles(1); // instruction takes one cycle by default 476 477 for (int i = 0; i < width; ++i) { 478 numCycles++; 479 480 if (!curStaticInst || !curStaticInst->isDelayedCommit()) 481 checkForInterrupts(); 482 483 Fault fault = setupFetchRequest(ifetch_req); 484 485 if (fault == NoFault) { 486 ifetch_pkt->reinitFromRequest(); 487 488 Tick icache_latency = icachePort.sendAtomic(ifetch_pkt); 489 // ifetch_req is initialized to read the instruction directly 490 // into the CPU object's inst field. 491 492 dcache_access = false; // assume no dcache access 493 preExecute(); 494 495 fault = curStaticInst->execute(this, traceData); 496 postExecute(); 497 498 // @todo remove me after debugging with legion done 499 if (curStaticInst && (!curStaticInst->isMicroOp() || 500 curStaticInst->isFirstMicroOp())) 501 instCnt++; 502 503 if (simulate_stalls) { 504 Tick icache_stall = icache_latency - cycles(1); 505 Tick dcache_stall = 506 dcache_access ? dcache_latency - cycles(1) : 0; 507 Tick stall_cycles = (icache_stall + dcache_stall) / cycles(1); 508 if (cycles(stall_cycles) < (icache_stall + dcache_stall)) 509 latency += cycles(stall_cycles+1); 510 else 511 latency += cycles(stall_cycles); 512 } 513 514 } 515 516 advancePC(fault); 517 } 518 519 if (_status != Idle) 520 tickEvent.schedule(curTick + latency); 521} 522 523 524//////////////////////////////////////////////////////////////////////// 525// 526// AtomicSimpleCPU Simulation Object 527// 528BEGIN_DECLARE_SIM_OBJECT_PARAMS(AtomicSimpleCPU) 529 530 Param<Counter> max_insts_any_thread; 531 Param<Counter> max_insts_all_threads; 532 Param<Counter> max_loads_any_thread; 533 Param<Counter> max_loads_all_threads; 534 Param<Tick> progress_interval; 535 SimObjectParam<System *> system; 536 Param<int> cpu_id; 537 538#if FULL_SYSTEM 539 SimObjectParam<TheISA::ITB *> itb; 540 SimObjectParam<TheISA::DTB *> dtb; 541 Param<Tick> profile; 542 543 Param<bool> do_quiesce; 544 Param<bool> do_checkpoint_insts; 545 Param<bool> do_statistics_insts; 546#else 547 SimObjectParam<Process *> workload; 548#endif // FULL_SYSTEM 549 550 Param<int> clock; 551 Param<int> phase; 552 553 Param<bool> defer_registration; 554 Param<int> width; 555 Param<bool> function_trace; 556 Param<Tick> function_trace_start; 557 Param<bool> simulate_stalls; 558 559END_DECLARE_SIM_OBJECT_PARAMS(AtomicSimpleCPU) 560 561BEGIN_INIT_SIM_OBJECT_PARAMS(AtomicSimpleCPU) 562 563 INIT_PARAM(max_insts_any_thread, 564 "terminate when any thread reaches this inst count"), 565 INIT_PARAM(max_insts_all_threads, 566 "terminate when all threads have reached this inst count"), 567 INIT_PARAM(max_loads_any_thread, 568 "terminate when any thread reaches this load count"), 569 INIT_PARAM(max_loads_all_threads, 570 "terminate when all threads have reached this load count"), 571 INIT_PARAM(progress_interval, "Progress interval"), 572 INIT_PARAM(system, "system object"), 573 INIT_PARAM(cpu_id, "processor ID"), 574 575#if FULL_SYSTEM 576 INIT_PARAM(itb, "Instruction TLB"), 577 INIT_PARAM(dtb, "Data TLB"), 578 INIT_PARAM(profile, ""), 579 INIT_PARAM(do_quiesce, ""), 580 INIT_PARAM(do_checkpoint_insts, ""), 581 INIT_PARAM(do_statistics_insts, ""), 582#else 583 INIT_PARAM(workload, "processes to run"), 584#endif // FULL_SYSTEM 585 586 INIT_PARAM(clock, "clock speed"), 587 INIT_PARAM_DFLT(phase, "clock phase", 0), 588 INIT_PARAM(defer_registration, "defer system registration (for sampling)"), 589 INIT_PARAM(width, "cpu width"), 590 INIT_PARAM(function_trace, "Enable function trace"), 591 INIT_PARAM(function_trace_start, "Cycle to start function trace"), 592 INIT_PARAM(simulate_stalls, "Simulate cache stall cycles") 593 594END_INIT_SIM_OBJECT_PARAMS(AtomicSimpleCPU) 595 596 597CREATE_SIM_OBJECT(AtomicSimpleCPU) 598{ 599 AtomicSimpleCPU::Params *params = new AtomicSimpleCPU::Params(); 600 params->name = getInstanceName(); 601 params->numberOfThreads = 1; 602 params->max_insts_any_thread = max_insts_any_thread; 603 params->max_insts_all_threads = max_insts_all_threads; 604 params->max_loads_any_thread = max_loads_any_thread; 605 params->max_loads_all_threads = max_loads_all_threads; 606 params->progress_interval = progress_interval; 607 params->deferRegistration = defer_registration; 608 params->phase = phase; 609 params->clock = clock; 610 params->functionTrace = function_trace; 611 params->functionTraceStart = function_trace_start; 612 params->width = width; 613 params->simulate_stalls = simulate_stalls; 614 params->system = system; 615 params->cpu_id = cpu_id; 616 617#if FULL_SYSTEM 618 params->itb = itb; 619 params->dtb = dtb; 620 params->profile = profile; 621 params->do_quiesce = do_quiesce; 622 params->do_checkpoint_insts = do_checkpoint_insts; 623 params->do_statistics_insts = do_statistics_insts; 624#else 625 params->process = workload; 626#endif 627 628 AtomicSimpleCPU *cpu = new AtomicSimpleCPU(params); 629 return cpu; 630} 631 632REGISTER_SIM_OBJECT("AtomicSimpleCPU", AtomicSimpleCPU) 633 634