// atomic.cc revision 4870
1/* 2 * Copyright (c) 2002-2005 The Regents of The University of Michigan 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 * 28 * Authors: Steve Reinhardt 29 */ 30 31#include "arch/locked_mem.hh" 32#include "arch/mmaped_ipr.hh" 33#include "arch/utility.hh" 34#include "base/bigint.hh" 35#include "cpu/exetrace.hh" 36#include "cpu/simple/atomic.hh" 37#include "mem/packet.hh" 38#include "mem/packet_access.hh" 39#include "sim/builder.hh" 40#include "sim/system.hh" 41 42using namespace std; 43using namespace TheISA; 44 45AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c) 46 : Event(&mainEventQueue, CPU_Tick_Pri), cpu(c) 47{ 48} 49 50 51void 52AtomicSimpleCPU::TickEvent::process() 53{ 54 cpu->tick(); 55} 56 57const char * 58AtomicSimpleCPU::TickEvent::description() 59{ 60 return "AtomicSimpleCPU tick event"; 61} 62 63Port * 64AtomicSimpleCPU::getPort(const std::string &if_name, int idx) 65{ 66 if (if_name == "dcache_port") 67 return &dcachePort; 68 else if (if_name == "icache_port") 69 return &icachePort; 70 else 71 panic("No Such Port\n"); 72} 73 74void 75AtomicSimpleCPU::init() 76{ 77 BaseCPU::init(); 78#if FULL_SYSTEM 79 for (int i = 0; i < threadContexts.size(); ++i) { 80 ThreadContext *tc = threadContexts[i]; 81 82 // initialize CPU, including PC 83 TheISA::initCPU(tc, tc->readCpuId()); 84 } 85#endif 86} 87 88bool 89AtomicSimpleCPU::CpuPort::recvTiming(PacketPtr pkt) 90{ 91 panic("AtomicSimpleCPU doesn't expect recvTiming callback!"); 92 return true; 93} 94 95Tick 96AtomicSimpleCPU::CpuPort::recvAtomic(PacketPtr pkt) 97{ 98 //Snooping a coherence request, just return 99 return 0; 100} 101 102void 103AtomicSimpleCPU::CpuPort::recvFunctional(PacketPtr pkt) 104{ 105 //No internal storage to update, just return 106 return; 107} 108 109void 110AtomicSimpleCPU::CpuPort::recvStatusChange(Status status) 111{ 112 if (status == RangeChange) { 113 if (!snoopRangeSent) { 114 snoopRangeSent = true; 115 sendStatusChange(Port::RangeChange); 116 } 117 return; 118 } 119 120 panic("AtomicSimpleCPU doesn't expect recvStatusChange callback!"); 121} 122 123void 
124AtomicSimpleCPU::CpuPort::recvRetry() 125{ 126 panic("AtomicSimpleCPU doesn't expect recvRetry callback!"); 127} 128 129void 130AtomicSimpleCPU::DcachePort::setPeer(Port *port) 131{ 132 Port::setPeer(port); 133 134#if FULL_SYSTEM 135 // Update the ThreadContext's memory ports (Functional/Virtual 136 // Ports) 137 cpu->tcBase()->connectMemPorts(); 138#endif 139} 140 141AtomicSimpleCPU::AtomicSimpleCPU(Params *p) 142 : BaseSimpleCPU(p), tickEvent(this), 143 width(p->width), simulate_stalls(p->simulate_stalls), 144 icachePort(name() + "-iport", this), dcachePort(name() + "-iport", this) 145{ 146 _status = Idle; 147 148 icachePort.snoopRangeSent = false; 149 dcachePort.snoopRangeSent = false; 150 151 ifetch_req.setThreadContext(p->cpu_id, 0); // Add thread ID if we add MT 152 data_read_req.setThreadContext(p->cpu_id, 0); // Add thread ID here too 153 data_write_req.setThreadContext(p->cpu_id, 0); // Add thread ID here too 154} 155 156 157AtomicSimpleCPU::~AtomicSimpleCPU() 158{ 159} 160 161void 162AtomicSimpleCPU::serialize(ostream &os) 163{ 164 SimObject::State so_state = SimObject::getState(); 165 SERIALIZE_ENUM(so_state); 166 Status _status = status(); 167 SERIALIZE_ENUM(_status); 168 BaseSimpleCPU::serialize(os); 169 nameOut(os, csprintf("%s.tickEvent", name())); 170 tickEvent.serialize(os); 171} 172 173void 174AtomicSimpleCPU::unserialize(Checkpoint *cp, const string §ion) 175{ 176 SimObject::State so_state; 177 UNSERIALIZE_ENUM(so_state); 178 UNSERIALIZE_ENUM(_status); 179 BaseSimpleCPU::unserialize(cp, section); 180 tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); 181} 182 183void 184AtomicSimpleCPU::resume() 185{ 186 if (_status != SwitchedOut && _status != Idle) { 187 assert(system->getMemoryMode() == System::Atomic); 188 189 changeState(SimObject::Running); 190 if (thread->status() == ThreadContext::Active) { 191 if (!tickEvent.scheduled()) { 192 tickEvent.schedule(nextCycle()); 193 } 194 } 195 } 196} 197 198void 
199AtomicSimpleCPU::switchOut() 200{ 201 assert(status() == Running || status() == Idle); 202 _status = SwitchedOut; 203 204 tickEvent.squash(); 205} 206 207 208void 209AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU) 210{ 211 BaseCPU::takeOverFrom(oldCPU, &icachePort, &dcachePort); 212 213 assert(!tickEvent.scheduled()); 214 215 // if any of this CPU's ThreadContexts are active, mark the CPU as 216 // running and schedule its tick event. 217 for (int i = 0; i < threadContexts.size(); ++i) { 218 ThreadContext *tc = threadContexts[i]; 219 if (tc->status() == ThreadContext::Active && _status != Running) { 220 _status = Running; 221 tickEvent.schedule(nextCycle()); 222 break; 223 } 224 } 225 if (_status != Running) { 226 _status = Idle; 227 } 228} 229 230 231void 232AtomicSimpleCPU::activateContext(int thread_num, int delay) 233{ 234 assert(thread_num == 0); 235 assert(thread); 236 237 assert(_status == Idle); 238 assert(!tickEvent.scheduled()); 239 240 notIdleFraction++; 241 242 //Make sure ticks are still on multiples of cycles 243 tickEvent.schedule(nextCycle(curTick + cycles(delay))); 244 _status = Running; 245} 246 247 248void 249AtomicSimpleCPU::suspendContext(int thread_num) 250{ 251 assert(thread_num == 0); 252 assert(thread); 253 254 assert(_status == Running); 255 256 // tick event may not be scheduled if this gets called from inside 257 // an instruction's execution, e.g. "quiesce" 258 if (tickEvent.scheduled()) 259 tickEvent.deschedule(); 260 261 notIdleFraction--; 262 _status = Idle; 263} 264 265 266template <class T> 267Fault 268AtomicSimpleCPU::read(Addr addr, T &data, unsigned flags) 269{ 270 // use the CPU's statically allocated read request and packet objects 271 Request *req = &data_read_req; 272 req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); 273 274 if (traceData) { 275 traceData->setAddr(addr); 276 } 277 278 // translate to physical address 279 Fault fault = thread->translateDataReadReq(req); 280 281 // Now do the access. 
282 if (fault == NoFault) { 283 Packet pkt = Packet(req, MemCmd::ReadReq, Packet::Broadcast); 284 pkt.dataStatic(&data); 285 286 if (req->isMmapedIpr()) 287 dcache_latency = TheISA::handleIprRead(thread->getTC(), &pkt); 288 else 289 dcache_latency = dcachePort.sendAtomic(&pkt); 290 dcache_access = true; 291 assert(!pkt.isError()); 292 293 if (req->isLocked()) { 294 TheISA::handleLockedRead(thread, req); 295 } 296 } 297 298 // This will need a new way to tell if it has a dcache attached. 299 if (req->isUncacheable()) 300 recordEvent("Uncached Read"); 301 302 return fault; 303} 304 305#ifndef DOXYGEN_SHOULD_SKIP_THIS 306 307template 308Fault 309AtomicSimpleCPU::read(Addr addr, Twin32_t &data, unsigned flags); 310 311template 312Fault 313AtomicSimpleCPU::read(Addr addr, Twin64_t &data, unsigned flags); 314 315template 316Fault 317AtomicSimpleCPU::read(Addr addr, uint64_t &data, unsigned flags); 318 319template 320Fault 321AtomicSimpleCPU::read(Addr addr, uint32_t &data, unsigned flags); 322 323template 324Fault 325AtomicSimpleCPU::read(Addr addr, uint16_t &data, unsigned flags); 326 327template 328Fault 329AtomicSimpleCPU::read(Addr addr, uint8_t &data, unsigned flags); 330 331#endif //DOXYGEN_SHOULD_SKIP_THIS 332 333template<> 334Fault 335AtomicSimpleCPU::read(Addr addr, double &data, unsigned flags) 336{ 337 return read(addr, *(uint64_t*)&data, flags); 338} 339 340template<> 341Fault 342AtomicSimpleCPU::read(Addr addr, float &data, unsigned flags) 343{ 344 return read(addr, *(uint32_t*)&data, flags); 345} 346 347 348template<> 349Fault 350AtomicSimpleCPU::read(Addr addr, int32_t &data, unsigned flags) 351{ 352 return read(addr, (uint32_t&)data, flags); 353} 354 355 356template <class T> 357Fault 358AtomicSimpleCPU::write(T data, Addr addr, unsigned flags, uint64_t *res) 359{ 360 // use the CPU's statically allocated write request and packet objects 361 Request *req = &data_write_req; 362 req->setVirt(0, addr, sizeof(T), flags, thread->readPC()); 363 364 if 
(traceData) { 365 traceData->setAddr(addr); 366 } 367 368 // translate to physical address 369 Fault fault = thread->translateDataWriteReq(req); 370 371 // Now do the access. 372 if (fault == NoFault) { 373 Packet pkt = 374 Packet(req, req->isSwap() ? MemCmd::SwapReq : MemCmd::WriteReq, 375 Packet::Broadcast); 376 pkt.dataStatic(&data); 377 378 bool do_access = true; // flag to suppress cache access 379 380 if (req->isLocked()) { 381 do_access = TheISA::handleLockedWrite(thread, req); 382 } 383 if (req->isCondSwap()) { 384 assert(res); 385 req->setExtraData(*res); 386 } 387 388 389 if (do_access) { 390 if (req->isMmapedIpr()) { 391 dcache_latency = TheISA::handleIprWrite(thread->getTC(), &pkt); 392 } else { 393 data = htog(data); 394 dcache_latency = dcachePort.sendAtomic(&pkt); 395 } 396 dcache_access = true; 397 assert(!pkt.isError()); 398 } 399 400 if (req->isSwap()) { 401 assert(res); 402 *res = pkt.get<T>(); 403 } else if (res) { 404 *res = req->getExtraData(); 405 } 406 } 407 408 // This will need a new way to tell if it's hooked up to a cache or not. 409 if (req->isUncacheable()) 410 recordEvent("Uncached Write"); 411 412 // If the write needs to have a fault on the access, consider calling 413 // changeStatus() and changing it to "bad addr write" or something. 
414 return fault; 415} 416 417 418#ifndef DOXYGEN_SHOULD_SKIP_THIS 419 420template 421Fault 422AtomicSimpleCPU::write(Twin32_t data, Addr addr, 423 unsigned flags, uint64_t *res); 424 425template 426Fault 427AtomicSimpleCPU::write(Twin64_t data, Addr addr, 428 unsigned flags, uint64_t *res); 429 430template 431Fault 432AtomicSimpleCPU::write(uint64_t data, Addr addr, 433 unsigned flags, uint64_t *res); 434 435template 436Fault 437AtomicSimpleCPU::write(uint32_t data, Addr addr, 438 unsigned flags, uint64_t *res); 439 440template 441Fault 442AtomicSimpleCPU::write(uint16_t data, Addr addr, 443 unsigned flags, uint64_t *res); 444 445template 446Fault 447AtomicSimpleCPU::write(uint8_t data, Addr addr, 448 unsigned flags, uint64_t *res); 449 450#endif //DOXYGEN_SHOULD_SKIP_THIS 451 452template<> 453Fault 454AtomicSimpleCPU::write(double data, Addr addr, unsigned flags, uint64_t *res) 455{ 456 return write(*(uint64_t*)&data, addr, flags, res); 457} 458 459template<> 460Fault 461AtomicSimpleCPU::write(float data, Addr addr, unsigned flags, uint64_t *res) 462{ 463 return write(*(uint32_t*)&data, addr, flags, res); 464} 465 466 467template<> 468Fault 469AtomicSimpleCPU::write(int32_t data, Addr addr, unsigned flags, uint64_t *res) 470{ 471 return write((uint32_t)data, addr, flags, res); 472} 473 474 475void 476AtomicSimpleCPU::tick() 477{ 478 Tick latency = cycles(1); // instruction takes one cycle by default 479 480 for (int i = 0; i < width; ++i) { 481 numCycles++; 482 483 if (!curStaticInst || !curStaticInst->isDelayedCommit()) 484 checkForInterrupts(); 485 486 Fault fault = setupFetchRequest(&ifetch_req); 487 488 if (fault == NoFault) { 489 Tick icache_latency = 0; 490 bool icache_access = false; 491 dcache_access = false; // assume no dcache access 492 493 //Fetch more instruction memory if necessary 494 //if(predecoder.needMoreBytes()) 495 //{ 496 icache_access = true; 497 Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq, 498 Packet::Broadcast); 499 
ifetch_pkt.dataStatic(&inst); 500 501 icache_latency = icachePort.sendAtomic(&ifetch_pkt); 502 // ifetch_req is initialized to read the instruction directly 503 // into the CPU object's inst field. 504 //} 505 506 preExecute(); 507 508 if(curStaticInst) 509 { 510 fault = curStaticInst->execute(this, traceData); 511 postExecute(); 512 } 513 514 // @todo remove me after debugging with legion done 515 if (curStaticInst && (!curStaticInst->isMicroop() || 516 curStaticInst->isFirstMicroop())) 517 instCnt++; 518 519 if (simulate_stalls) { 520 Tick icache_stall = 521 icache_access ? icache_latency - cycles(1) : 0; 522 Tick dcache_stall = 523 dcache_access ? dcache_latency - cycles(1) : 0; 524 Tick stall_cycles = (icache_stall + dcache_stall) / cycles(1); 525 if (cycles(stall_cycles) < (icache_stall + dcache_stall)) 526 latency += cycles(stall_cycles+1); 527 else 528 latency += cycles(stall_cycles); 529 } 530 531 } 532 if(fault != NoFault || !stayAtPC) 533 advancePC(fault); 534 } 535 536 if (_status != Idle) 537 tickEvent.schedule(curTick + latency); 538} 539 540 541//////////////////////////////////////////////////////////////////////// 542// 543// AtomicSimpleCPU Simulation Object 544// 545BEGIN_DECLARE_SIM_OBJECT_PARAMS(AtomicSimpleCPU) 546 547 Param<Counter> max_insts_any_thread; 548 Param<Counter> max_insts_all_threads; 549 Param<Counter> max_loads_any_thread; 550 Param<Counter> max_loads_all_threads; 551 Param<Tick> progress_interval; 552 SimObjectParam<System *> system; 553 Param<int> cpu_id; 554 555#if FULL_SYSTEM 556 SimObjectParam<TheISA::ITB *> itb; 557 SimObjectParam<TheISA::DTB *> dtb; 558 Param<Tick> profile; 559 560 Param<bool> do_quiesce; 561 Param<bool> do_checkpoint_insts; 562 Param<bool> do_statistics_insts; 563#else 564 SimObjectParam<Process *> workload; 565#endif // FULL_SYSTEM 566 567 Param<int> clock; 568 Param<int> phase; 569 570 Param<bool> defer_registration; 571 Param<int> width; 572 Param<bool> function_trace; 573 Param<Tick> 
function_trace_start; 574 Param<bool> simulate_stalls; 575 576END_DECLARE_SIM_OBJECT_PARAMS(AtomicSimpleCPU) 577 578BEGIN_INIT_SIM_OBJECT_PARAMS(AtomicSimpleCPU) 579 580 INIT_PARAM(max_insts_any_thread, 581 "terminate when any thread reaches this inst count"), 582 INIT_PARAM(max_insts_all_threads, 583 "terminate when all threads have reached this inst count"), 584 INIT_PARAM(max_loads_any_thread, 585 "terminate when any thread reaches this load count"), 586 INIT_PARAM(max_loads_all_threads, 587 "terminate when all threads have reached this load count"), 588 INIT_PARAM(progress_interval, "Progress interval"), 589 INIT_PARAM(system, "system object"), 590 INIT_PARAM(cpu_id, "processor ID"), 591 592#if FULL_SYSTEM 593 INIT_PARAM(itb, "Instruction TLB"), 594 INIT_PARAM(dtb, "Data TLB"), 595 INIT_PARAM(profile, ""), 596 INIT_PARAM(do_quiesce, ""), 597 INIT_PARAM(do_checkpoint_insts, ""), 598 INIT_PARAM(do_statistics_insts, ""), 599#else 600 INIT_PARAM(workload, "processes to run"), 601#endif // FULL_SYSTEM 602 603 INIT_PARAM(clock, "clock speed"), 604 INIT_PARAM_DFLT(phase, "clock phase", 0), 605 INIT_PARAM(defer_registration, "defer system registration (for sampling)"), 606 INIT_PARAM(width, "cpu width"), 607 INIT_PARAM(function_trace, "Enable function trace"), 608 INIT_PARAM(function_trace_start, "Cycle to start function trace"), 609 INIT_PARAM(simulate_stalls, "Simulate cache stall cycles") 610 611END_INIT_SIM_OBJECT_PARAMS(AtomicSimpleCPU) 612 613 614CREATE_SIM_OBJECT(AtomicSimpleCPU) 615{ 616 AtomicSimpleCPU::Params *params = new AtomicSimpleCPU::Params(); 617 params->name = getInstanceName(); 618 params->numberOfThreads = 1; 619 params->max_insts_any_thread = max_insts_any_thread; 620 params->max_insts_all_threads = max_insts_all_threads; 621 params->max_loads_any_thread = max_loads_any_thread; 622 params->max_loads_all_threads = max_loads_all_threads; 623 params->progress_interval = progress_interval; 624 params->deferRegistration = defer_registration; 625 
params->phase = phase; 626 params->clock = clock; 627 params->functionTrace = function_trace; 628 params->functionTraceStart = function_trace_start; 629 params->width = width; 630 params->simulate_stalls = simulate_stalls; 631 params->system = system; 632 params->cpu_id = cpu_id; 633 634#if FULL_SYSTEM 635 params->itb = itb; 636 params->dtb = dtb; 637 params->profile = profile; 638 params->do_quiesce = do_quiesce; 639 params->do_checkpoint_insts = do_checkpoint_insts; 640 params->do_statistics_insts = do_statistics_insts; 641#else 642 params->process = workload; 643#endif 644 645 AtomicSimpleCPU *cpu = new AtomicSimpleCPU(params); 646 return cpu; 647} 648 649REGISTER_SIM_OBJECT("AtomicSimpleCPU", AtomicSimpleCPU) 650 651