/*
 * Copyright (c) 2012 ARM Limited
 * All rights reserved.
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2002-2005 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Steve Reinhardt
 */

#include "arch/locked_mem.hh"
#include "arch/mmapped_ipr.hh"
#include "arch/utility.hh"
#include "base/bigint.hh"
#include "config/the_isa.hh"
#include "cpu/simple/atomic.hh"
#include "cpu/exetrace.hh"
#include "debug/Drain.hh"
#include "debug/ExecFaulting.hh"
#include "debug/SimpleCPU.hh"
#include "mem/packet.hh"
#include "mem/packet_access.hh"
#include "mem/physical.hh"
#include "params/AtomicSimpleCPU.hh"
#include "sim/faults.hh"
#include "sim/system.hh"
#include "sim/full_system.hh"

using namespace std;
using namespace TheISA;

// Periodic event that drives the CPU: each time it fires it calls
// tick(), which fetches and executes up to 'width' instructions.
AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c)
    : Event(CPU_Tick_Pri), cpu(c)
{
}


void
AtomicSimpleCPU::TickEvent::process()
{
    cpu->tick();
}

const char *
AtomicSimpleCPU::TickEvent::description() const
{
    return "AtomicSimpleCPU tick";
}

// Post-construction initialization: sets up the thread context's memory
// proxies, performs ISA-specific CPU init in full-system mode, and binds
// the statically allocated fetch/read/write requests to this CPU's id.
void
AtomicSimpleCPU::init()
{
    BaseCPU::init();

    // Initialise the ThreadContext's memory proxies
    tcBase()->initMemProxies(tcBase());

    // ISA-level init (including the PC) only makes sense in full-system
    // mode, and only if this CPU starts out active (not switched out).
    if (FullSystem && !params()->switched_out) {
        ThreadID size = threadContexts.size();
        for (ThreadID i = 0; i < size; ++i) {
            ThreadContext *tc = threadContexts[i];
            // initialize CPU, including PC
            TheISA::initCPU(tc, tc->contextId());
        }
    }

    // Atomic doesn't do MT right now, so contextId == threadId
    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}

AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
    : BaseSimpleCPU(p), tickEvent(this), width(p->width), locked(false),
      simulate_data_stalls(p->simulate_data_stalls),
      simulate_inst_stalls(p->simulate_inst_stalls),
      drain_manager(NULL),
      icachePort(name() + ".icache_port", this),
      dcachePort(name() + ".dcache_port", this),
      fastmem(p->fastmem)
{
    _status = Idle;
}


AtomicSimpleCPU::~AtomicSimpleCPU()
{
    // Don't leave a pending tick event behind in the event queue.
    if (tickEvent.scheduled()) {
        deschedule(tickEvent);
    }
}

// Drain request from the simulation framework. Returns the number of
// outstanding drain operations: 0 if the CPU is already drained (or
// switched out), 1 if it must finish in-flight microcode first, in which
// case drain_manager is saved and signaled later from tryCompleteDrain().
unsigned int
AtomicSimpleCPU::drain(DrainManager *dm)
{
    assert(!drain_manager);
    if (switchedOut())
        return 0;

    if (!isDrained()) {
        DPRINTF(Drain, "Requesting drain: %s\n", pcState());
        drain_manager = dm;
        return 1;
    } else {
        if (tickEvent.scheduled())
            deschedule(tickEvent);

        DPRINTF(Drain, "Not executing microcode, no need to drain.\n");
        return 0;
    }
}

// Resume after a drain: re-checks that the memory system is in atomic
// mode and restarts the tick event if the (single) thread is active.
void
AtomicSimpleCPU::drainResume()
{
    assert(!tickEvent.scheduled());
    assert(!drain_manager);
    if (switchedOut())
        return;

    DPRINTF(SimpleCPU, "Resume\n");
    verifyMemoryMode();

    assert(!threadContexts.empty());
    if (threadContexts.size() > 1)
        fatal("The atomic CPU only supports one thread.\n");

    if (thread->status() == ThreadContext::Active) {
        schedule(tickEvent, nextCycle());
        _status = BaseSimpleCPU::Running;
    } else {
        _status = BaseSimpleCPU::Idle;
    }

    // NOTE(review): resets the system-wide instruction count on resume;
    // presumably so exit conditions based on totalNumInsts restart from
    // zero after a switch — confirm against System's users of this field.
    system->totalNumInsts = 0;
}

// If a drain is pending and the CPU has reached a drained state, notify
// the drain manager. Returns true if the drain completed here.
bool
AtomicSimpleCPU::tryCompleteDrain()
{
    if (!drain_manager)
        return false;

    DPRINTF(Drain, "tryCompleteDrain: %s\n", pcState());
    if (!isDrained())
        return false;

    DPRINTF(Drain, "CPU done draining, processing drain event\n");
    drain_manager->signalDrainDone();
    drain_manager = NULL;

    return true;
}


void
AtomicSimpleCPU::switchOut()
{
    BaseSimpleCPU::switchOut();

    // A switch-out must have been preceded by a successful drain.
    assert(!tickEvent.scheduled());
    assert(_status == BaseSimpleCPU::Running || _status == Idle);
    assert(isDrained());
}


// Take over simulation state from another CPU model (e.g. when switching
// from a detailed CPU back to atomic). Re-binds the static memory
// requests since _cpuId may differ from the values set in init().
void
AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
{
    BaseSimpleCPU::takeOverFrom(oldCPU);

    // The tick event should have been descheduled by drain()
    assert(!tickEvent.scheduled());

    ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
    data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
    data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}

// This CPU issues memory accesses via sendAtomic(), so the memory system
// must be configured in atomic mode; bail out with a fatal error if not.
void
AtomicSimpleCPU::verifyMemoryMode() const
{
    if (!system->isAtomicMode()) {
        fatal("The atomic CPU requires the memory system to be in "
              "'atomic' mode.\n");
    }
}

// Wake the (single, thread_num == 0) context up after 'delay' cycles:
// account for the idle time and restart the tick event.
void
AtomicSimpleCPU::activateContext(ThreadID thread_num, Cycles delay)
{
    DPRINTF(SimpleCPU, "ActivateContext %d (%d cycles)\n", thread_num, delay);

    assert(thread_num == 0);
    assert(thread);

    assert(_status == Idle);
    assert(!tickEvent.scheduled());

    notIdleFraction++;
    // Charge the cycles spent suspended to this CPU's cycle counter.
    numCycles += ticksToCycles(thread->lastActivate - thread->lastSuspend);

    //Make sure ticks are still on multiples of cycles
    schedule(tickEvent, clockEdge(delay));
    _status = BaseSimpleCPU::Running;
}


// Suspend the (single) context: stop ticking and mark the CPU idle.
// A no-op if the CPU is already idle.
void
AtomicSimpleCPU::suspendContext(ThreadID thread_num)
{
    DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num);

    assert(thread_num == 0);
    assert(thread);

    if (_status == Idle)
        return;

    assert(_status == BaseSimpleCPU::Running);

    // tick event may not be scheduled if this gets called from inside
    // an instruction's execution, e.g. "quiesce"
    if (tickEvent.scheduled())
        deschedule(tickEvent);

    notIdleFraction--;
    _status = Idle;
}


// Atomic read of 'size' bytes at virtual address 'addr' into 'data'.
// The access is transparently split in two when it straddles the peer's
// block (cache line) boundary; each half is translated and issued
// separately. Accumulates latency into dcache_latency and flags
// dcache_access for the stall accounting in tick(). Returns the first
// fault encountered (prefetch faults are squashed to NoFault).
Fault
AtomicSimpleCPU::readMem(Addr addr, uint8_t * data,
                         unsigned size, unsigned flags)
{
    // use the CPU's statically allocated read request and packet objects
    Request *req = &data_read_req;

    if (traceData) {
        traceData->setAddr(addr);
    }

    //The block size of our peer.
    unsigned blockSize = dcachePort.peerBlockSize();
    //The size of the data we're trying to read.
    int fullSize = size;

    //The address of the second part of this access if it needs to be split
    //across a cache line boundary.
    Addr secondAddr = roundDown(addr + size - 1, blockSize);

    // If the last byte falls in a later block, shrink the first access to
    // end exactly at the block boundary.
    if (secondAddr > addr)
        size = secondAddr - addr;

    dcache_latency = 0;

    while (1) {
        req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());

        // translate to physical address
        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Read);

        // Now do the access.
        if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
            Packet pkt = Packet(req,
                                req->isLLSC() ? MemCmd::LoadLockedReq :
                                MemCmd::ReadReq);
            pkt.dataStatic(data);

            if (req->isMmappedIpr())
                // Memory-mapped IPR accesses are handled by the ISA, not
                // the memory system.
                dcache_latency += TheISA::handleIprRead(thread->getTC(), &pkt);
            else {
                // fastmem bypasses the cache hierarchy for addresses
                // backed by physical memory.
                if (fastmem && system->isMemAddr(pkt.getAddr()))
                    system->getPhysMem().access(&pkt);
                else
                    dcache_latency += dcachePort.sendAtomic(&pkt);
            }
            dcache_access = true;

            assert(!pkt.isError());

            if (req->isLLSC()) {
                // Record the load-locked address for a later
                // store-conditional.
                TheISA::handleLockedRead(thread, req);
            }
        }

        //If there's a fault, return it
        if (fault != NoFault) {
            if (req->isPrefetch()) {
                // Faulting prefetches are dropped, not reported.
                return NoFault;
            } else {
                return fault;
            }
        }

        //If we don't need to access a second cache line, stop now.
        if (secondAddr <= addr)
        {
            if (req->isLocked() && fault == NoFault) {
                // A locked (RMW) read takes the lock; the matching
                // writeMem() releases it.
                assert(!locked);
                locked = true;
            }
            return fault;
        }

        /*
         * Set up for accessing the second cache line.
         */

        //Move the pointer we're reading into to the correct location.
        data += size;
        //Adjust the size to get the remaining bytes.
        size = addr + fullSize - secondAddr;
        //And access the right address.
        addr = secondAddr;
    }
}


// Atomic write of 'size' bytes from 'data' to virtual address 'addr'.
// Mirrors readMem(): the access is split at a block boundary if needed.
// Handles store-conditional (StoreCondReq, result via *res), swap and
// conditional-swap requests, and releases the RMW lock taken by a locked
// read. Prefetch faults are squashed to NoFault.
Fault
AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
                          Addr addr, unsigned flags, uint64_t *res)
{
    // use the CPU's statically allocated write request and packet objects
    Request *req = &data_write_req;

    if (traceData) {
        traceData->setAddr(addr);
    }

    //The block size of our peer.
    unsigned blockSize = dcachePort.peerBlockSize();
    //The size of the data we're trying to write.
    int fullSize = size;

    //The address of the second part of this access if it needs to be split
    //across a cache line boundary.
    Addr secondAddr = roundDown(addr + size - 1, blockSize);

    if (secondAddr > addr)
        size = secondAddr - addr;

    dcache_latency = 0;

    while (1) {
        req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());

        // translate to physical address
        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Write);

        // Now do the access.
        if (fault == NoFault) {
            MemCmd cmd = MemCmd::WriteReq; // default
            bool do_access = true;  // flag to suppress cache access

            if (req->isLLSC()) {
                cmd = MemCmd::StoreCondReq;
                // The ISA decides whether the store-conditional may
                // proceed (i.e. whether the lock is still held).
                do_access = TheISA::handleLockedWrite(thread, req);
            } else if (req->isSwap()) {
                cmd = MemCmd::SwapReq;
                if (req->isCondSwap()) {
                    assert(res);
                    // Comparison value for the conditional swap travels
                    // in the request's extra data.
                    req->setExtraData(*res);
                }
            }

            if (do_access && !req->getFlags().isSet(Request::NO_ACCESS)) {
                Packet pkt = Packet(req, cmd);
                pkt.dataStatic(data);

                if (req->isMmappedIpr()) {
                    dcache_latency +=
                        TheISA::handleIprWrite(thread->getTC(), &pkt);
                } else {
                    if (fastmem && system->isMemAddr(pkt.getAddr()))
                        system->getPhysMem().access(&pkt);
                    else
                        dcache_latency += dcachePort.sendAtomic(&pkt);
                }
                dcache_access = true;
                assert(!pkt.isError());

                if (req->isSwap()) {
                    // A swap returns the old memory value through *res.
                    assert(res);
                    memcpy(res, pkt.getPtr<uint8_t>(), fullSize);
                }
            }

            if (res && !req->isSwap()) {
                // Store-conditional success/failure result.
                *res = req->getExtraData();
            }
        }

        //If there's a fault or we don't need to access a second cache line,
        //stop now.
        if (fault != NoFault || secondAddr <= addr)
        {
            if (req->isLocked() && fault == NoFault) {
                // Release the lock taken by the matching locked read.
                assert(locked);
                locked = false;
            }
            if (fault != NoFault && req->isPrefetch()) {
                return NoFault;
            } else {
                return fault;
            }
        }

        /*
         * Set up for accessing the second cache line.
         */

        //Move the pointer we're reading from to the correct location.
        data += size;
        //Adjust the size to get the remaining bytes.
        size = addr + fullSize - secondAddr;
        //And access the right address.
        addr = secondAddr;
    }
}


// Main execution loop, invoked once per tick event. Executes up to
// 'width' instructions per cycle (and keeps going while a locked RMW is
// in flight): check interrupts and PC events, fetch (with ITB
// translation), execute, account for simulated icache/dcache stalls, and
// reschedule the next tick.
void
AtomicSimpleCPU::tick()
{
    DPRINTF(SimpleCPU, "Tick\n");

    Tick latency = 0;

    // Keep iterating past 'width' while 'locked' so a locked RMW
    // sequence completes within this tick.
    for (int i = 0; i < width || locked; ++i) {
        numCycles++;

        // Don't take interrupts in the middle of a macro-op that has
        // delayed commit semantics.
        if (!curStaticInst || !curStaticInst->isDelayedCommit())
            checkForInterrupts();

        checkPcEventQueue();
        // We must have just got suspended by a PC event
        if (_status == Idle) {
            tryCompleteDrain();
            return;
        }

        Fault fault = NoFault;

        TheISA::PCState pcState = thread->pcState();

        // No fetch needed when executing out of the microcode ROM or
        // continuing a macro-op already decoded.
        bool needToFetch = !isRomMicroPC(pcState.microPC()) &&
                           !curMacroStaticInst;
        if (needToFetch) {
            setupFetchRequest(&ifetch_req);
            fault = thread->itb->translateAtomic(&ifetch_req, tc,
                                                 BaseTLB::Execute);
        }

        if (fault == NoFault) {
            Tick icache_latency = 0;
            bool icache_access = false;
            dcache_access = false; // assume no dcache access

            if (needToFetch) {
                // This is commented out because the decoder would act like
                // a tiny cache otherwise. It wouldn't be flushed when needed
                // like the I cache. It should be flushed, and when that works
                // this code should be uncommented.
                //Fetch more instruction memory if necessary
                //if(decoder.needMoreBytes())
                //{
                    icache_access = true;
                    Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq);
                    ifetch_pkt.dataStatic(&inst);

                    if (fastmem && system->isMemAddr(ifetch_pkt.getAddr()))
                        system->getPhysMem().access(&ifetch_pkt);
                    else
                        icache_latency = icachePort.sendAtomic(&ifetch_pkt);

                    assert(!ifetch_pkt.isError());

                    // ifetch_req is initialized to read the instruction
                    // directly into the CPU object's inst field.
                //}
            }

            preExecute();

            if (curStaticInst) {
                fault = curStaticInst->execute(this, traceData);

                // keep an instruction count
                if (fault == NoFault)
                    countInst();
                else if (traceData && !DTRACE(ExecFaulting)) {
                    // Discard the trace record for faulting instructions
                    // unless ExecFaulting tracing is enabled.
                    delete traceData;
                    traceData = NULL;
                }

                postExecute();
            }

            // @todo remove me after debugging with legion done
            if (curStaticInst && (!curStaticInst->isMicroop() ||
                        curStaticInst->isFirstMicroop()))
                instCnt++;

            // Optionally model fetch/data access latency as stall time.
            Tick stall_ticks = 0;
            if (simulate_inst_stalls && icache_access)
                stall_ticks += icache_latency;

            if (simulate_data_stalls && dcache_access)
                stall_ticks += dcache_latency;

            if (stall_ticks) {
                // the atomic cpu does its accounting in ticks, so
                // keep counting in ticks but round to the clock
                // period
                latency += divCeil(stall_ticks, clockPeriod()) *
                    clockPeriod();
            }

        }
        // Advance the PC unless the instruction asked to stay (e.g. a
        // fault-free instruction that set stayAtPC).
        if (fault != NoFault || !stayAtPC)
            advancePC(fault);
    }

    if (tryCompleteDrain())
        return;

    // instruction takes at least one cycle
    if (latency < clockPeriod())
        latency = clockPeriod();

    if (_status != Idle)
        schedule(tickEvent, curTick() + latency);
}


// Debug helper: print routing information for address 'a' via the
// dcache port.
void
AtomicSimpleCPU::printAddr(Addr a)
{
    dcachePort.printAddr(a);
}


////////////////////////////////////////////////////////////////////////
//
//  AtomicSimpleCPU Simulation Object
//
// Param-object factory: this CPU model is single-threaded, so force
// numThreads to 1 and require exactly one workload in SE mode.
AtomicSimpleCPU *
AtomicSimpleCPUParams::create()
{
    numThreads = 1;
    if (!FullSystem && workload.size() != 1)
        panic("only one workload allowed");
    return new AtomicSimpleCPU(this);
}