// atomic.cc revision 8707:489489c67fd9
1/* 2 * Copyright (c) 2002-2005 The Regents of The University of Michigan 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 * 28 * Authors: Steve Reinhardt 29 */ 30 31#include "arch/locked_mem.hh" 32#include "arch/mmapped_ipr.hh" 33#include "arch/utility.hh" 34#include "base/bigint.hh" 35#include "config/the_isa.hh" 36#include "cpu/simple/atomic.hh" 37#include "cpu/exetrace.hh" 38#include "debug/ExecFaulting.hh" 39#include "debug/SimpleCPU.hh" 40#include "mem/packet.hh" 41#include "mem/packet_access.hh" 42#include "params/AtomicSimpleCPU.hh" 43#include "sim/faults.hh" 44#include "sim/system.hh" 45 46using namespace std; 47using namespace TheISA; 48 49AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c) 50 : Event(CPU_Tick_Pri), cpu(c) 51{ 52} 53 54 55void 56AtomicSimpleCPU::TickEvent::process() 57{ 58 cpu->tick(); 59} 60 61const char * 62AtomicSimpleCPU::TickEvent::description() const 63{ 64 return "AtomicSimpleCPU tick"; 65} 66 67Port * 68AtomicSimpleCPU::getPort(const string &if_name, int idx) 69{ 70 if (if_name == "dcache_port") 71 return &dcachePort; 72 else if (if_name == "icache_port") 73 return &icachePort; 74 else if (if_name == "physmem_port") { 75 hasPhysMemPort = true; 76 return &physmemPort; 77 } 78 else 79 panic("No Such Port\n"); 80} 81 82void 83AtomicSimpleCPU::init() 84{ 85 BaseCPU::init(); 86#if FULL_SYSTEM 87 ThreadID size = threadContexts.size(); 88 for (ThreadID i = 0; i < size; ++i) { 89 ThreadContext *tc = threadContexts[i]; 90 91 // initialize CPU, including PC 92 TheISA::initCPU(tc, tc->contextId()); 93 } 94 95 // Initialise the ThreadContext's memory proxies 96 tcBase()->initMemProxies(tcBase()); 97#endif 98 if (hasPhysMemPort) { 99 bool snoop = false; 100 AddrRangeList pmAddrList; 101 physmemPort.getPeerAddressRanges(pmAddrList, snoop); 102 physMemAddr = *pmAddrList.begin(); 103 } 104 // Atomic doesn't do MT right now, so contextId == threadId 105 ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT 106 data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too 107 data_write_req.setThreadContext(_cpuId, 0); // Add thread ID 
here too 108} 109 110AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p) 111 : BaseSimpleCPU(p), tickEvent(this), width(p->width), locked(false), 112 simulate_data_stalls(p->simulate_data_stalls), 113 simulate_inst_stalls(p->simulate_inst_stalls), 114 icachePort(name() + "-iport", this), dcachePort(name() + "-iport", this), 115 physmemPort(name() + "-iport", this), hasPhysMemPort(false) 116{ 117 _status = Idle; 118} 119 120 121AtomicSimpleCPU::~AtomicSimpleCPU() 122{ 123 if (tickEvent.scheduled()) { 124 deschedule(tickEvent); 125 } 126} 127 128void 129AtomicSimpleCPU::serialize(ostream &os) 130{ 131 SimObject::State so_state = SimObject::getState(); 132 SERIALIZE_ENUM(so_state); 133 SERIALIZE_SCALAR(locked); 134 BaseSimpleCPU::serialize(os); 135 nameOut(os, csprintf("%s.tickEvent", name())); 136 tickEvent.serialize(os); 137} 138 139void 140AtomicSimpleCPU::unserialize(Checkpoint *cp, const string §ion) 141{ 142 SimObject::State so_state; 143 UNSERIALIZE_ENUM(so_state); 144 UNSERIALIZE_SCALAR(locked); 145 BaseSimpleCPU::unserialize(cp, section); 146 tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); 147} 148 149void 150AtomicSimpleCPU::resume() 151{ 152 if (_status == Idle || _status == SwitchedOut) 153 return; 154 155 DPRINTF(SimpleCPU, "Resume\n"); 156 assert(system->getMemoryMode() == Enums::atomic); 157 158 changeState(SimObject::Running); 159 if (thread->status() == ThreadContext::Active) { 160 if (!tickEvent.scheduled()) 161 schedule(tickEvent, nextCycle()); 162 } 163 system->totalNumInsts = 0; 164} 165 166void 167AtomicSimpleCPU::switchOut() 168{ 169 assert(_status == Running || _status == Idle); 170 _status = SwitchedOut; 171 172 tickEvent.squash(); 173} 174 175 176void 177AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU) 178{ 179 BaseCPU::takeOverFrom(oldCPU, &icachePort, &dcachePort); 180 181 assert(!tickEvent.scheduled()); 182 183 // if any of this CPU's ThreadContexts are active, mark the CPU as 184 // running and schedule its tick event. 
185 ThreadID size = threadContexts.size(); 186 for (ThreadID i = 0; i < size; ++i) { 187 ThreadContext *tc = threadContexts[i]; 188 if (tc->status() == ThreadContext::Active && _status != Running) { 189 _status = Running; 190 schedule(tickEvent, nextCycle()); 191 break; 192 } 193 } 194 if (_status != Running) { 195 _status = Idle; 196 } 197 assert(threadContexts.size() == 1); 198 ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT 199 data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too 200 data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too 201} 202 203 204void 205AtomicSimpleCPU::activateContext(int thread_num, int delay) 206{ 207 DPRINTF(SimpleCPU, "ActivateContext %d (%d cycles)\n", thread_num, delay); 208 209 assert(thread_num == 0); 210 assert(thread); 211 212 assert(_status == Idle); 213 assert(!tickEvent.scheduled()); 214 215 notIdleFraction++; 216 numCycles += tickToCycles(thread->lastActivate - thread->lastSuspend); 217 218 //Make sure ticks are still on multiples of cycles 219 schedule(tickEvent, nextCycle(curTick() + ticks(delay))); 220 _status = Running; 221} 222 223 224void 225AtomicSimpleCPU::suspendContext(int thread_num) 226{ 227 DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num); 228 229 assert(thread_num == 0); 230 assert(thread); 231 232 if (_status == Idle) 233 return; 234 235 assert(_status == Running); 236 237 // tick event may not be scheduled if this gets called from inside 238 // an instruction's execution, e.g. "quiesce" 239 if (tickEvent.scheduled()) 240 deschedule(tickEvent); 241 242 notIdleFraction--; 243 _status = Idle; 244} 245 246 247Fault 248AtomicSimpleCPU::readMem(Addr addr, uint8_t * data, 249 unsigned size, unsigned flags) 250{ 251 // use the CPU's statically allocated read request and packet objects 252 Request *req = &data_read_req; 253 254 if (traceData) { 255 traceData->setAddr(addr); 256 } 257 258 //The block size of our peer. 
259 unsigned blockSize = dcachePort.peerBlockSize(); 260 //The size of the data we're trying to read. 261 int fullSize = size; 262 263 //The address of the second part of this access if it needs to be split 264 //across a cache line boundary. 265 Addr secondAddr = roundDown(addr + size - 1, blockSize); 266 267 if (secondAddr > addr) 268 size = secondAddr - addr; 269 270 dcache_latency = 0; 271 272 while (1) { 273 req->setVirt(0, addr, size, flags, thread->pcState().instAddr()); 274 275 // translate to physical address 276 Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Read); 277 278 // Now do the access. 279 if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) { 280 Packet pkt = Packet(req, 281 req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq, 282 Packet::Broadcast); 283 pkt.dataStatic(data); 284 285 if (req->isMmappedIpr()) 286 dcache_latency += TheISA::handleIprRead(thread->getTC(), &pkt); 287 else { 288 if (hasPhysMemPort && pkt.getAddr() == physMemAddr) 289 dcache_latency += physmemPort.sendAtomic(&pkt); 290 else 291 dcache_latency += dcachePort.sendAtomic(&pkt); 292 } 293 dcache_access = true; 294 295 assert(!pkt.isError()); 296 297 if (req->isLLSC()) { 298 TheISA::handleLockedRead(thread, req); 299 } 300 } 301 302 //If there's a fault, return it 303 if (fault != NoFault) { 304 if (req->isPrefetch()) { 305 return NoFault; 306 } else { 307 return fault; 308 } 309 } 310 311 //If we don't need to access a second cache line, stop now. 312 if (secondAddr <= addr) 313 { 314 if (req->isLocked() && fault == NoFault) { 315 assert(!locked); 316 locked = true; 317 } 318 return fault; 319 } 320 321 /* 322 * Set up for accessing the second cache line. 323 */ 324 325 //Move the pointer we're reading into to the correct location. 326 data += size; 327 //Adjust the size to get the remaining bytes. 328 size = addr + fullSize - secondAddr; 329 //And access the right address. 
330 addr = secondAddr; 331 } 332} 333 334 335Fault 336AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size, 337 Addr addr, unsigned flags, uint64_t *res) 338{ 339 // use the CPU's statically allocated write request and packet objects 340 Request *req = &data_write_req; 341 342 if (traceData) { 343 traceData->setAddr(addr); 344 } 345 346 //The block size of our peer. 347 unsigned blockSize = dcachePort.peerBlockSize(); 348 //The size of the data we're trying to read. 349 int fullSize = size; 350 351 //The address of the second part of this access if it needs to be split 352 //across a cache line boundary. 353 Addr secondAddr = roundDown(addr + size - 1, blockSize); 354 355 if(secondAddr > addr) 356 size = secondAddr - addr; 357 358 dcache_latency = 0; 359 360 while(1) { 361 req->setVirt(0, addr, size, flags, thread->pcState().instAddr()); 362 363 // translate to physical address 364 Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Write); 365 366 // Now do the access. 367 if (fault == NoFault) { 368 MemCmd cmd = MemCmd::WriteReq; // default 369 bool do_access = true; // flag to suppress cache access 370 371 if (req->isLLSC()) { 372 cmd = MemCmd::StoreCondReq; 373 do_access = TheISA::handleLockedWrite(thread, req); 374 } else if (req->isSwap()) { 375 cmd = MemCmd::SwapReq; 376 if (req->isCondSwap()) { 377 assert(res); 378 req->setExtraData(*res); 379 } 380 } 381 382 if (do_access && !req->getFlags().isSet(Request::NO_ACCESS)) { 383 Packet pkt = Packet(req, cmd, Packet::Broadcast); 384 pkt.dataStatic(data); 385 386 if (req->isMmappedIpr()) { 387 dcache_latency += 388 TheISA::handleIprWrite(thread->getTC(), &pkt); 389 } else { 390 if (hasPhysMemPort && pkt.getAddr() == physMemAddr) 391 dcache_latency += physmemPort.sendAtomic(&pkt); 392 else 393 dcache_latency += dcachePort.sendAtomic(&pkt); 394 } 395 dcache_access = true; 396 assert(!pkt.isError()); 397 398 if (req->isSwap()) { 399 assert(res); 400 memcpy(res, pkt.getPtr<uint8_t>(), fullSize); 401 } 402 
} 403 404 if (res && !req->isSwap()) { 405 *res = req->getExtraData(); 406 } 407 } 408 409 //If there's a fault or we don't need to access a second cache line, 410 //stop now. 411 if (fault != NoFault || secondAddr <= addr) 412 { 413 if (req->isLocked() && fault == NoFault) { 414 assert(locked); 415 locked = false; 416 } 417 if (fault != NoFault && req->isPrefetch()) { 418 return NoFault; 419 } else { 420 return fault; 421 } 422 } 423 424 /* 425 * Set up for accessing the second cache line. 426 */ 427 428 //Move the pointer we're reading into to the correct location. 429 data += size; 430 //Adjust the size to get the remaining bytes. 431 size = addr + fullSize - secondAddr; 432 //And access the right address. 433 addr = secondAddr; 434 } 435} 436 437 438void 439AtomicSimpleCPU::tick() 440{ 441 DPRINTF(SimpleCPU, "Tick\n"); 442 443 Tick latency = 0; 444 445 for (int i = 0; i < width || locked; ++i) { 446 numCycles++; 447 448 if (!curStaticInst || !curStaticInst->isDelayedCommit()) 449 checkForInterrupts(); 450 451 checkPcEventQueue(); 452 // We must have just got suspended by a PC event 453 if (_status == Idle) 454 return; 455 456 Fault fault = NoFault; 457 458 TheISA::PCState pcState = thread->pcState(); 459 460 bool needToFetch = !isRomMicroPC(pcState.microPC()) && 461 !curMacroStaticInst; 462 if (needToFetch) { 463 setupFetchRequest(&ifetch_req); 464 fault = thread->itb->translateAtomic(&ifetch_req, tc, 465 BaseTLB::Execute); 466 } 467 468 if (fault == NoFault) { 469 Tick icache_latency = 0; 470 bool icache_access = false; 471 dcache_access = false; // assume no dcache access 472 473 if (needToFetch) { 474 // This is commented out because the predecoder would act like 475 // a tiny cache otherwise. It wouldn't be flushed when needed 476 // like the I cache. It should be flushed, and when that works 477 // this code should be uncommented. 
478 //Fetch more instruction memory if necessary 479 //if(predecoder.needMoreBytes()) 480 //{ 481 icache_access = true; 482 Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq, 483 Packet::Broadcast); 484 ifetch_pkt.dataStatic(&inst); 485 486 if (hasPhysMemPort && ifetch_pkt.getAddr() == physMemAddr) 487 icache_latency = physmemPort.sendAtomic(&ifetch_pkt); 488 else 489 icache_latency = icachePort.sendAtomic(&ifetch_pkt); 490 491 assert(!ifetch_pkt.isError()); 492 493 // ifetch_req is initialized to read the instruction directly 494 // into the CPU object's inst field. 495 //} 496 } 497 498 preExecute(); 499 500 if (curStaticInst) { 501 fault = curStaticInst->execute(this, traceData); 502 503 // keep an instruction count 504 if (fault == NoFault) 505 countInst(); 506 else if (traceData && !DTRACE(ExecFaulting)) { 507 delete traceData; 508 traceData = NULL; 509 } 510 511 postExecute(); 512 } 513 514 // @todo remove me after debugging with legion done 515 if (curStaticInst && (!curStaticInst->isMicroop() || 516 curStaticInst->isFirstMicroop())) 517 instCnt++; 518 519 Tick stall_ticks = 0; 520 if (simulate_inst_stalls && icache_access) 521 stall_ticks += icache_latency; 522 523 if (simulate_data_stalls && dcache_access) 524 stall_ticks += dcache_latency; 525 526 if (stall_ticks) { 527 Tick stall_cycles = stall_ticks / ticks(1); 528 Tick aligned_stall_ticks = ticks(stall_cycles); 529 530 if (aligned_stall_ticks < stall_ticks) 531 aligned_stall_ticks += 1; 532 533 latency += aligned_stall_ticks; 534 } 535 536 } 537 if(fault != NoFault || !stayAtPC) 538 advancePC(fault); 539 } 540 541 // instruction takes at least one cycle 542 if (latency < ticks(1)) 543 latency = ticks(1); 544 545 if (_status != Idle) 546 schedule(tickEvent, curTick() + latency); 547} 548 549 550void 551AtomicSimpleCPU::printAddr(Addr a) 552{ 553 dcachePort.printAddr(a); 554} 555 556 557//////////////////////////////////////////////////////////////////////// 558// 559// AtomicSimpleCPU Simulation 
Object 560// 561AtomicSimpleCPU * 562AtomicSimpleCPUParams::create() 563{ 564 numThreads = 1; 565#if !FULL_SYSTEM 566 if (workload.size() != 1) 567 panic("only one workload allowed"); 568#endif 569 return new AtomicSimpleCPU(this); 570} 571