GPUCoalescer.cc revision 11308
/*
 * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Sooraj Puthoor
 */

#include "base/misc.hh"
#include "base/str.hh"
#include "config/the_isa.hh"

#if THE_ISA == X86_ISA
#include "arch/x86/insts/microldstop.hh"

#endif // X86_ISA
#include "mem/ruby/system/GPUCoalescer.hh"

#include "cpu/testers/rubytest/RubyTester.hh"
#include "debug/GPUCoalescer.hh"
#include "debug/MemoryAccess.hh"
#include "debug/ProtocolTrace.hh"
#include "debug/RubyPort.hh"
#include "debug/RubyStats.hh"
#include "gpu-compute/shader.hh"
#include "mem/packet.hh"
#include "mem/ruby/common/DataBlock.hh"
#include "mem/ruby/common/SubBlock.hh"
#include "mem/ruby/network/MessageBuffer.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/slicc_interface/AbstractController.hh"
#include "mem/ruby/slicc_interface/RubyRequest.hh"
#include "mem/ruby/structures/CacheMemory.hh"
#include "mem/ruby/system/RubySystem.hh"
#include "params/RubyGPUCoalescer.hh"

using namespace std;

GPUCoalescer *
RubyGPUCoalescerParams::create()
{
    return new GPUCoalescer(this);
}

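// Translate a request's memory-scope flags into the corresponding HSAScope
// value; requests without a scope default to HSAScope_UNSPECIFIED.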
HSAScope
reqScopeToHSAScope(Request* req)
{
    HSAScope accessScope = HSAScope_UNSPECIFIED;
    if (req->isScoped()) {
        if (req->isWavefrontScope()) {
            accessScope = HSAScope_WAVEFRONT;
        } else if (req->isWorkgroupScope()) {
            accessScope = HSAScope_WORKGROUP;
        } else if (req->isDeviceScope()) {
            accessScope = HSAScope_DEVICE;
        } else if (req->isSystemScope()) {
            accessScope = HSAScope_SYSTEM;
        } else {
            fatal("Bad scope type");
        }
    }
    return accessScope;
}

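// Translate a request's memory-segment flags into the corresponding
// HSASegment value; a request with no recognized segment flag is a fatal
// error.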
HSASegment
reqSegmentToHSASegment(Request* req)
{
    HSASegment accessSegment = HSASegment_GLOBAL;

    if (req->isGlobalSegment()) {
        accessSegment = HSASegment_GLOBAL;
    } else if (req->isGroupSegment()) {
        accessSegment = HSASegment_GROUP;
    } else if (req->isPrivateSegment()) {
        accessSegment = HSASegment_PRIVATE;
    } else if (req->isKernargSegment()) {
        accessSegment = HSASegment_KERNARG;
    } else if (req->isReadonlySegment()) {
        accessSegment = HSASegment_READONLY;
    } else if (req->isSpillSegment()) {
        accessSegment = HSASegment_SPILL;
    } else if (req->isArgSegment()) {
        accessSegment = HSASegment_ARG;
    } else {
        fatal("Bad segment type");
    }

    return accessSegment;
}

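// The constructor pulls its configuration from the parameter object: the
// instruction and data cache pointers, the maximum number of outstanding
// requests, the deadlock threshold, the data-cache hit latency, and whether
// RfO coherence is assumed.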
GPUCoalescer::GPUCoalescer(const Params *p)
    : RubyPort(p), issueEvent(this), deadlockCheckEvent(this)
{
    m_store_waiting_on_load_cycles = 0;
    m_store_waiting_on_store_cycles = 0;
    m_load_waiting_on_store_cycles = 0;
    m_load_waiting_on_load_cycles = 0;

    m_outstanding_count = 0;

    m_max_outstanding_requests = 0;
    m_deadlock_threshold = 0;
    m_instCache_ptr = nullptr;
    m_dataCache_ptr = nullptr;

    m_instCache_ptr = p->icache;
    m_dataCache_ptr = p->dcache;
    m_max_outstanding_requests = p->max_outstanding_requests;
    m_deadlock_threshold = p->deadlock_threshold;

    assert(m_max_outstanding_requests > 0);
    assert(m_deadlock_threshold > 0);
    assert(m_instCache_ptr);
    assert(m_dataCache_ptr);

    m_data_cache_hit_latency = p->dcache_hit_latency;

    m_usingNetworkTester = p->using_network_tester;
    assumingRfOCoherence = p->assume_rfo;
}

GPUCoalescer::~GPUCoalescer()
{
}

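// Deadlock-check event handler: scan both request tables and panic if any
// outstanding request has been waiting longer than the deadlock threshold.
// The check reschedules itself for as long as requests remain outstanding.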
void
GPUCoalescer::wakeup()
{
    // Check for deadlock of any of the requests
    Cycles current_time = curCycle();

    // Check across all outstanding requests
    int total_outstanding = 0;

    RequestTable::iterator read = m_readRequestTable.begin();
    RequestTable::iterator read_end = m_readRequestTable.end();
    for (; read != read_end; ++read) {
        GPUCoalescerRequest* request = read->second;
        if (current_time - request->issue_time < m_deadlock_threshold)
            continue;

        panic("Possible Deadlock detected. Aborting!\n"
             "version: %d request.paddr: 0x%x m_readRequestTable: %d "
             "current time: %u issue_time: %d difference: %d\n", m_version,
              request->pkt->getAddr(), m_readRequestTable.size(),
              current_time * clockPeriod(), request->issue_time * clockPeriod(),
              (current_time - request->issue_time) * clockPeriod());
    }

    RequestTable::iterator write = m_writeRequestTable.begin();
    RequestTable::iterator write_end = m_writeRequestTable.end();
    for (; write != write_end; ++write) {
        GPUCoalescerRequest* request = write->second;
        if (current_time - request->issue_time < m_deadlock_threshold)
            continue;

        panic("Possible Deadlock detected. Aborting!\n"
             "version: %d request.paddr: 0x%x m_writeRequestTable: %d "
             "current time: %u issue_time: %d difference: %d\n", m_version,
              request->pkt->getAddr(), m_writeRequestTable.size(),
              current_time * clockPeriod(), request->issue_time * clockPeriod(),
              (current_time - request->issue_time) * clockPeriod());
    }

    total_outstanding += m_writeRequestTable.size();
    total_outstanding += m_readRequestTable.size();

    assert(m_outstanding_count == total_outstanding);

    if (m_outstanding_count > 0) {
        // If there are still outstanding requests, keep checking
        schedule(deadlockCheckEvent,
                 m_deadlock_threshold * clockPeriod() +
                 curTick());
    }
}

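// Reset all latency and delay histograms gathered by the coalescer.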
void
GPUCoalescer::resetStats()
{
    m_latencyHist.reset();
    m_missLatencyHist.reset();
    for (int i = 0; i < RubyRequestType_NUM; i++) {
        m_typeLatencyHist[i]->reset();
        m_missTypeLatencyHist[i]->reset();
        for (int j = 0; j < MachineType_NUM; j++) {
            m_missTypeMachLatencyHist[i][j]->reset();
        }
    }

    for (int i = 0; i < MachineType_NUM; i++) {
        m_missMachLatencyHist[i]->reset();

        m_IssueToInitialDelayHist[i]->reset();
        m_InitialToForwardDelayHist[i]->reset();
        m_ForwardToFirstResponseDelayHist[i]->reset();
        m_FirstResponseToCompletionDelayHist[i]->reset();
    }
}

void
GPUCoalescer::printProgress(ostream& out) const
{
}

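// Determine whether a new request can be issued right now. A request is
// rejected with BufferFull when the mandatory queue has no free slot, and
// with Aliased when the target line is blocked in the controller or already
// has an outstanding read or write in the request tables.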
RequestStatus
GPUCoalescer::getRequestStatus(PacketPtr pkt, RubyRequestType request_type)
{
    Addr line_addr = makeLineAddress(pkt->getAddr());

    if (!m_mandatory_q_ptr->areNSlotsAvailable(1, clockEdge())) {
        return RequestStatus_BufferFull;
    }

    if (m_controller->isBlocked(line_addr) &&
        request_type != RubyRequestType_Locked_RMW_Write) {
        return RequestStatus_Aliased;
    }

    if ((request_type == RubyRequestType_ST) ||
        (request_type == RubyRequestType_ATOMIC) ||
        (request_type == RubyRequestType_ATOMIC_RETURN) ||
        (request_type == RubyRequestType_ATOMIC_NO_RETURN) ||
        (request_type == RubyRequestType_RMW_Read) ||
        (request_type == RubyRequestType_RMW_Write) ||
        (request_type == RubyRequestType_Load_Linked) ||
        (request_type == RubyRequestType_Store_Conditional) ||
        (request_type == RubyRequestType_Locked_RMW_Read) ||
        (request_type == RubyRequestType_Locked_RMW_Write) ||
        (request_type == RubyRequestType_FLUSH)) {

        // Check if there is any outstanding read request for the same
        // cache line.
        if (m_readRequestTable.count(line_addr) > 0) {
            m_store_waiting_on_load_cycles++;
            return RequestStatus_Aliased;
        }

        if (m_writeRequestTable.count(line_addr) > 0) {
            // There is an outstanding write request for the cache line
            m_store_waiting_on_store_cycles++;
            return RequestStatus_Aliased;
        }
    } else {
        // Check if there is any outstanding write request for the same
        // cache line.
        if (m_writeRequestTable.count(line_addr) > 0) {
            m_load_waiting_on_store_cycles++;
            return RequestStatus_Aliased;
        }

        if (m_readRequestTable.count(line_addr) > 0) {
            // There is an outstanding read request for the cache line
            m_load_waiting_on_load_cycles++;
            return RequestStatus_Aliased;
        }
    }

    return RequestStatus_Ready;
}

// sets the kernelEndList
void
GPUCoalescer::insertKernel(int wavefront_id, PacketPtr pkt)
{
    // It is unclear whether this can ever happen, but be careful here so a
    // duplicate entry does not turn into a simulator hang later on.
    DPRINTF(GPUCoalescer, "inserting wf: %d to kernelEndList\n", wavefront_id);
    assert(kernelEndList.count(wavefront_id) == 0);

    kernelEndList[wavefront_id] = pkt;
    DPRINTF(GPUCoalescer, "kernelEndList->size() = %d\n",
            kernelEndList.size());
}

// Insert the request on the correct request table.  Return true if
// the entry was already present.
bool
GPUCoalescer::insertRequest(PacketPtr pkt, RubyRequestType request_type)
{
    assert(getRequestStatus(pkt, request_type) == RequestStatus_Ready ||
           pkt->req->isLockedRMW() ||
           !m_mandatory_q_ptr->areNSlotsAvailable(1, clockEdge()));

    int total_outstanding M5_VAR_USED =
        m_writeRequestTable.size() + m_readRequestTable.size();

    assert(m_outstanding_count == total_outstanding);

    // See if we should schedule a deadlock check
    if (deadlockCheckEvent.scheduled() == false) {
        schedule(deadlockCheckEvent, m_deadlock_threshold + curTick());
    }

    Addr line_addr = makeLineAddress(pkt->getAddr());
    if ((request_type == RubyRequestType_ST) ||
        (request_type == RubyRequestType_ATOMIC) ||
        (request_type == RubyRequestType_ATOMIC_RETURN) ||
        (request_type == RubyRequestType_ATOMIC_NO_RETURN) ||
        (request_type == RubyRequestType_RMW_Read) ||
        (request_type == RubyRequestType_RMW_Write) ||
        (request_type == RubyRequestType_Load_Linked) ||
        (request_type == RubyRequestType_Store_Conditional) ||
        (request_type == RubyRequestType_Locked_RMW_Read) ||
        (request_type == RubyRequestType_Locked_RMW_Write) ||
        (request_type == RubyRequestType_FLUSH)) {

        pair<RequestTable::iterator, bool> r =
            m_writeRequestTable.insert(RequestTable::value_type(line_addr,
                                       (GPUCoalescerRequest*) NULL));
        if (r.second) {
            RequestTable::iterator i = r.first;
            i->second = new GPUCoalescerRequest(pkt, request_type,
                                                curCycle());
            DPRINTF(GPUCoalescer,
                    "Inserting write request for paddr %#x for type %d\n",
                    pkt->req->getPaddr(), i->second->m_type);
            m_outstanding_count++;
        } else {
            return true;
        }
    } else {
        pair<RequestTable::iterator, bool> r =
            m_readRequestTable.insert(RequestTable::value_type(line_addr,
                                        (GPUCoalescerRequest*) NULL));

        if (r.second) {
            RequestTable::iterator i = r.first;
            i->second = new GPUCoalescerRequest(pkt, request_type,
                                                curCycle());
            DPRINTF(GPUCoalescer,
                    "Inserting read request for paddr %#x for type %d\n",
                    pkt->req->getPaddr(), i->second->m_type);
            m_outstanding_count++;
        } else {
            return true;
        }
    }

    m_outstandReqHist.sample(m_outstanding_count);

    total_outstanding = m_writeRequestTable.size() + m_readRequestTable.size();
    assert(m_outstanding_count == total_outstanding);

    return false;
}

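// Decrement the outstanding-request count after an entry has been erased
// from one of the request tables.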
void
GPUCoalescer::markRemoved()
{
    m_outstanding_count--;
    assert(m_outstanding_count ==
           m_writeRequestTable.size() + m_readRequestTable.size());
}

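// Erase a completed request from the appropriate request table (the write
// table for stores, RMWs, and LL/SC; the read table otherwise) and update
// the outstanding count via markRemoved().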
void
GPUCoalescer::removeRequest(GPUCoalescerRequest* srequest)
{
    assert(m_outstanding_count ==
           m_writeRequestTable.size() + m_readRequestTable.size());

    Addr line_addr = makeLineAddress(srequest->pkt->getAddr());
    if ((srequest->m_type == RubyRequestType_ST) ||
        (srequest->m_type == RubyRequestType_RMW_Read) ||
        (srequest->m_type == RubyRequestType_RMW_Write) ||
        (srequest->m_type == RubyRequestType_Load_Linked) ||
        (srequest->m_type == RubyRequestType_Store_Conditional) ||
        (srequest->m_type == RubyRequestType_Locked_RMW_Read) ||
        (srequest->m_type == RubyRequestType_Locked_RMW_Write)) {
        m_writeRequestTable.erase(line_addr);
    } else {
        m_readRequestTable.erase(line_addr);
    }

    markRemoved();
}

bool
GPUCoalescer::handleLlsc(Addr address, GPUCoalescerRequest* request)
{
    //
    // The success flag indicates whether the LLSC operation was successful.
    // LL ops will always succeed, but SC may fail if the cache line is no
    // longer locked.
    //
    bool success = true;
    if (request->m_type == RubyRequestType_Store_Conditional) {
        if (!m_dataCache_ptr->isLocked(address, m_version)) {
            //
            // For failed SC requests, indicate the failure to the cpu by
            // setting the extra data to zero.
            //
            request->pkt->req->setExtraData(0);
            success = false;
        } else {
            //
            // For successful SC requests, indicate the success to the cpu by
            // setting the extra data to one.
            //
            request->pkt->req->setExtraData(1);
        }
        //
        // Independent of success, all SC operations must clear the lock
        //
        m_dataCache_ptr->clearLocked(address);
    } else if (request->m_type == RubyRequestType_Load_Linked) {
        //
        // Note: To fully follow Alpha LLSC semantics, should the LL clear any
        // previously locked cache lines?
        //
        m_dataCache_ptr->setLocked(address, m_version);
    } else if ((m_dataCache_ptr->isTagPresent(address)) &&
               (m_dataCache_ptr->isLocked(address, m_version))) {
        //
        // Normal writes should clear the locked address
        //
        m_dataCache_ptr->clearLocked(address);
    }
    return success;
}

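// The writeCallback overloads below successively fill in default arguments
// (machine type, timing information, and the region flag). The full version
// looks up the request, handles LL/SC and locked-RMW bookkeeping, and hands
// the response to hitCallback.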
void
GPUCoalescer::writeCallback(Addr address, DataBlock& data)
{
    writeCallback(address, MachineType_NULL, data);
}

void
GPUCoalescer::writeCallback(Addr address,
                         MachineType mach,
                         DataBlock& data)
{
    writeCallback(address, mach, data, Cycles(0), Cycles(0), Cycles(0));
}

void
GPUCoalescer::writeCallback(Addr address,
                         MachineType mach,
                         DataBlock& data,
                         Cycles initialRequestTime,
                         Cycles forwardRequestTime,
                         Cycles firstResponseTime)
{
    writeCallback(address, mach, data,
                  initialRequestTime, forwardRequestTime, firstResponseTime,
                  false);
}

void
GPUCoalescer::writeCallback(Addr address,
                         MachineType mach,
                         DataBlock& data,
                         Cycles initialRequestTime,
                         Cycles forwardRequestTime,
                         Cycles firstResponseTime,
                         bool isRegion)
{
    assert(address == makeLineAddress(address));

    DPRINTF(GPUCoalescer, "write callback for address %#x\n", address);
    assert(m_writeRequestTable.count(makeLineAddress(address)));

    RequestTable::iterator i = m_writeRequestTable.find(address);
    assert(i != m_writeRequestTable.end());
    GPUCoalescerRequest* request = i->second;

    m_writeRequestTable.erase(i);
    markRemoved();

    assert((request->m_type == RubyRequestType_ST) ||
           (request->m_type == RubyRequestType_ATOMIC) ||
           (request->m_type == RubyRequestType_ATOMIC_RETURN) ||
           (request->m_type == RubyRequestType_ATOMIC_NO_RETURN) ||
           (request->m_type == RubyRequestType_RMW_Read) ||
           (request->m_type == RubyRequestType_RMW_Write) ||
           (request->m_type == RubyRequestType_Load_Linked) ||
           (request->m_type == RubyRequestType_Store_Conditional) ||
           (request->m_type == RubyRequestType_Locked_RMW_Read) ||
           (request->m_type == RubyRequestType_Locked_RMW_Write) ||
           (request->m_type == RubyRequestType_FLUSH));


    //
    // For Alpha, properly handle LL, SC, and write requests with respect to
    // locked cache blocks.
    //
    // Not valid for the Network_test protocol
    //
    bool success = true;
    if (!m_usingNetworkTester)
        success = handleLlsc(address, request);

    if (request->m_type == RubyRequestType_Locked_RMW_Read) {
        m_controller->blockOnQueue(address, m_mandatory_q_ptr);
    } else if (request->m_type == RubyRequestType_Locked_RMW_Write) {
        m_controller->unblock(address);
    }

    hitCallback(request, mach, data, success,
                request->issue_time, forwardRequestTime, firstResponseTime,
                isRegion);
}

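// As with the write path, the readCallback overloads default the machine
// type and timing arguments; the full version removes the entry from the
// read table and forwards the data to hitCallback.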
void
GPUCoalescer::readCallback(Addr address, DataBlock& data)
{
    readCallback(address, MachineType_NULL, data);
}

void
GPUCoalescer::readCallback(Addr address,
                        MachineType mach,
                        DataBlock& data)
{
    readCallback(address, mach, data, Cycles(0), Cycles(0), Cycles(0));
}

void
GPUCoalescer::readCallback(Addr address,
                        MachineType mach,
                        DataBlock& data,
                        Cycles initialRequestTime,
                        Cycles forwardRequestTime,
                        Cycles firstResponseTime)
{
    readCallback(address, mach, data,
                 initialRequestTime, forwardRequestTime, firstResponseTime,
                 false);
}

void
GPUCoalescer::readCallback(Addr address,
                        MachineType mach,
                        DataBlock& data,
                        Cycles initialRequestTime,
                        Cycles forwardRequestTime,
                        Cycles firstResponseTime,
                        bool isRegion)
{
    assert(address == makeLineAddress(address));
    assert(m_readRequestTable.count(makeLineAddress(address)));

    DPRINTF(GPUCoalescer, "read callback for address %#x\n", address);
    RequestTable::iterator i = m_readRequestTable.find(address);
    assert(i != m_readRequestTable.end());
    GPUCoalescerRequest* request = i->second;

    m_readRequestTable.erase(i);
    markRemoved();

    assert((request->m_type == RubyRequestType_LD) ||
           (request->m_type == RubyRequestType_IFETCH));

    hitCallback(request, mach, data, true,
                request->issue_time, forwardRequestTime, firstResponseTime,
                isRegion);
}

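// Complete a response: update cache MRU state, record miss latency, copy
// data between Ruby and every packet coalesced for this line, notify the
// RubyTester if it is in use, and finally schedule the per-packet completion
// callbacks.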
void
GPUCoalescer::hitCallback(GPUCoalescerRequest* srequest,
                       MachineType mach,
                       DataBlock& data,
                       bool success,
                       Cycles initialRequestTime,
                       Cycles forwardRequestTime,
                       Cycles firstResponseTime,
                       bool isRegion)
{
    PacketPtr pkt = srequest->pkt;
    Addr request_address = pkt->getAddr();
    Addr request_line_address = makeLineAddress(request_address);

    RubyRequestType type = srequest->m_type;

    // Set this cache entry to the most recently used
    if (type == RubyRequestType_IFETCH) {
        if (m_instCache_ptr->isTagPresent(request_line_address))
            m_instCache_ptr->setMRU(request_line_address);
    } else {
        if (m_dataCache_ptr->isTagPresent(request_line_address))
            m_dataCache_ptr->setMRU(request_line_address);
    }

    recordMissLatency(srequest, mach,
                      initialRequestTime,
                      forwardRequestTime,
                      firstResponseTime,
                      success, isRegion);

    // Update the data; this must be done for each request in the coalescer.
    int len = reqCoalescer[request_line_address].size();
    std::vector<PacketPtr> mylist;
    for (int i = 0; i < len; ++i) {
        PacketPtr pkt = reqCoalescer[request_line_address][i].first;
        assert(type ==
               reqCoalescer[request_line_address][i].second[PrimaryType]);
        request_address = pkt->getAddr();
        request_line_address = makeLineAddress(pkt->getAddr());
        if (pkt->getPtr<uint8_t>()) {
            if ((type == RubyRequestType_LD) ||
                (type == RubyRequestType_ATOMIC) ||
                (type == RubyRequestType_ATOMIC_RETURN) ||
                (type == RubyRequestType_IFETCH) ||
                (type == RubyRequestType_RMW_Read) ||
                (type == RubyRequestType_Locked_RMW_Read) ||
                (type == RubyRequestType_Load_Linked)) {
                memcpy(pkt->getPtr<uint8_t>(),
                       data.getData(getOffset(request_address),
                                    pkt->getSize()),
                       pkt->getSize());
            } else {
                data.setData(pkt->getPtr<uint8_t>(),
                             getOffset(request_address), pkt->getSize());
            }
        } else {
            DPRINTF(MemoryAccess,
                    "WARNING.  Data not transferred from Ruby to M5 for type " \
                    "%s\n",
                    RubyRequestType_to_string(type));
        }

        // If using the RubyTester, update the RubyTester sender state's
        // subBlock with the received data.  The tester will later access
        // this state.
        // Note: RubyPort will access its sender state before the
        // RubyTester.
        if (m_usingRubyTester) {
            RubyPort::SenderState *requestSenderState =
                safe_cast<RubyPort::SenderState*>(pkt->senderState);
            RubyTester::SenderState* testerSenderState =
                safe_cast<RubyTester::SenderState*>(requestSenderState->predecessor);
            testerSenderState->subBlock.mergeFrom(data);
        }

        mylist.push_back(pkt);
    }
    delete srequest;
    reqCoalescer.erase(request_line_address);
    assert(!reqCoalescer.count(request_line_address));

    completeHitCallback(mylist, len);
}

bool
GPUCoalescer::empty() const
{
    return m_writeRequestTable.empty() && m_readRequestTable.empty();
}

// Analyzes the packet to see if this request can be coalesced.
// If the request can be coalesced, it is added to the reqCoalescer table
// and makeRequest returns RequestStatus_Issued.
// If this is the first request to a cache line, the request is added to both
// the newRequests queue and the reqCoalescer table, and makeRequest
// returns RequestStatus_Issued.
// If there is a pending request to this cache line and this request
// can't be coalesced, RequestStatus_Aliased is returned and
// the packet needs to be reissued.
RequestStatus
GPUCoalescer::makeRequest(PacketPtr pkt)
{
    // Check for GPU Barrier Kernel End or Kernel Begin
    // Leave these to be handled by the child class
    // Kernel End/Barrier = isFlush + isRelease
    // Kernel Begin = isFlush + isAcquire
    if (pkt->req->isKernel()) {
        if (pkt->req->isAcquire()) {
            // This is a Kernel Begin; leave handling to the
            // virtual xCoalescer::makeRequest
            return RequestStatus_Issued;
        } else if (pkt->req->isRelease()) {
            // This is a Kernel End; leave handling to the
            // virtual xCoalescer::makeRequest.
            // If we are here then we didn't call
            // a virtual version of this function,
            // so we will also schedule the callback.
            int wf_id = 0;
            if (pkt->req->hasContextId()) {
                wf_id = pkt->req->contextId();
            }
            insertKernel(wf_id, pkt);
            newKernelEnds.push_back(wf_id);
            if (!issueEvent.scheduled()) {
                schedule(issueEvent, curTick());
            }
            return RequestStatus_Issued;
        }
    }

    // If the number of outstanding requests is greater than the max allowed,
    // return RequestStatus_BufferFull. This logic can be extended to
    // support proper backpressure.
    if (m_outstanding_count >= m_max_outstanding_requests) {
        return RequestStatus_BufferFull;
    }

    RubyRequestType primary_type = RubyRequestType_NULL;
    RubyRequestType secondary_type = RubyRequestType_NULL;

    if (pkt->isLLSC()) {
        //
        // Alpha LL/SC instructions need to be handled carefully by the cache
        // coherence protocol to ensure they follow the proper semantics. In
        // particular, by identifying the operations as atomic, the protocol
        // should understand that migratory sharing optimizations should not
        // be performed (i.e. a load between the LL and SC should not steal
        // away exclusive permission).
        //
        if (pkt->isWrite()) {
            primary_type = RubyRequestType_Store_Conditional;
        } else {
            assert(pkt->isRead());
            primary_type = RubyRequestType_Load_Linked;
        }
        secondary_type = RubyRequestType_ATOMIC;
    } else if (pkt->req->isLockedRMW()) {
        //
        // x86 locked instructions are translated to store cache coherence
        // requests because these requests should always be treated as read
        // exclusive operations and should leverage any migratory sharing
        // optimization built into the protocol.
        //
        if (pkt->isWrite()) {
            primary_type = RubyRequestType_Locked_RMW_Write;
        } else {
            assert(pkt->isRead());
            primary_type = RubyRequestType_Locked_RMW_Read;
        }
        secondary_type = RubyRequestType_ST;
    } else if (pkt->isAtomicOp()) {
        //
        // GPU Atomic Operation
        //
        primary_type = RubyRequestType_ATOMIC;
        secondary_type = RubyRequestType_ATOMIC;
    } else {
        if (pkt->isRead()) {
            if (pkt->req->isInstFetch()) {
                primary_type = secondary_type = RubyRequestType_IFETCH;
            } else {
#if THE_ISA == X86_ISA
                uint32_t flags = pkt->req->getFlags();
                bool storeCheck = flags &
                        (TheISA::StoreCheck << TheISA::FlagShift);
#else
                bool storeCheck = false;
#endif // X86_ISA
                if (storeCheck) {
                    primary_type = RubyRequestType_RMW_Read;
                    secondary_type = RubyRequestType_ST;
                } else {
                    primary_type = secondary_type = RubyRequestType_LD;
                }
            }
        } else if (pkt->isWrite()) {
            //
            // Note: M5 packets do not differentiate ST from RMW_Write
            //
            primary_type = secondary_type = RubyRequestType_ST;
        } else if (pkt->isFlush()) {
            primary_type = secondary_type = RubyRequestType_FLUSH;
        } else if (pkt->req->isRelease() || pkt->req->isAcquire()) {
            if (assumingRfOCoherence) {
                // If we reached here, this request must be a memFence
                // and the protocol implements RfO, so the coalescer can
                // assume sequential consistency and schedule the callback
                // immediately.
                // Currently the code implements fence callbacks
                // by reusing the mechanism for kernel completions.
                // This should be fixed.
                int wf_id = 0;
                if (pkt->req->hasContextId()) {
                    wf_id = pkt->req->contextId();
                }
                insertKernel(wf_id, pkt);
                newKernelEnds.push_back(wf_id);
                if (!issueEvent.scheduled()) {
                    schedule(issueEvent, curTick());
                }
                return RequestStatus_Issued;
            } else {
                // If not RfO, return issued here and let the child coalescer
                // take care of it.
                return RequestStatus_Issued;
            }
        } else {
            panic("Unsupported ruby packet type\n");
        }
    }

82711308Santhony.gutierrez@amd.com    // Check if there is any pending request to this cache line from
82811308Santhony.gutierrez@amd.com    // previous cycles.
82911308Santhony.gutierrez@amd.com    // If there is a pending request, return aliased. Since coalescing
83011308Santhony.gutierrez@amd.com    // across time is not permitted, aliased requests are not coalesced.
83111308Santhony.gutierrez@amd.com    // If a request for this address has already been issued, we must block
83211308Santhony.gutierrez@amd.com    RequestStatus status = getRequestStatus(pkt, primary_type);
83311308Santhony.gutierrez@amd.com    if (status != RequestStatus_Ready)
83411308Santhony.gutierrez@amd.com        return status;
83511308Santhony.gutierrez@amd.com
83611308Santhony.gutierrez@amd.com    Addr line_addr = makeLineAddress(pkt->getAddr());
83711308Santhony.gutierrez@amd.com
83811308Santhony.gutierrez@amd.com    // Check if this request can be coalesced with previous
83911308Santhony.gutierrez@amd.com    // requests from this cycle.
84011308Santhony.gutierrez@amd.com    if (!reqCoalescer.count(line_addr)) {
84111308Santhony.gutierrez@amd.com        // This is the first access to this cache line.
84211308Santhony.gutierrez@amd.com        // A new request to the memory subsystem has to be
84311308Santhony.gutierrez@amd.com        // made in the next cycle for this cache line, so
84411308Santhony.gutierrez@amd.com        // add this line addr to the "newRequests" queue
84511308Santhony.gutierrez@amd.com        newRequests.push_back(line_addr);
84611308Santhony.gutierrez@amd.com
84711308Santhony.gutierrez@amd.com    // There was a request to this cache line in this cycle,
84811308Santhony.gutierrez@amd.com    // let us see if we can coalesce this request with the previous
84911308Santhony.gutierrez@amd.com    // requests from this cycle
85011308Santhony.gutierrez@amd.com    } else if (primary_type !=
85111308Santhony.gutierrez@amd.com               reqCoalescer[line_addr][0].second[PrimaryType]) {
85211308Santhony.gutierrez@amd.com        // can't coalesce loads, stores, and atomics with one another!
85311308Santhony.gutierrez@amd.com        return RequestStatus_Aliased;
85411308Santhony.gutierrez@amd.com    } else if (pkt->req->isLockedRMW() ||
85511308Santhony.gutierrez@amd.com               reqCoalescer[line_addr][0].first->req->isLockedRMW()) {
85611308Santhony.gutierrez@amd.com        // can't coalesce locked accesses, but can coalesce atomics!
85711308Santhony.gutierrez@amd.com        return RequestStatus_Aliased;
85811308Santhony.gutierrez@amd.com    } else if (pkt->req->hasContextId() && pkt->req->isRelease() &&
85911308Santhony.gutierrez@amd.com               pkt->req->contextId() !=
86011308Santhony.gutierrez@amd.com               reqCoalescer[line_addr][0].first->req->contextId()) {
86111308Santhony.gutierrez@amd.com        // can't coalesce releases from different wavefronts
86211308Santhony.gutierrez@amd.com        return RequestStatus_Aliased;
86311308Santhony.gutierrez@amd.com    }
86411308Santhony.gutierrez@amd.com
86511308Santhony.gutierrez@amd.com    // in addition to the packet, we need to save both request types
86611308Santhony.gutierrez@amd.com    reqCoalescer[line_addr].push_back(
86711308Santhony.gutierrez@amd.com            RequestDesc(pkt, std::vector<RubyRequestType>()) );
86811308Santhony.gutierrez@amd.com    reqCoalescer[line_addr].back().second.push_back(primary_type);
86911308Santhony.gutierrez@amd.com    reqCoalescer[line_addr].back().second.push_back(secondary_type);
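    // Each reqCoalescer entry is a vector of (packet, {primary, secondary})
    // descriptors for one line address. completeIssue() issues a single Ruby
    // request for entry [0]; the remaining packets ride along with it.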
87011308Santhony.gutierrez@amd.com    if (!issueEvent.scheduled())
87111308Santhony.gutierrez@amd.com        schedule(issueEvent, curTick());
87211308Santhony.gutierrez@amd.com    // TODO: issue hardware prefetches here
87311308Santhony.gutierrez@amd.com    return RequestStatus_Issued;
87411308Santhony.gutierrez@amd.com}
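
// Illustrative sketch (not part of the simulator): two lane accesses that
// fall on the same cache line in the same cycle share one coalescer entry,
// e.g.
//
//     coalescer->makeRequest(pktA);  // first touch of line 0x1000: the line
//                                    // is queued in newRequests and the
//                                    // issue event is scheduled for this tick
//     coalescer->makeRequest(pktB);  // same line, same request type: appended
//                                    // to reqCoalescer[0x1000], nothing new
//                                    // is scheduled
//
// When the issue event fires, completeIssue() sends one Ruby request for
// line 0x1000 on behalf of both packets.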
87511308Santhony.gutierrez@amd.com
87611308Santhony.gutierrez@amd.comvoid
87711308Santhony.gutierrez@amd.comGPUCoalescer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type)
87811308Santhony.gutierrez@amd.com{
87911308Santhony.gutierrez@amd.com
88011308Santhony.gutierrez@amd.com    int proc_id = -1;
88111308Santhony.gutierrez@amd.com    if (pkt != NULL && pkt->req->hasContextId()) {
88211308Santhony.gutierrez@amd.com        proc_id = pkt->req->contextId();
88311308Santhony.gutierrez@amd.com    }
88411308Santhony.gutierrez@amd.com
88511308Santhony.gutierrez@amd.com    // If valid, copy the pc to the ruby request
88611308Santhony.gutierrez@amd.com    Addr pc = 0;
88711308Santhony.gutierrez@amd.com    if (pkt->req->hasPC()) {
88811308Santhony.gutierrez@amd.com        pc = pkt->req->getPC();
88911308Santhony.gutierrez@amd.com    }
89011308Santhony.gutierrez@amd.com
89111308Santhony.gutierrez@amd.com    // At the moment, setting scopes only matters for GPU spill space
89211308Santhony.gutierrez@amd.com    // accesses, i.e. when pkt->req->isStack() is true. The scope for those
89311308Santhony.gutierrez@amd.com    // is REPLACE, since spill data does not need to be flushed at the end
89411308Santhony.gutierrez@amd.com    // of a kernel, whereas private and local data may need to be visible
89511308Santhony.gutierrez@amd.com    // at the end of the kernel.
89811308Santhony.gutierrez@amd.com    HSASegment accessSegment = reqSegmentToHSASegment(pkt->req);
89911308Santhony.gutierrez@amd.com    HSAScope accessScope = reqScopeToHSAScope(pkt->req);
90011308Santhony.gutierrez@amd.com
90111308Santhony.gutierrez@amd.com    Addr line_addr = makeLineAddress(pkt->getAddr());
90211308Santhony.gutierrez@amd.com
90311308Santhony.gutierrez@amd.com    // Creating WriteMask that records written bytes
90411308Santhony.gutierrez@amd.com    // and atomic operations. This enables partial writes
90511308Santhony.gutierrez@amd.com    // and partial reads of those writes
90611308Santhony.gutierrez@amd.com    DataBlock dataBlock;
90711308Santhony.gutierrez@amd.com    dataBlock.clear();
90811308Santhony.gutierrez@amd.com    uint32_t blockSize = RubySystem::getBlockSizeBytes();
90911308Santhony.gutierrez@amd.com    std::vector<bool> accessMask(blockSize,false);
91011308Santhony.gutierrez@amd.com    std::vector< std::pair<int,AtomicOpFunctor*> > atomicOps;
91111308Santhony.gutierrez@amd.com    uint32_t tableSize = reqCoalescer[line_addr].size();
91211308Santhony.gutierrez@amd.com    for (int i = 0; i < tableSize; i++) {
91311308Santhony.gutierrez@amd.com        PacketPtr tmpPkt = reqCoalescer[line_addr][i].first;
91411308Santhony.gutierrez@amd.com        uint32_t tmpOffset = (tmpPkt->getAddr()) - line_addr;
91511308Santhony.gutierrez@amd.com        uint32_t tmpSize = tmpPkt->getSize();
91611308Santhony.gutierrez@amd.com        if (tmpPkt->isAtomicOp()) {
91711308Santhony.gutierrez@amd.com            std::pair<int,AtomicOpFunctor *> tmpAtomicOp(tmpOffset,
91811308Santhony.gutierrez@amd.com                                                        tmpPkt->getAtomicOp());
91911308Santhony.gutierrez@amd.com            atomicOps.push_back(tmpAtomicOp);
92011308Santhony.gutierrez@amd.com        } else if (tmpPkt->isWrite()) {
92111308Santhony.gutierrez@amd.com            dataBlock.setData(tmpPkt->getPtr<uint8_t>(),
92211308Santhony.gutierrez@amd.com                              tmpOffset, tmpSize);
92311308Santhony.gutierrez@amd.com        }
92411308Santhony.gutierrez@amd.com        for (int j = 0; j < tmpSize; j++) {
92511308Santhony.gutierrez@amd.com            accessMask[tmpOffset + j] = true;
92611308Santhony.gutierrez@amd.com        }
92711308Santhony.gutierrez@amd.com    }
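    // At this point accessMask covers every byte touched by the coalesced
    // packets, dataBlock holds the store data, and atomicOps holds the
    // (offset, AtomicOpFunctor*) pairs for any atomic operations.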
92811308Santhony.gutierrez@amd.com    std::shared_ptr<RubyRequest> msg;
92911308Santhony.gutierrez@amd.com    if (pkt->isAtomicOp()) {
93011308Santhony.gutierrez@amd.com        msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
93111308Santhony.gutierrez@amd.com                              pkt->getPtr<uint8_t>(),
93211308Santhony.gutierrez@amd.com                              pkt->getSize(), pc, secondary_type,
93311308Santhony.gutierrez@amd.com                              RubyAccessMode_Supervisor, pkt,
93411308Santhony.gutierrez@amd.com                              PrefetchBit_No, proc_id, 100,
93511308Santhony.gutierrez@amd.com                              blockSize, accessMask,
93611308Santhony.gutierrez@amd.com                              dataBlock, atomicOps,
93711308Santhony.gutierrez@amd.com                              accessScope, accessSegment);
93811308Santhony.gutierrez@amd.com    } else {
93911308Santhony.gutierrez@amd.com        msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
94011308Santhony.gutierrez@amd.com                              pkt->getPtr<uint8_t>(),
94111308Santhony.gutierrez@amd.com                              pkt->getSize(), pc, secondary_type,
94211308Santhony.gutierrez@amd.com                              RubyAccessMode_Supervisor, pkt,
94311308Santhony.gutierrez@amd.com                              PrefetchBit_No, proc_id, 100,
94411308Santhony.gutierrez@amd.com                              blockSize, accessMask,
94511308Santhony.gutierrez@amd.com                              dataBlock,
94611308Santhony.gutierrez@amd.com                              accessScope, accessSegment);
94711308Santhony.gutierrez@amd.com    }
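    // The two constructor calls above differ only in whether the atomicOps
    // vector accompanies the coalesced write data.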
94811308Santhony.gutierrez@amd.com    DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %s\n",
94911308Santhony.gutierrez@amd.com             curTick(), m_version, "Coal", "Begin", "", "",
95011308Santhony.gutierrez@amd.com             printAddress(msg->getPhysicalAddress()),
95111308Santhony.gutierrez@amd.com             RubyRequestType_to_string(secondary_type));
95211308Santhony.gutierrez@amd.com
95311308Santhony.gutierrez@amd.com    fatal_if(secondary_type == RubyRequestType_IFETCH,
95411308Santhony.gutierrez@amd.com             "there should not be any I-Fetch requests in the GPU Coalescer");
95511308Santhony.gutierrez@amd.com
95611308Santhony.gutierrez@amd.com    // Send the message to the cache controller
95711308Santhony.gutierrez@amd.com    fatal_if(m_data_cache_hit_latency == 0,
95811308Santhony.gutierrez@amd.com             "should not have a latency of zero");
95911308Santhony.gutierrez@amd.com
96011308Santhony.gutierrez@amd.com    assert(m_mandatory_q_ptr);
96111308Santhony.gutierrez@amd.com    m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
96211308Santhony.gutierrez@amd.com}
96311308Santhony.gutierrez@amd.com
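// Stream an unordered_map as "[ key=value ... ]"; used by
// GPUCoalescer::print() below to dump the outstanding request tables.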
96411308Santhony.gutierrez@amd.comtemplate <class KEY, class VALUE>
96511308Santhony.gutierrez@amd.comstd::ostream &
96611308Santhony.gutierrez@amd.comoperator<<(ostream &out, const std::unordered_map<KEY, VALUE> &map)
96711308Santhony.gutierrez@amd.com{
96811308Santhony.gutierrez@amd.com    out << "[";
96911308Santhony.gutierrez@amd.com    for (auto i = map.begin(); i != map.end(); ++i)
97011308Santhony.gutierrez@amd.com        out << " " << i->first << "=" << i->second;
97111308Santhony.gutierrez@amd.com    out << " ]";
97211308Santhony.gutierrez@amd.com
97311308Santhony.gutierrez@amd.com    return out;
97411308Santhony.gutierrez@amd.com}
97511308Santhony.gutierrez@amd.com
97611308Santhony.gutierrez@amd.comvoid
97711308Santhony.gutierrez@amd.comGPUCoalescer::print(ostream& out) const
97811308Santhony.gutierrez@amd.com{
97911308Santhony.gutierrez@amd.com    out << "[GPUCoalescer: " << m_version
98011308Santhony.gutierrez@amd.com        << ", outstanding requests: " << m_outstanding_count
98111308Santhony.gutierrez@amd.com        << ", read request table: " << m_readRequestTable
98211308Santhony.gutierrez@amd.com        << ", write request table: " << m_writeRequestTable
98311308Santhony.gutierrez@amd.com        << "]";
98411308Santhony.gutierrez@amd.com}
98511308Santhony.gutierrez@amd.com
98611308Santhony.gutierrez@amd.com// This can be called from setState whenever coherence permissions are
98711308Santhony.gutierrez@amd.com// upgraded; when invoked, coherence violations will be checked for the
98811308Santhony.gutierrez@amd.com// given block.
98911308Santhony.gutierrez@amd.comvoid
99011308Santhony.gutierrez@amd.comGPUCoalescer::checkCoherence(Addr addr)
99111308Santhony.gutierrez@amd.com{
99211308Santhony.gutierrez@amd.com#ifdef CHECK_COHERENCE
99311308Santhony.gutierrez@amd.com    m_ruby_system->checkGlobalCoherenceInvariant(addr);
99411308Santhony.gutierrez@amd.com#endif
99511308Santhony.gutierrez@amd.com}
99611308Santhony.gutierrez@amd.com
99711308Santhony.gutierrez@amd.comvoid
99811308Santhony.gutierrez@amd.comGPUCoalescer::recordRequestType(SequencerRequestType requestType) {
99911308Santhony.gutierrez@amd.com    DPRINTF(RubyStats, "Recorded statistic: %s\n",
100011308Santhony.gutierrez@amd.com            SequencerRequestType_to_string(requestType));
100111308Santhony.gutierrez@amd.com}
100211308Santhony.gutierrez@amd.com
100311308Santhony.gutierrez@amd.comGPUCoalescer::IssueEvent::IssueEvent(GPUCoalescer* _seq)
100411308Santhony.gutierrez@amd.com    : Event(Progress_Event_Pri), seq(_seq)
100511308Santhony.gutierrez@amd.com{
100611308Santhony.gutierrez@amd.com}
100711308Santhony.gutierrez@amd.com
100811308Santhony.gutierrez@amd.com
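// completeIssue() runs whenever makeRequest() has scheduled the issue event
// for the current tick: it turns each line address queued in newRequests into
// a single Ruby request and then fires the kernel-end callbacks recorded for
// this cycle.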
100911308Santhony.gutierrez@amd.comvoid
101011308Santhony.gutierrez@amd.comGPUCoalescer::completeIssue()
101111308Santhony.gutierrez@amd.com{
101211308Santhony.gutierrez@amd.com    // newRequests has the cacheline addresses of all the
101311308Santhony.gutierrez@amd.com    // requests which need to be issued to the memory subsystem
101411308Santhony.gutierrez@amd.com    // in this cycle
101511308Santhony.gutierrez@amd.com    int len = newRequests.size();
101611308Santhony.gutierrez@amd.com    DPRINTF(GPUCoalescer, "Completing issue for %d new requests.\n", len);
101711308Santhony.gutierrez@amd.com    for (int i = 0; i < len; ++i) {
101811308Santhony.gutierrez@amd.com        // Get the requests from the reqCoalescer table. Only the first
101911308Santhony.gutierrez@amd.com        // request for each cacheline is taken; the remaining requests are
102011308Santhony.gutierrez@amd.com        // coalesced with that first request, so only one request is
102111308Santhony.gutierrez@amd.com        // issued per cacheline.
102211308Santhony.gutierrez@amd.com        RequestDesc info = reqCoalescer[newRequests[i]][0];
102311308Santhony.gutierrez@amd.com        PacketPtr pkt = info.first;
102411308Santhony.gutierrez@amd.com        DPRINTF(GPUCoalescer, "Completing for newReq %d: paddr %#x\n",
102511308Santhony.gutierrez@amd.com                i, pkt->req->getPaddr());
102611308Santhony.gutierrez@amd.com        // Insert this request into the read/writeRequestTables. These tables
102711308Santhony.gutierrez@amd.com        // are used to track aliased requests in the makeRequest subroutine.
102811308Santhony.gutierrez@amd.com        bool found = insertRequest(pkt, info.second[PrimaryType]);
102911308Santhony.gutierrez@amd.com
103011308Santhony.gutierrez@amd.com        if (found) {
103111308Santhony.gutierrez@amd.com            panic("GPUCoalescer::makeRequest should never be called if the "
103211308Santhony.gutierrez@amd.com                  "request is already outstanding\n");
103311308Santhony.gutierrez@amd.com        }
103411308Santhony.gutierrez@amd.com
103511308Santhony.gutierrez@amd.com        // Issue request to ruby subsystem
103611308Santhony.gutierrez@amd.com        issueRequest(pkt, info.second[SecondaryType]);
103711308Santhony.gutierrez@amd.com    }
103811308Santhony.gutierrez@amd.com    newRequests.clear();
103911308Santhony.gutierrez@amd.com
104011308Santhony.gutierrez@amd.com    // Process any kernel-end releases that were recorded this cycle
104111308Santhony.gutierrez@amd.com    len = newKernelEnds.size();
104211308Santhony.gutierrez@amd.com    for (int i = 0; i < len; i++) {
104311308Santhony.gutierrez@amd.com        kernelCallback(newKernelEnds[i]);
104411308Santhony.gutierrez@amd.com    }
104511308Santhony.gutierrez@amd.com    newKernelEnds.clear();
104611308Santhony.gutierrez@amd.com}
104711308Santhony.gutierrez@amd.com
104811308Santhony.gutierrez@amd.comvoid
104911308Santhony.gutierrez@amd.comGPUCoalescer::IssueEvent::process()
105011308Santhony.gutierrez@amd.com{
105111308Santhony.gutierrez@amd.com    seq->completeIssue();
105211308Santhony.gutierrez@amd.com}
105311308Santhony.gutierrez@amd.com
105411308Santhony.gutierrez@amd.comconst char *
105511308Santhony.gutierrez@amd.comGPUCoalescer::IssueEvent::description() const
105611308Santhony.gutierrez@amd.com{
105711308Santhony.gutierrez@amd.com    return "Issue coalesced request";
105811308Santhony.gutierrez@amd.com}
105911308Santhony.gutierrez@amd.com
106011308Santhony.gutierrez@amd.comvoid
106111308Santhony.gutierrez@amd.comGPUCoalescer::evictionCallback(Addr address)
106211308Santhony.gutierrez@amd.com{
106311308Santhony.gutierrez@amd.com    ruby_eviction_callback(address);
106411308Santhony.gutierrez@amd.com}
106511308Santhony.gutierrez@amd.com
106611308Santhony.gutierrez@amd.comvoid
106711308Santhony.gutierrez@amd.comGPUCoalescer::kernelCallback(int wavefront_id)
106811308Santhony.gutierrez@amd.com{
106911308Santhony.gutierrez@amd.com    assert(kernelEndList.count(wavefront_id));
107011308Santhony.gutierrez@amd.com
107111308Santhony.gutierrez@amd.com    ruby_hit_callback(kernelEndList[wavefront_id]);
107211308Santhony.gutierrez@amd.com
107311308Santhony.gutierrez@amd.com    kernelEndList.erase(wavefront_id);
107411308Santhony.gutierrez@amd.com}
107511308Santhony.gutierrez@amd.com
107611308Santhony.gutierrez@amd.comvoid
107711308Santhony.gutierrez@amd.comGPUCoalescer::atomicCallback(Addr address,
107811308Santhony.gutierrez@amd.com                             MachineType mach,
107911308Santhony.gutierrez@amd.com                             const DataBlock& data)
108011308Santhony.gutierrez@amd.com{
108111308Santhony.gutierrez@amd.com    assert(address == makeLineAddress(address));
108211308Santhony.gutierrez@amd.com
108311308Santhony.gutierrez@amd.com    DPRINTF(GPUCoalescer, "atomic callback for address %#x\n", address);
108411308Santhony.gutierrez@amd.com    assert(m_writeRequestTable.count(makeLineAddress(address)));
108511308Santhony.gutierrez@amd.com
108611308Santhony.gutierrez@amd.com    RequestTable::iterator i = m_writeRequestTable.find(address);
108711308Santhony.gutierrez@amd.com    assert(i != m_writeRequestTable.end());
108811308Santhony.gutierrez@amd.com    GPUCoalescerRequest* srequest = i->second;
108911308Santhony.gutierrez@amd.com
109011308Santhony.gutierrez@amd.com    m_writeRequestTable.erase(i);
109111308Santhony.gutierrez@amd.com    markRemoved();
109211308Santhony.gutierrez@amd.com
109311308Santhony.gutierrez@amd.com    assert((srequest->m_type == RubyRequestType_ATOMIC) ||
109411308Santhony.gutierrez@amd.com           (srequest->m_type == RubyRequestType_ATOMIC_RETURN) ||
109511308Santhony.gutierrez@amd.com           (srequest->m_type == RubyRequestType_ATOMIC_NO_RETURN));
109611308Santhony.gutierrez@amd.com
109711308Santhony.gutierrez@amd.com
109811308Santhony.gutierrez@amd.com    // Atomics don't write to cache, so there is no MRU update...
109911308Santhony.gutierrez@amd.com
110011308Santhony.gutierrez@amd.com    recordMissLatency(srequest, mach,
110111308Santhony.gutierrez@amd.com                      srequest->issue_time, Cycles(0), Cycles(0), true, false);
110211308Santhony.gutierrez@amd.com
110311308Santhony.gutierrez@amd.com    PacketPtr pkt = srequest->pkt;
110411308Santhony.gutierrez@amd.com    Addr request_address = pkt->getAddr();
110511308Santhony.gutierrez@amd.com    Addr request_line_address = makeLineAddress(pkt->getAddr());
110611308Santhony.gutierrez@amd.com
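    // Walk every coalesced packet for this line: for atomics that return a
    // value, copy the pre-op data from the responder back into the packet,
    // then hand all packets to completeHitCallback().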
110711308Santhony.gutierrez@amd.com    int len = reqCoalescer[request_line_address].size();
110811308Santhony.gutierrez@amd.com    std::vector<PacketPtr> mylist;
110911308Santhony.gutierrez@amd.com    for (int i = 0; i < len; ++i) {
111011308Santhony.gutierrez@amd.com        PacketPtr pkt = reqCoalescer[request_line_address][i].first;
111111308Santhony.gutierrez@amd.com        assert(srequest->m_type ==
111211308Santhony.gutierrez@amd.com               reqCoalescer[request_line_address][i].second[PrimaryType]);
111311308Santhony.gutierrez@amd.com        request_address = (pkt->getAddr());
111411308Santhony.gutierrez@amd.com        request_line_address = makeLineAddress(request_address);
111511308Santhony.gutierrez@amd.com        if (pkt->getPtr<uint8_t>() &&
111611308Santhony.gutierrez@amd.com            srequest->m_type != RubyRequestType_ATOMIC_NO_RETURN) {
111711308Santhony.gutierrez@amd.com            /* atomics are done in memory, and return the data *before* the atomic op... */
111811308Santhony.gutierrez@amd.com            memcpy(pkt->getPtr<uint8_t>(),
111911308Santhony.gutierrez@amd.com                   data.getData(getOffset(request_address),
112011308Santhony.gutierrez@amd.com                                pkt->getSize()),
112111308Santhony.gutierrez@amd.com                   pkt->getSize());
112211308Santhony.gutierrez@amd.com        } else {
112311308Santhony.gutierrez@amd.com            DPRINTF(MemoryAccess,
112411308Santhony.gutierrez@amd.com                    "WARNING.  Data not transferred from Ruby to M5 for type " \
112511308Santhony.gutierrez@amd.com                    "%s\n",
112611308Santhony.gutierrez@amd.com                    RubyRequestType_to_string(srequest->m_type));
112711308Santhony.gutierrez@amd.com        }
112811308Santhony.gutierrez@amd.com
112911308Santhony.gutierrez@amd.com        // If using the RubyTester, update the RubyTester sender state's
113011308Santhony.gutierrez@amd.com        // subBlock with the received data.  The tester will later access
113111308Santhony.gutierrez@amd.com        // this state.
113211308Santhony.gutierrez@amd.com        // Note: RubyPort will access its sender state before the
113311308Santhony.gutierrez@amd.com        // RubyTester.
113411308Santhony.gutierrez@amd.com        if (m_usingRubyTester) {
113511308Santhony.gutierrez@amd.com            RubyPort::SenderState *requestSenderState =
113611308Santhony.gutierrez@amd.com                safe_cast<RubyPort::SenderState*>(pkt->senderState);
113711308Santhony.gutierrez@amd.com            RubyTester::SenderState* testerSenderState =
113811308Santhony.gutierrez@amd.com                safe_cast<RubyTester::SenderState*>(requestSenderState->predecessor);
113911308Santhony.gutierrez@amd.com            testerSenderState->subBlock.mergeFrom(data);
114011308Santhony.gutierrez@amd.com        }
114111308Santhony.gutierrez@amd.com
114211308Santhony.gutierrez@amd.com        mylist.push_back(pkt);
114311308Santhony.gutierrez@amd.com    }
114411308Santhony.gutierrez@amd.com    delete srequest;
114511308Santhony.gutierrez@amd.com    reqCoalescer.erase(request_line_address);
114611308Santhony.gutierrez@amd.com    assert(!reqCoalescer.count(request_line_address));
114711308Santhony.gutierrez@amd.com
114811308Santhony.gutierrez@amd.com    completeHitCallback(mylist, len);
114911308Santhony.gutierrez@amd.com}
115011308Santhony.gutierrez@amd.com
115111308Santhony.gutierrez@amd.comvoid
115211308Santhony.gutierrez@amd.comGPUCoalescer::recordCPReadCallBack(MachineID myMachID, MachineID senderMachID)
115311308Santhony.gutierrez@amd.com{
115411308Santhony.gutierrez@amd.com    if (myMachID == senderMachID) {
115511308Santhony.gutierrez@amd.com        CP_TCPLdHits++;
115611308Santhony.gutierrez@amd.com    } else if (machineIDToMachineType(senderMachID) == MachineType_TCP) {
115711308Santhony.gutierrez@amd.com        CP_TCPLdTransfers++;
115811308Santhony.gutierrez@amd.com    } else if (machineIDToMachineType(senderMachID) == MachineType_TCC) {
115911308Santhony.gutierrez@amd.com        CP_TCCLdHits++;
116011308Santhony.gutierrez@amd.com    } else {
116111308Santhony.gutierrez@amd.com        CP_LdMiss++;
116211308Santhony.gutierrez@amd.com    }
116311308Santhony.gutierrez@amd.com}
116411308Santhony.gutierrez@amd.com
116511308Santhony.gutierrez@amd.comvoid
116611308Santhony.gutierrez@amd.comGPUCoalescer::recordCPWriteCallBack(MachineID myMachID, MachineID senderMachID)
116711308Santhony.gutierrez@amd.com{
116811308Santhony.gutierrez@amd.com    if (myMachID == senderMachID) {
116911308Santhony.gutierrez@amd.com        CP_TCPStHits++;
117011308Santhony.gutierrez@amd.com    } else if (machineIDToMachineType(senderMachID) == MachineType_TCP) {
117111308Santhony.gutierrez@amd.com        CP_TCPStTransfers++;
117211308Santhony.gutierrez@amd.com    } else if (machineIDToMachineType(senderMachID) == MachineType_TCC) {
117311308Santhony.gutierrez@amd.com        CP_TCCStHits++;
117411308Santhony.gutierrez@amd.com    } else {
117511308Santhony.gutierrez@amd.com        CP_StMiss++;
117611308Santhony.gutierrez@amd.com    }
117711308Santhony.gutierrez@amd.com}
117811308Santhony.gutierrez@amd.com
117911308Santhony.gutierrez@amd.comvoid
118011308Santhony.gutierrez@amd.comGPUCoalescer::completeHitCallback(std::vector<PacketPtr> & mylist, int len)
118111308Santhony.gutierrez@amd.com{
118211308Santhony.gutierrez@amd.com    for (int i = 0; i < len; ++i) {
118311308Santhony.gutierrez@amd.com        RubyPort::SenderState *ss =
118411308Santhony.gutierrez@amd.com            safe_cast<RubyPort::SenderState *>(mylist[i]->senderState);
118511308Santhony.gutierrez@amd.com        MemSlavePort *port = ss->port;
118611308Santhony.gutierrez@amd.com        assert(port != NULL);
118711308Santhony.gutierrez@amd.com
118811308Santhony.gutierrez@amd.com        mylist[i]->senderState = ss->predecessor;
118911308Santhony.gutierrez@amd.com        delete ss;
119011308Santhony.gutierrez@amd.com        port->hitCallback(mylist[i]);
119111308Santhony.gutierrez@amd.com        trySendRetries();
119211308Santhony.gutierrez@amd.com    }
119311308Santhony.gutierrez@amd.com
119411308Santhony.gutierrez@amd.com    testDrainComplete();
119511308Santhony.gutierrez@amd.com}
119611308Santhony.gutierrez@amd.com
119711308Santhony.gutierrez@amd.comPacketPtr
119811308Santhony.gutierrez@amd.comGPUCoalescer::mapAddrToPkt(Addr address)
119911308Santhony.gutierrez@amd.com{
120011308Santhony.gutierrez@amd.com    RequestTable::iterator i = m_readRequestTable.find(address);
120111308Santhony.gutierrez@amd.com    assert(i != m_readRequestTable.end());
120211308Santhony.gutierrez@amd.com    GPUCoalescerRequest* request = i->second;
120311308Santhony.gutierrez@amd.com    return request->pkt;
120411308Santhony.gutierrez@amd.com}
120511308Santhony.gutierrez@amd.com
120611308Santhony.gutierrez@amd.comvoid
120711308Santhony.gutierrez@amd.comGPUCoalescer::recordMissLatency(GPUCoalescerRequest* srequest,
120811308Santhony.gutierrez@amd.com                                MachineType mach,
120911308Santhony.gutierrez@amd.com                                Cycles initialRequestTime,
121011308Santhony.gutierrez@amd.com                                Cycles forwardRequestTime,
121111308Santhony.gutierrez@amd.com                                Cycles firstResponseTime,
121211308Santhony.gutierrez@amd.com                                bool success, bool isRegion)
121311308Santhony.gutierrez@amd.com{
121411308Santhony.gutierrez@amd.com    RubyRequestType type = srequest->m_type;
121511308Santhony.gutierrez@amd.com    Cycles issued_time = srequest->issue_time;
121611308Santhony.gutierrez@amd.com    Cycles completion_time = curCycle();
121711308Santhony.gutierrez@amd.com    assert(completion_time >= issued_time);
121811308Santhony.gutierrez@amd.com    Cycles total_lat = completion_time - issued_time;
121911308Santhony.gutierrez@amd.com
122011308Santhony.gutierrez@amd.com    // cache stats (valid for RfO protocol only)
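    // The responding machine type selects the bucket: TCP is a hit in the
    // local TCP, L1Cache_wCC is a TCP-to-TCP transfer, TCC is a hit in the
    // TCC, and anything else counts as a miss.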
122111308Santhony.gutierrez@amd.com    if (mach == MachineType_TCP) {
122211308Santhony.gutierrez@amd.com        if (type == RubyRequestType_LD) {
122311308Santhony.gutierrez@amd.com            GPU_TCPLdHits++;
122411308Santhony.gutierrez@amd.com        } else {
122511308Santhony.gutierrez@amd.com            GPU_TCPStHits++;
122611308Santhony.gutierrez@amd.com        }
122711308Santhony.gutierrez@amd.com    } else if (mach == MachineType_L1Cache_wCC) {
122811308Santhony.gutierrez@amd.com        if (type == RubyRequestType_LD) {
122911308Santhony.gutierrez@amd.com            GPU_TCPLdTransfers++;
123011308Santhony.gutierrez@amd.com        } else {
123111308Santhony.gutierrez@amd.com            GPU_TCPStTransfers++;
123211308Santhony.gutierrez@amd.com        }
123311308Santhony.gutierrez@amd.com    } else if (mach == MachineType_TCC) {
123411308Santhony.gutierrez@amd.com        if (type == RubyRequestType_LD) {
123511308Santhony.gutierrez@amd.com            GPU_TCCLdHits++;
123611308Santhony.gutierrez@amd.com        } else {
123711308Santhony.gutierrez@amd.com            GPU_TCCStHits++;
123811308Santhony.gutierrez@amd.com        }
123911308Santhony.gutierrez@amd.com    } else {
124011308Santhony.gutierrez@amd.com        if (type == RubyRequestType_LD) {
124111308Santhony.gutierrez@amd.com            GPU_LdMiss++;
124211308Santhony.gutierrez@amd.com        } else {
124311308Santhony.gutierrez@amd.com            GPU_StMiss++;
124411308Santhony.gutierrez@amd.com        }
124511308Santhony.gutierrez@amd.com    }
124611308Santhony.gutierrez@amd.com
124711308Santhony.gutierrez@amd.com    // Profile all access latency, even zero latency accesses
124811308Santhony.gutierrez@amd.com    m_latencyHist.sample(total_lat);
124911308Santhony.gutierrez@amd.com    m_typeLatencyHist[type]->sample(total_lat);
125011308Santhony.gutierrez@amd.com
125111308Santhony.gutierrez@amd.com    // Profile the miss latency for all non-zero demand misses
125211308Santhony.gutierrez@amd.com    if (total_lat != Cycles(0)) {
125311308Santhony.gutierrez@amd.com        m_missLatencyHist.sample(total_lat);
125411308Santhony.gutierrez@amd.com        m_missTypeLatencyHist[type]->sample(total_lat);
125511308Santhony.gutierrez@amd.com
125611308Santhony.gutierrez@amd.com        if (mach != MachineType_NUM) {
125711308Santhony.gutierrez@amd.com            m_missMachLatencyHist[mach]->sample(total_lat);
125811308Santhony.gutierrez@amd.com            m_missTypeMachLatencyHist[type][mach]->sample(total_lat);
125911308Santhony.gutierrez@amd.com
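
            // Only break the latency into per-segment delays when the four
            // timestamps are monotonically ordered; otherwise the unsigned
            // Cycles subtractions below would not be meaningful.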
126011308Santhony.gutierrez@amd.com            if ((issued_time <= initialRequestTime) &&
126111308Santhony.gutierrez@amd.com                (initialRequestTime <= forwardRequestTime) &&
126211308Santhony.gutierrez@amd.com                (forwardRequestTime <= firstResponseTime) &&
126311308Santhony.gutierrez@amd.com                (firstResponseTime <= completion_time)) {
126411308Santhony.gutierrez@amd.com
126511308Santhony.gutierrez@amd.com                m_IssueToInitialDelayHist[mach]->sample(
126611308Santhony.gutierrez@amd.com                    initialRequestTime - issued_time);
126711308Santhony.gutierrez@amd.com                m_InitialToForwardDelayHist[mach]->sample(
126811308Santhony.gutierrez@amd.com                    forwardRequestTime - initialRequestTime);
126911308Santhony.gutierrez@amd.com                m_ForwardToFirstResponseDelayHist[mach]->sample(
127011308Santhony.gutierrez@amd.com                    firstResponseTime - forwardRequestTime);
127111308Santhony.gutierrez@amd.com                m_FirstResponseToCompletionDelayHist[mach]->sample(
127211308Santhony.gutierrez@amd.com                    completion_time - firstResponseTime);
127311308Santhony.gutierrez@amd.com            }
127411308Santhony.gutierrez@amd.com        }
127511308Santhony.gutierrez@amd.com
127611308Santhony.gutierrez@amd.com    }
127711308Santhony.gutierrez@amd.com
127811308Santhony.gutierrez@amd.com    DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %d cycles\n",
127911308Santhony.gutierrez@amd.com             curTick(), m_version, "Coal",
128011308Santhony.gutierrez@amd.com             success ? "Done" : "SC_Failed", "", "",
128111308Santhony.gutierrez@amd.com             printAddress(srequest->pkt->getAddr()), total_lat);
128211308Santhony.gutierrez@amd.com}
128311308Santhony.gutierrez@amd.com
128411308Santhony.gutierrez@amd.comvoid
128511308Santhony.gutierrez@amd.comGPUCoalescer::regStats()
128611308Santhony.gutierrez@amd.com{
128711308Santhony.gutierrez@amd.com    // These statistical variables are not for display.
128811308Santhony.gutierrez@amd.com    // The profiler will collate these across different
128911308Santhony.gutierrez@amd.com    // coalescers and display those collated statistics.
129011308Santhony.gutierrez@amd.com    m_outstandReqHist.init(10);
129111308Santhony.gutierrez@amd.com    m_latencyHist.init(10);
129211308Santhony.gutierrez@amd.com    m_missLatencyHist.init(10);
129311308Santhony.gutierrez@amd.com
129411308Santhony.gutierrez@amd.com    for (int i = 0; i < RubyRequestType_NUM; i++) {
129511308Santhony.gutierrez@amd.com        m_typeLatencyHist.push_back(new Stats::Histogram());
129611308Santhony.gutierrez@amd.com        m_typeLatencyHist[i]->init(10);
129711308Santhony.gutierrez@amd.com
129811308Santhony.gutierrez@amd.com        m_missTypeLatencyHist.push_back(new Stats::Histogram());
129911308Santhony.gutierrez@amd.com        m_missTypeLatencyHist[i]->init(10);
130011308Santhony.gutierrez@amd.com    }
130111308Santhony.gutierrez@amd.com
130211308Santhony.gutierrez@amd.com    for (int i = 0; i < MachineType_NUM; i++) {
130311308Santhony.gutierrez@amd.com        m_missMachLatencyHist.push_back(new Stats::Histogram());
130411308Santhony.gutierrez@amd.com        m_missMachLatencyHist[i]->init(10);
130511308Santhony.gutierrez@amd.com
130611308Santhony.gutierrez@amd.com        m_IssueToInitialDelayHist.push_back(new Stats::Histogram());
130711308Santhony.gutierrez@amd.com        m_IssueToInitialDelayHist[i]->init(10);
130811308Santhony.gutierrez@amd.com
130911308Santhony.gutierrez@amd.com        m_InitialToForwardDelayHist.push_back(new Stats::Histogram());
131011308Santhony.gutierrez@amd.com        m_InitialToForwardDelayHist[i]->init(10);
131111308Santhony.gutierrez@amd.com
131211308Santhony.gutierrez@amd.com        m_ForwardToFirstResponseDelayHist.push_back(new Stats::Histogram());
131311308Santhony.gutierrez@amd.com        m_ForwardToFirstResponseDelayHist[i]->init(10);
131411308Santhony.gutierrez@amd.com
131511308Santhony.gutierrez@amd.com        m_FirstResponseToCompletionDelayHist.push_back(new Stats::Histogram());
131611308Santhony.gutierrez@amd.com        m_FirstResponseToCompletionDelayHist[i]->init(10);
131711308Santhony.gutierrez@amd.com    }
131811308Santhony.gutierrez@amd.com
131911308Santhony.gutierrez@amd.com    for (int i = 0; i < RubyRequestType_NUM; i++) {
132011308Santhony.gutierrez@amd.com        m_missTypeMachLatencyHist.push_back(std::vector<Stats::Histogram *>());
132111308Santhony.gutierrez@amd.com
132211308Santhony.gutierrez@amd.com        for (int j = 0; j < MachineType_NUM; j++) {
132311308Santhony.gutierrez@amd.com            m_missTypeMachLatencyHist[i].push_back(new Stats::Histogram());
132411308Santhony.gutierrez@amd.com            m_missTypeMachLatencyHist[i][j]->init(10);
132511308Santhony.gutierrez@amd.com        }
132611308Santhony.gutierrez@amd.com    }
132711308Santhony.gutierrez@amd.com
132811308Santhony.gutierrez@amd.com    // GPU cache stats
132911308Santhony.gutierrez@amd.com    GPU_TCPLdHits
133011308Santhony.gutierrez@amd.com        .name(name() + ".gpu_tcp_ld_hits")
133111308Santhony.gutierrez@amd.com        .desc("loads that hit in the TCP")
133211308Santhony.gutierrez@amd.com        ;
133311308Santhony.gutierrez@amd.com    GPU_TCPLdTransfers
133411308Santhony.gutierrez@amd.com        .name(name() + ".gpu_tcp_ld_transfers")
133511308Santhony.gutierrez@amd.com        .desc("TCP to TCP load transfers")
133611308Santhony.gutierrez@amd.com        ;
133711308Santhony.gutierrez@amd.com    GPU_TCCLdHits
133811308Santhony.gutierrez@amd.com        .name(name() + ".gpu_tcc_ld_hits")
133911308Santhony.gutierrez@amd.com        .desc("loads that hit in the TCC")
134011308Santhony.gutierrez@amd.com        ;
134111308Santhony.gutierrez@amd.com    GPU_LdMiss
134211308Santhony.gutierrez@amd.com        .name(name() + ".gpu_ld_misses")
134311308Santhony.gutierrez@amd.com        .desc("loads that miss in the GPU")
134411308Santhony.gutierrez@amd.com        ;
134511308Santhony.gutierrez@amd.com
134611308Santhony.gutierrez@amd.com    GPU_TCPStHits
134711308Santhony.gutierrez@amd.com        .name(name() + ".gpu_tcp_st_hits")
134811308Santhony.gutierrez@amd.com        .desc("stores that hit in the TCP")
134911308Santhony.gutierrez@amd.com        ;
135011308Santhony.gutierrez@amd.com    GPU_TCPStTransfers
135111308Santhony.gutierrez@amd.com        .name(name() + ".gpu_tcp_st_transfers")
135211308Santhony.gutierrez@amd.com        .desc("TCP to TCP store transfers")
135311308Santhony.gutierrez@amd.com        ;
135411308Santhony.gutierrez@amd.com    GPU_TCCStHits
135511308Santhony.gutierrez@amd.com        .name(name() + ".gpu_tcc_st_hits")
135611308Santhony.gutierrez@amd.com        .desc("stores that hit in the TCC")
135711308Santhony.gutierrez@amd.com        ;
135811308Santhony.gutierrez@amd.com    GPU_StMiss
135911308Santhony.gutierrez@amd.com        .name(name() + ".gpu_st_misses")
136011308Santhony.gutierrez@amd.com        .desc("stores that miss in the GPU")
136111308Santhony.gutierrez@amd.com        ;
136211308Santhony.gutierrez@amd.com
136311308Santhony.gutierrez@amd.com    // CP cache stats
136411308Santhony.gutierrez@amd.com    CP_TCPLdHits
136511308Santhony.gutierrez@amd.com        .name(name() + ".cp_tcp_ld_hits")
136611308Santhony.gutierrez@amd.com        .desc("loads that hit in the TCP")
136711308Santhony.gutierrez@amd.com        ;
136811308Santhony.gutierrez@amd.com    CP_TCPLdTransfers
136911308Santhony.gutierrez@amd.com        .name(name() + ".cp_tcp_ld_transfers")
137011308Santhony.gutierrez@amd.com        .desc("TCP to TCP load transfers")
137111308Santhony.gutierrez@amd.com        ;
137211308Santhony.gutierrez@amd.com    CP_TCCLdHits
137311308Santhony.gutierrez@amd.com        .name(name() + ".cp_tcc_ld_hits")
137411308Santhony.gutierrez@amd.com        .desc("loads that hit in the TCC")
137511308Santhony.gutierrez@amd.com        ;
137611308Santhony.gutierrez@amd.com    CP_LdMiss
137711308Santhony.gutierrez@amd.com        .name(name() + ".cp_ld_misses")
137811308Santhony.gutierrez@amd.com        .desc("loads that miss in the GPU")
137911308Santhony.gutierrez@amd.com        ;
138011308Santhony.gutierrez@amd.com
138111308Santhony.gutierrez@amd.com    CP_TCPStHits
138211308Santhony.gutierrez@amd.com        .name(name() + ".cp_tcp_st_hits")
138311308Santhony.gutierrez@amd.com        .desc("stores that hit in the TCP")
138411308Santhony.gutierrez@amd.com        ;
138511308Santhony.gutierrez@amd.com    CP_TCPStTransfers
138611308Santhony.gutierrez@amd.com        .name(name() + ".cp_tcp_st_transfers")
138711308Santhony.gutierrez@amd.com        .desc("TCP to TCP store transfers")
138811308Santhony.gutierrez@amd.com        ;
138911308Santhony.gutierrez@amd.com    CP_TCCStHits
139011308Santhony.gutierrez@amd.com        .name(name() + ".cp_tcc_st_hits")
139111308Santhony.gutierrez@amd.com        .desc("stores that hit in the TCC")
139211308Santhony.gutierrez@amd.com        ;
139311308Santhony.gutierrez@amd.com    CP_StMiss
139411308Santhony.gutierrez@amd.com        .name(name() + ".cp_st_misses")
139511308Santhony.gutierrez@amd.com        .desc("stores that miss in the GPU")
139611308Santhony.gutierrez@amd.com        ;
139711308Santhony.gutierrez@amd.com}
1398