1/* 2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc. 3 * All rights reserved. 4 * 5 * For use for simulation and test purposes only 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright notice, 11 * this list of conditions and the following disclaimer. 12 * 13 * 2. Redistributions in binary form must reproduce the above copyright notice, 14 * this list of conditions and the following disclaimer in the documentation 15 * and/or other materials provided with the distribution. 16 * 17 * 3. Neither the name of the copyright holder nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 32 * 33 * Author: Steve Reinhardt 34 */ 35 36#ifndef __SHADER_HH__ 37#define __SHADER_HH__ 38 39#include <functional> 40#include <string> 41 42#include "arch/isa.hh" 43#include "arch/isa_traits.hh" 44#include "base/types.hh" 45#include "cpu/simple/atomic.hh" 46#include "cpu/simple/timing.hh" 47#include "cpu/simple_thread.hh" 48#include "cpu/thread_context.hh" 49#include "cpu/thread_state.hh" 50#include "enums/MemType.hh" 51#include "gpu-compute/compute_unit.hh" 52#include "gpu-compute/gpu_tlb.hh" 53#include "gpu-compute/lds_state.hh" 54#include "gpu-compute/qstruct.hh" 55#include "mem/page_table.hh" 56#include "mem/port.hh" 57#include "mem/request.hh" 58#include "params/Shader.hh" 59#include "sim/faults.hh" 60#include "sim/process.hh" 61#include "sim/sim_object.hh" 62 63class BaseTLB; 64class GpuDispatcher; 65 66namespace TheISA 67{ 68 class GpuTLB; 69} 70 71static const int LDS_SIZE = 65536; 72 73// Class Shader: This describes a single shader instance. Most 74// configurations will only have a single shader. 75 76class Shader : public ClockedObject 77{ 78 protected: 79 // Shader's clock period in terms of number of ticks of curTime, 80 // aka global simulation clock 81 Tick clock; 82 83 public: 84 typedef ShaderParams Params; 85 enum hsail_mode_e {SIMT,VECTOR_SCALAR}; 86 87 // clock related functions ; maps to-and-from 88 // Simulation ticks and shader clocks. 89 Tick frequency() const { return SimClock::Frequency / clock; } 90 91 Tick ticks(int numCycles) const { return (Tick)clock * numCycles; } 92 93 Tick getClock() const { return clock; } 94 Tick curCycle() const { return curTick() / clock; } 95 Tick tickToCycles(Tick val) const { return val / clock;} 96 97 98 SimpleThread *cpuThread; 99 ThreadContext *gpuTc; 100 BaseCPU *cpuPointer; 101 102 void processTick(); 103 EventFunctionWrapper tickEvent; 104 105 // is this simulation going to be timing mode in the memory? 106 bool timingSim; 107 hsail_mode_e hsail_mode; 108 109 // If set, issue acq packet @ kernel launch 110 int impl_kern_boundary_sync; 111 // If set, generate a separate packet for acquire/release on 112 // ld_acquire/st_release/atomic operations 113 int separate_acquire_release; 114 // If set, fetch returns may be coissued with instructions 115 int coissue_return; 116 // If set, always dump all 64 gprs to trace 117 int trace_vgpr_all; 118 // Number of cu units in the shader 119 int n_cu; 120 // Number of wavefront slots per cu 121 int n_wf; 122 // The size of global memory 123 int globalMemSize; 124 125 /* 126 * Bytes/work-item for call instruction 127 * The number of arguments for an hsail function will 128 * vary. We simply determine the maximum # of arguments 129 * required by any hsail function up front before the 130 * simulation (during parsing of the Brig) and record 131 * that number here. 132 */ 133 int funcargs_size; 134 135 // Tracks CU that rr dispatcher should attempt scheduling 136 int nextSchedCu; 137 138 // Size of scheduled add queue 139 uint32_t sa_n; 140 141 // Pointer to value to be increments 142 std::vector<uint32_t*> sa_val; 143 // When to do the increment 144 std::vector<uint64_t> sa_when; 145 // Amount to increment by 146 std::vector<int32_t> sa_x; 147 148 // List of Compute Units (CU's) 149 std::vector<ComputeUnit*> cuList; 150 151 uint64_t tick_cnt; 152 uint64_t box_tick_cnt; 153 uint64_t start_tick_cnt; 154 155 GpuDispatcher *dispatcher; 156 157 Shader(const Params *p); 158 ~Shader(); 159 virtual void init(); 160 161 // Run shader 162 void exec(); 163 164 // Check to see if shader is busy 165 bool busy(); 166 167 // Schedule a 32-bit value to be incremented some time in the future 168 void ScheduleAdd(uint32_t *val, Tick when, int x); 169 bool processTimingPacket(PacketPtr pkt); 170 171 void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id, 172 MemCmd cmd, bool suppress_func_errors); 173 174 void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id); 175 176 void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id, 177 bool suppress_func_errors); 178 179 void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id); 180 181 void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id, 182 bool suppress_func_errors); 183 184 void doFunctionalAccess(const RequestPtr &req, MemCmd cmd, void *data, 185 bool suppress_func_errors, int cu_id); 186 187 void 188 registerCU(int cu_id, ComputeUnit *compute_unit) 189 { 190 cuList[cu_id] = compute_unit; 191 } 192 193 void handshake(GpuDispatcher *dispatcher); 194 bool dispatch_workgroups(NDRange *ndr); 195 Addr mmap(int length); 196 void functionalTLBAccess(PacketPtr pkt, int cu_id, BaseTLB::Mode mode); 197 void updateContext(int cid); 198 void hostWakeUp(BaseCPU *cpu); 199}; 200 201#endif // __SHADER_HH__ 202