# GPU.py revision 11534
#
# Copyright (c) 2015 Advanced Micro Devices, Inc.
# All rights reserved.
#
# For use for simulation and test purposes only
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.
# IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# Author: Steve Reinhardt
#

# Parameter declarations for the GPU compute model.  Each SimObject class
# below names the C++ class/header it is paired with through cxx_class and
# cxx_header; the remaining attributes are the parameters and ports exposed
# to configuration scripts.  The Enum classes at the bottom only declare
# their list of value names.

from ClockedObject import ClockedObject
from Device import DmaDevice
from m5.defines import buildEnv
from m5.params import *
from m5.proxy import *
from m5.SimObject import SimObject
from MemObject import MemObject
from Process import EmulatedDriver
from Bridge import Bridge
from LdsState import LdsState


# Value names accepted by ComputeUnit.prefetch_prev_type.
class PrefetchType(Enum):
    vals = [
        'PF_CU',
        'PF_PHASE',
        'PF_WF',
        'PF_STRIDE',
        'PF_END',
    ]


# Vector register file (VRF); one instance is tied to a SIMD via simd_id.
class VectorRegisterFile(SimObject):
    type = 'VectorRegisterFile'
    cxx_class = 'VectorRegisterFile'
    cxx_header = 'gpu-compute/vector_register_file.hh'

    simd_id = Param.Int(0, 'SIMD ID associated with this VRF')
    num_regs_per_simd = Param.Int(2048,
                                  'number of vector registers per SIMD')
    wfSize = Param.Int(64, 'Wavefront size (in work items)')
    min_alloc = Param.Int(4, 'min number of VGPRs allocated per WF')


# A single wavefront slot.  simdId and wf_slot_id have no default and must
# be supplied by the configuration script.
class Wavefront(SimObject):
    type = 'Wavefront'
    cxx_class = 'Wavefront'
    cxx_header = 'gpu-compute/wavefront.hh'

    simdId = Param.Int('SIMD id (0-ComputeUnit.num_SIMDs)')
    wf_slot_id = Param.Int('wavefront id (0-ComputeUnit.max_wfs)')
    wfSize = Param.Int(64, 'Wavefront size (in work items)')


# A compute unit (CU): owns a vector of wavefronts and vector register
# files, plus the ports towards memory, the TLB hierarchy, the SQC and the
# LDS.
class ComputeUnit(MemObject):
    type = 'ComputeUnit'
    cxx_class = 'ComputeUnit'
    cxx_header = 'gpu-compute/compute_unit.hh'

    wavefronts = VectorParam.Wavefront('Number of wavefronts')
    wfSize = Param.Int(64, 'Wavefront size (in work items)')
    num_SIMDs = Param.Int(4, 'number of SIMD units per CU')

    spbypass_pipe_length = Param.Int(
        4, 'vector ALU Single Precision bypass latency')

    dpbypass_pipe_length = Param.Int(
        8, 'vector ALU Double Precision bypass latency')

    issue_period = Param.Int(4, 'number of cycles per issue period')
    num_global_mem_pipes = Param.Int(
        1, 'number of global memory pipes per CU')
    num_shared_mem_pipes = Param.Int(
        1, 'number of shared memory pipes per CU')
    n_wf = Param.Int(1, 'Number of wavefront slots per SIMD')
    mem_req_latency = Param.Int(
        9,
        "Latency for request from the cu to ruby. "
        "Represents the pipeline to reach the TCP and "
        "specified in GPU clock cycles")
    mem_resp_latency = Param.Int(
        9,
        "Latency for responses from ruby to the "
        "cu. Represents the pipeline between the TCP "
        "and cu as well as TCP data array access. "
        "Specified in GPU clock cycles")
    # Parent.any resolves the System through the proxy mechanism imported
    # from m5.proxy rather than requiring an explicit assignment.
    system = Param.System(Parent.any, "system object")
    cu_id = Param.Int('CU id')
    vrf_to_coalescer_bus_width = Param.Int(
        32, "VRF->Coalescer data bus width in bytes")
    coalescer_to_vrf_bus_width = Param.Int(
        32, "Coalescer->VRF data bus width in bytes")

    memory_port = VectorMasterPort("Port to the memory system")
    translation_port = VectorMasterPort('Port to the TLB hierarchy')
    # Description previously lacked the closing parenthesis.
    sqc_port = MasterPort("Port to the SQC (I-cache)")
    sqc_tlb_port = MasterPort("Port to the TLB for the SQC (I-cache)")
    perLaneTLB = Param.Bool(False, "enable per-lane TLB")
    # A separating space was missing between the two string fragments.
    prefetch_depth = Param.Int(
        0,
        "Number of prefetches triggered at a time "
        "(0 turns off prefetching)")
    prefetch_stride = Param.Int(
        1, "Fixed Prefetch Stride (1 means next-page)")
    prefetch_prev_type = Param.PrefetchType(
        'PF_PHASE',
        "Prefetch the stride "
        "from last mem req in lane of "
        "CU|Phase|Wavefront")
    execPolicy = Param.String("OLDEST-FIRST", "WF execution selection policy")
    xactCasMode = Param.Bool(False, "Behavior of xact_cas_load magic instr.")
    debugSegFault = Param.Bool(False, "enable debugging GPU seg faults")
    functionalTLB = Param.Bool(False, "Assume TLB causes no delay")

    localMemBarrier = Param.Bool(
        False, "Assume Barriers do not wait on kernel end")

    countPages = Param.Bool(
        False,
        "Generate per-CU file of all pages touched "
        "and how many times")
    global_mem_queue_size = Param.Int(
        256,
        "Number of entries in the global "
        "memory pipeline's queues")
    local_mem_queue_size = Param.Int(
        256,
        "Number of entries in the local "
        "memory pipeline's queues")
    ldsBus = Bridge()  # the bridge between the CU and its LDS
    ldsPort = MasterPort("The port that goes to the LDS")
    localDataStore = Param.LdsState("the LDS for this CU")

    vector_register_file = VectorParam.VectorRegisterFile(
        "Vector register file")


# The shader: holds the vector of compute units and shader-wide settings.
class Shader(ClockedObject):
    type = 'Shader'
    cxx_class = 'Shader'
    cxx_header = 'gpu-compute/shader.hh'

    CUs = VectorParam.ComputeUnit('Number of compute units')
    n_wf = Param.Int(1, 'Number of wavefront slots per SIMD')
    # The next two descriptions were triple-quoted strings spanning two
    # source lines, which embedded a newline and the continuation line's
    # indentation in the text; adjacent literals keep them single-line.
    impl_kern_boundary_sync = Param.Bool(
        True,
        "Insert acq/rel packets into "
        "ruby at kernel boundaries")
    separate_acquire_release = Param.Bool(
        False,
        "Do ld_acquire/st_release generate separate requests for the "
        "acquire and release?")
    globalmem = Param.MemorySize('64kB', 'Memory size')
    timing = Param.Bool(False, 'timing memory accesses')

    cpu_pointer = Param.BaseCPU(NULL, "pointer to base CPU")
    translation = Param.Bool(False, "address translation")


# Emulated driver taking the list of code file names.
class ClDriver(EmulatedDriver):
    type = 'ClDriver'
    cxx_header = 'gpu-compute/cl_driver.hh'
    codefile = VectorParam.String('code file name(s)')


# DMA device that references the shader, a CPU and the ClDriver.
class GpuDispatcher(DmaDevice):
    type = 'GpuDispatcher'
    cxx_header = 'gpu-compute/dispatcher.hh'
    # put at 8GB line for now
    pio_addr = Param.Addr(0x200000000, "Device Address")
    pio_latency = Param.Latency('1ns', "Programmed IO latency")
    shader_pointer = Param.Shader('pointer to shader')
    translation_port = MasterPort('Port to the dispatcher TLB')
    cpu = Param.BaseCPU("CPU to wake up on kernel completion")

    cl_driver = Param.ClDriver('pointer to driver')


# Operation-type tags (OT_*).
class OpType(Enum):
    vals = [
        'OT_NULL',
        'OT_ALU',
        'OT_SPECIAL',
        'OT_GLOBAL_READ',
        'OT_GLOBAL_WRITE',
        'OT_GLOBAL_ATOMIC',
        'OT_GLOBAL_HIST',
        'OT_GLOBAL_LDAS',
        'OT_SHARED_READ',
        'OT_SHARED_WRITE',
        'OT_SHARED_ATOMIC',
        'OT_SHARED_HIST',
        'OT_SHARED_LDAS',
        'OT_PRIVATE_READ',
        'OT_PRIVATE_WRITE',
        'OT_PRIVATE_ATOMIC',
        'OT_PRIVATE_HIST',
        'OT_PRIVATE_LDAS',
        'OT_SPILL_READ',
        'OT_SPILL_WRITE',
        'OT_SPILL_ATOMIC',
        'OT_SPILL_HIST',
        'OT_SPILL_LDAS',
        'OT_READONLY_READ',
        'OT_READONLY_WRITE',
        'OT_READONLY_ATOMIC',
        'OT_READONLY_HIST',
        'OT_READONLY_LDAS',
        'OT_FLAT_READ',
        'OT_FLAT_WRITE',
        'OT_FLAT_ATOMIC',
        'OT_FLAT_HIST',
        'OT_FLAT_LDAS',
        'OT_KERN_READ',
        'OT_BRANCH',

        # note: Only the OT_BOTH_MEMFENCE seems to be supported in the 1.0F
        # version of the compiler.
        'OT_SHARED_MEMFENCE',
        'OT_GLOBAL_MEMFENCE',
        'OT_BOTH_MEMFENCE',

        'OT_BARRIER',
        'OT_PRINT',
        'OT_RET',
        'OT_NOP',
        'OT_ARG',
    ]


# Memory data-type tags (M_*).
class MemType(Enum):
    vals = [
        'M_U8',
        'M_U16',
        'M_U32',
        'M_U64',
        'M_S8',
        'M_S16',
        'M_S32',
        'M_S64',
        'M_F16',
        'M_F32',
        'M_F64',
    ]


# Memory-operation tags (MO_*).
class MemOpType(Enum):
    vals = [
        'MO_LD',
        'MO_ST',
        'MO_LDAS',
        'MO_LDA',
        'MO_AAND',
        'MO_AOR',
        'MO_AXOR',
        'MO_ACAS',
        'MO_AEXCH',
        'MO_AADD',
        'MO_ASUB',
        'MO_AINC',
        'MO_ADEC',
        'MO_AMAX',
        'MO_AMIN',
        'MO_ANRAND',
        'MO_ANROR',
        'MO_ANRXOR',
        'MO_ANRCAS',
        'MO_ANREXCH',
        'MO_ANRADD',
        'MO_ANRSUB',
        'MO_ANRINC',
        'MO_ANRDEC',
        'MO_ANRMAX',
        'MO_ANRMIN',
        'MO_HAND',
        'MO_HOR',
        'MO_HXOR',
        'MO_HCAS',
        'MO_HEXCH',
        'MO_HADD',
        'MO_HSUB',
        'MO_HINC',
        'MO_HDEC',
        'MO_HMAX',
        'MO_HMIN',
        'MO_UNDEF',
    ]


# Storage-class tags (SC_*).
class StorageClassType(Enum):
    vals = [
        'SC_SPILL',
        'SC_GLOBAL',
        'SC_SHARED',
        'SC_PRIVATE',
        'SC_READONLY',
        'SC_KERNARG',
        'SC_NONE',
    ]


# Register-type tags (RT_*).
class RegisterType(Enum):
    vals = [
        'RT_VECTOR',
        'RT_SCALAR',
        'RT_CONDITION',
        'RT_HARDWARE',
        'RT_NONE',
    ]


# Memory-order tags (MEMORY_ORDER_*).
class GenericMemoryOrder(Enum):
    vals = [
        'MEMORY_ORDER_NONE',
        'MEMORY_ORDER_RELAXED',
        'MEMORY_ORDER_SC_ACQUIRE',
        'MEMORY_ORDER_SC_RELEASE',
        'MEMORY_ORDER_SC_ACQUIRE_RELEASE',
    ]


# Memory-scope tags (MEMORY_SCOPE_*).
class GenericMemoryScope(Enum):
    vals = [
        'MEMORY_SCOPE_NONE',
        'MEMORY_SCOPE_WORKITEM',
        'MEMORY_SCOPE_WAVEFRONT',
        'MEMORY_SCOPE_WORKGROUP',
        'MEMORY_SCOPE_DEVICE',
        'MEMORY_SCOPE_SYSTEM',
    ]