GPU.py revision 11534
#
#  Copyright (c) 2015 Advanced Micro Devices, Inc.
#  All rights reserved.
#
#  For use for simulation and test purposes only
#
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions are met:
#
#  1. Redistributions of source code must retain the above copyright notice,
#  this list of conditions and the following disclaimer.
#
#  2. Redistributions in binary form must reproduce the above copyright notice,
#  this list of conditions and the following disclaimer in the documentation
#  and/or other materials provided with the distribution.
#
#  3. Neither the name of the copyright holder nor the names of its contributors
#  may be used to endorse or promote products derived from this software
#  without specific prior written permission.
#
#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
#  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
#  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
#  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
#  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
#  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
#  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
#  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
#  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
#  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
#  POSSIBILITY OF SUCH DAMAGE.
#
#  Author: Steve Reinhardt
#
354762Snate@binkert.org
364762Snate@binkert.orgfrom ClockedObject import ClockedObject
372736Sktlim@umich.edufrom Device import DmaDevice
382736Sktlim@umich.edufrom m5.defines import buildEnv
392736Sktlim@umich.edufrom m5.params import *
402736Sktlim@umich.edufrom m5.proxy import *
412736Sktlim@umich.edufrom m5.SimObject import SimObject
422736Sktlim@umich.edufrom MemObject import MemObject
432736Sktlim@umich.edufrom Process import EmulatedDriver
442736Sktlim@umich.edufrom Bridge import Bridge
452736Sktlim@umich.edufrom LdsState import LdsState
462736Sktlim@umich.edu
# Enum of prefetch kinds; used as the type of
# ComputeUnit.prefetch_prev_type in this file.
# NOTE(review): member order is kept exactly as in the original because the
# numeric value of each name presumably follows list position -- confirm
# against m5.params.Enum before reordering.
class PrefetchType(Enum):
    vals = [
        'PF_CU',
        'PF_PHASE',
        'PF_WF',
        'PF_STRIDE',
        'PF_END',
    ]

# SimObject declaration for the vector register file; the C++ class is
# declared in gpu-compute/vector_register_file.hh.
class VectorRegisterFile(SimObject):
    type = 'VectorRegisterFile'
    cxx_class = 'VectorRegisterFile'
    cxx_header = 'gpu-compute/vector_register_file.hh'

    # All four parameters carry defaults, so none has to be set explicitly.
    simd_id = Param.Int(0, 'SIMD ID associated with this VRF')
    num_regs_per_simd = Param.Int(2048, 'number of vector registers per SIMD')
    wfSize = Param.Int(64, 'Wavefront size (in work items)')
    min_alloc = Param.Int(4, 'min number of VGPRs allocated per WF')

# SimObject declaration for a wavefront; the C++ class is declared in
# gpu-compute/wavefront.hh.
class Wavefront(SimObject):
    type = 'Wavefront'
    cxx_class = 'Wavefront'
    cxx_header = 'gpu-compute/wavefront.hh'

    # simdId and wf_slot_id are declared without a default value; only
    # wfSize has one (64).
    simdId = Param.Int('SIMD id (0-ComputeUnit.num_SIMDs)')
    wf_slot_id = Param.Int('wavefront id (0-ComputeUnit.max_wfs)')
    wfSize = Param.Int(64, 'Wavefront size (in work items)')

# Parameter and port declarations for the ComputeUnit SimObject; the C++
# class is declared in gpu-compute/compute_unit.hh.
class ComputeUnit(MemObject):
    type = 'ComputeUnit'
    cxx_class = 'ComputeUnit'
    cxx_header = 'gpu-compute/compute_unit.hh'

    # Wavefront / SIMD organisation.
    wavefronts = VectorParam.Wavefront('Number of wavefronts')
    wfSize = Param.Int(64, 'Wavefront size (in work items)')
    num_SIMDs = Param.Int(4, 'number of SIMD units per CU')

    # Vector ALU bypass latencies.
    spbypass_pipe_length = Param.Int(4, 'vector ALU Single Precision bypass '
                                        'latency')

    dpbypass_pipe_length = Param.Int(8, 'vector ALU Double Precision bypass '
                                        'latency')

    # Issue and memory-pipeline configuration.
    issue_period = Param.Int(4, 'number of cycles per issue period')
    num_global_mem_pipes = Param.Int(1,'number of global memory pipes per CU')
    num_shared_mem_pipes = Param.Int(1,'number of shared memory pipes per CU')
    n_wf = Param.Int(1, 'Number of wavefront slots per SIMD')
    mem_req_latency = Param.Int(9, "Latency for request from the cu to ruby. "
                                "Represents the pipeline to reach the TCP and "
                                "specified in GPU clock cycles")
    mem_resp_latency = Param.Int(9, "Latency for responses from ruby to the "
                                 "cu. Represents the pipeline between the TCP "
                                 "and cu as well as TCP data array access. "
                                 "Specified in GPU clock cycles")
    # Resolved through the parent hierarchy (Parent.any) when not set.
    system = Param.System(Parent.any, "system object")
    # No default value is declared for cu_id.
    cu_id = Param.Int('CU id')
    vrf_to_coalescer_bus_width = Param.Int(32, "VRF->Coalescer data bus width "
                                           "in bytes")
    coalescer_to_vrf_bus_width = Param.Int(32, "Coalescer->VRF data bus width "
                                           "in bytes")

    # Ports towards the memory system, the TLBs and the SQC.
    memory_port = VectorMasterPort("Port to the memory system")
    translation_port = VectorMasterPort('Port to the TLB hierarchy')
    # Description fixed: the original text had an unbalanced "(I-cache".
    sqc_port = MasterPort("Port to the SQC (I-cache)")
    sqc_tlb_port = MasterPort("Port to the TLB for the SQC (I-cache)")
    perLaneTLB = Param.Bool(False, "enable per-lane TLB")

    # Prefetching knobs. The first description fragment now ends with a
    # space; the original concatenated to "...at a time(0 turns off...".
    prefetch_depth = Param.Int(0, "Number of prefetches triggered at a time "
                               "(0 turns off prefetching)")
    prefetch_stride = Param.Int(1, "Fixed Prefetch Stride (1 means next-page)")
    prefetch_prev_type = Param.PrefetchType('PF_PHASE', "Prefetch the stride "
                                            "from last mem req in lane of "
                                            "CU|Phase|Wavefront")

    # Execution policy and debugging / modelling switches.
    execPolicy = Param.String("OLDEST-FIRST", "WF execution selection policy")
    xactCasMode = Param.Bool(False, "Behavior of xact_cas_load magic instr.")
    debugSegFault = Param.Bool(False, "enable debugging GPU seg faults")
    functionalTLB = Param.Bool(False, "Assume TLB causes no delay")

    localMemBarrier = Param.Bool(False, "Assume Barriers do not wait on "
                                        "kernel end")

    countPages = Param.Bool(False, "Generate per-CU file of all pages touched "
                                   "and how many times")

    # Queue depths of the global and local memory pipelines.
    global_mem_queue_size = Param.Int(256, "Number of entries in the global "
                                      "memory pipeline's queues")
    local_mem_queue_size = Param.Int(256, "Number of entries in the local "
                                     "memory pipeline's queues")

    # Local data store (LDS) attachment.
    ldsBus = Bridge() # the bridge between the CU and its LDS
    ldsPort = MasterPort("The port that goes to the LDS")
    localDataStore = Param.LdsState("the LDS for this CU")

    vector_register_file = VectorParam.VectorRegisterFile("Vector register "
                                                          "file")

# SimObject declaration for the shader; the C++ class is declared in
# gpu-compute/shader.hh.
class Shader(ClockedObject):
    type = 'Shader'
    cxx_class = 'Shader'
    cxx_header = 'gpu-compute/shader.hh'

    CUs = VectorParam.ComputeUnit('Number of compute units')
    n_wf = Param.Int(1, 'Number of wavefront slots per SIMD')
    # The two triple-quoted descriptions below are reproduced verbatim,
    # including their embedded newline and indentation, so the description
    # text seen at runtime is unchanged.
    impl_kern_boundary_sync = Param.Bool(True, """Insert acq/rel packets into
                                                  ruby at kernel boundaries""")
    separate_acquire_release = Param.Bool(False,
        """Do ld_acquire/st_release generate separate requests for the
        acquire and release?""")
    globalmem = Param.MemorySize('64kB', 'Memory size')
    timing = Param.Bool(False, 'timing memory accesses')

    # Defaults to NULL, i.e. no CPU is attached unless one is assigned.
    cpu_pointer = Param.BaseCPU(NULL, "pointer to base CPU")
    translation = Param.Bool(False, "address translation")

# Emulated driver SimObject; the C++ side is declared in
# gpu-compute/cl_driver.hh.
class ClDriver(EmulatedDriver):
    type = 'ClDriver'
    cxx_header = 'gpu-compute/cl_driver.hh'
    # Vector of strings; no default value is declared.
    codefile = VectorParam.String('code file name(s)')

# DMA-capable dispatcher SimObject; the C++ side is declared in
# gpu-compute/dispatcher.hh.
class GpuDispatcher(DmaDevice):
    type = 'GpuDispatcher'
    cxx_header = 'gpu-compute/dispatcher.hh'
    # put at 8GB line for now (0x200000000 == 8 * 2**30)
    pio_addr = Param.Addr(0x200000000, "Device Address")
    pio_latency = Param.Latency('1ns', "Programmed IO latency")
    # shader_pointer, cpu and cl_driver are declared without defaults.
    shader_pointer = Param.Shader('pointer to shader')
    translation_port = MasterPort('Port to the dispatcher TLB')
    cpu = Param.BaseCPU("CPU to wake up on kernel completion")

    cl_driver = Param.ClDriver('pointer to driver')

# Enum of operation-type names (OT_*).
# NOTE(review): not referenced elsewhere in this file; presumably consumed by
# the gpu-compute C++ sources -- confirm. Member order is kept exactly as in
# the original.
class OpType(Enum):
    vals = [
        'OT_NULL',
        'OT_ALU',
        'OT_SPECIAL',
        'OT_GLOBAL_READ',
        'OT_GLOBAL_WRITE',
        'OT_GLOBAL_ATOMIC',
        'OT_GLOBAL_HIST',
        'OT_GLOBAL_LDAS',
        'OT_SHARED_READ',
        'OT_SHARED_WRITE',
        'OT_SHARED_ATOMIC',
        'OT_SHARED_HIST',
        'OT_SHARED_LDAS',
        'OT_PRIVATE_READ',
        'OT_PRIVATE_WRITE',
        'OT_PRIVATE_ATOMIC',
        'OT_PRIVATE_HIST',
        'OT_PRIVATE_LDAS',
        'OT_SPILL_READ',
        'OT_SPILL_WRITE',
        'OT_SPILL_ATOMIC',
        'OT_SPILL_HIST',
        'OT_SPILL_LDAS',
        'OT_READONLY_READ',
        'OT_READONLY_WRITE',
        'OT_READONLY_ATOMIC',
        'OT_READONLY_HIST',
        'OT_READONLY_LDAS',
        'OT_FLAT_READ',
        'OT_FLAT_WRITE',
        'OT_FLAT_ATOMIC',
        'OT_FLAT_HIST',
        'OT_FLAT_LDAS',
        'OT_KERN_READ',
        'OT_BRANCH',

        # note: Only the OT_BOTH_MEMFENCE seems to be supported in the 1.0F
        #       version of the compiler.
        'OT_SHARED_MEMFENCE',
        'OT_GLOBAL_MEMFENCE',
        'OT_BOTH_MEMFENCE',

        'OT_BARRIER',
        'OT_PRINT',
        'OT_RET',
        'OT_NOP',
        'OT_ARG',
    ]

# Enum of memory data-type names (M_*).
# NOTE(review): the U/S/F prefixes and numeric suffixes look like
# unsigned/signed/float types and bit widths -- confirm against the C++
# users. Member order is kept exactly as in the original.
class MemType(Enum):
    vals = [
        'M_U8',
        'M_U16',
        'M_U32',
        'M_U64',
        'M_S8',
        'M_S16',
        'M_S32',
        'M_S64',
        'M_F16',
        'M_F32',
        'M_F64',
    ]

# Enum of memory-operation names (MO_*).
# NOTE(review): not referenced elsewhere in this file; the meaning of the
# MO_A*, MO_ANR* and MO_H* groups is not visible here -- confirm against the
# C++ users. Member order is kept exactly as in the original.
class MemOpType(Enum):
    vals = [
        'MO_LD',
        'MO_ST',
        'MO_LDAS',
        'MO_LDA',
        'MO_AAND',
        'MO_AOR',
        'MO_AXOR',
        'MO_ACAS',
        'MO_AEXCH',
        'MO_AADD',
        'MO_ASUB',
        'MO_AINC',
        'MO_ADEC',
        'MO_AMAX',
        'MO_AMIN',
        'MO_ANRAND',
        'MO_ANROR',
        'MO_ANRXOR',
        'MO_ANRCAS',
        'MO_ANREXCH',
        'MO_ANRADD',
        'MO_ANRSUB',
        'MO_ANRINC',
        'MO_ANRDEC',
        'MO_ANRMAX',
        'MO_ANRMIN',
        'MO_HAND',
        'MO_HOR',
        'MO_HXOR',
        'MO_HCAS',
        'MO_HEXCH',
        'MO_HADD',
        'MO_HSUB',
        'MO_HINC',
        'MO_HDEC',
        'MO_HMAX',
        'MO_HMIN',
        'MO_UNDEF',
    ]

# Enum of storage-class names (SC_*).
# NOTE(review): not referenced elsewhere in this file; presumably consumed by
# the gpu-compute C++ sources -- confirm. Member order is kept exactly as in
# the original.
class StorageClassType(Enum):
    vals = [
        'SC_SPILL',
        'SC_GLOBAL',
        'SC_SHARED',
        'SC_PRIVATE',
        'SC_READONLY',
        'SC_KERNARG',
        'SC_NONE',
    ]

# Enum of register-kind names (RT_*).
# NOTE(review): not referenced elsewhere in this file; presumably consumed by
# the gpu-compute C++ sources -- confirm. Member order is kept exactly as in
# the original.
class RegisterType(Enum):
    vals = [
        'RT_VECTOR',
        'RT_SCALAR',
        'RT_CONDITION',
        'RT_HARDWARE',
        'RT_NONE',
    ]

# Enum of memory-ordering names (MEMORY_ORDER_*).
# NOTE(review): not referenced elsewhere in this file; presumably consumed by
# the gpu-compute C++ sources -- confirm. Member order is kept exactly as in
# the original.
class GenericMemoryOrder(Enum):
    vals = [
        'MEMORY_ORDER_NONE',
        'MEMORY_ORDER_RELAXED',
        'MEMORY_ORDER_SC_ACQUIRE',
        'MEMORY_ORDER_SC_RELEASE',
        'MEMORY_ORDER_SC_ACQUIRE_RELEASE',
    ]

# Enum of memory-scope names (MEMORY_SCOPE_*).
# NOTE(review): not referenced elsewhere in this file; presumably consumed by
# the gpu-compute C++ sources -- confirm. Member order is kept exactly as in
# the original.
class GenericMemoryScope(Enum):
    vals = [
        'MEMORY_SCOPE_NONE',
        'MEMORY_SCOPE_WORKITEM',
        'MEMORY_SCOPE_WAVEFRONT',
        'MEMORY_SCOPE_WORKGROUP',
        'MEMORY_SCOPE_DEVICE',
        'MEMORY_SCOPE_SYSTEM',
    ]

313