// shader.hh revision 11435:0f1b46dde3fa
1/*
2 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Steve Reinhardt
34 */
35
36#ifndef __SHADER_HH__
37#define __SHADER_HH__
38
#include <cassert>
#include <functional>
#include <string>
#include <vector>

#include "arch/isa.hh"
#include "arch/isa_traits.hh"
#include "base/types.hh"
#include "cpu/simple/atomic.hh"
#include "cpu/simple/timing.hh"
#include "cpu/simple_thread.hh"
#include "cpu/thread_context.hh"
#include "cpu/thread_state.hh"
#include "enums/MemOpType.hh"
#include "enums/MemType.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_tlb.hh"
#include "gpu-compute/lds_state.hh"
#include "gpu-compute/qstruct.hh"
#include "mem/page_table.hh"
#include "mem/port.hh"
#include "mem/request.hh"
#include "params/Shader.hh"
#include "sim/faults.hh"
#include "sim/process.hh"
#include "sim/sim_object.hh"
63
64class BaseTLB;
65class GpuDispatcher;
66
67namespace TheISA
68{
69    class GpuTLB;
70}
71
72static const int LDS_SIZE = 65536;
73
74// Class Shader: This describes a single shader instance. Most
75// configurations will only have a single shader.
76
77class Shader : public SimObject
78{
79  protected:
80      // Shader's clock period in terms of number of ticks of curTime,
81      // aka global simulation clock
82      Tick clock;
83
84  public:
85    typedef ShaderParams Params;
86    enum hsail_mode_e {SIMT,VECTOR_SCALAR};
87
88    // clock related functions ; maps to-and-from
89    // Simulation ticks and shader clocks.
90    Tick frequency() const { return SimClock::Frequency / clock; }
91
92    Tick ticks(int numCycles) const { return  (Tick)clock * numCycles; }
93
94    Tick getClock() const { return clock; }
95    Tick curCycle() const { return curTick() / clock; }
96    Tick tickToCycles(Tick val) const { return val / clock;}
97
98
99    SimpleThread *cpuThread;
100    ThreadContext *gpuTc;
101    BaseCPU *cpuPointer;
102
103    class TickEvent : public Event
104    {
105      private:
106        Shader *shader;
107
108      public:
109        TickEvent(Shader*);
110        void process();
111        const char* description() const;
112    };
113
114    TickEvent tickEvent;
115
116    // is this simulation going to be timing mode in the memory?
117    bool timingSim;
118    hsail_mode_e hsail_mode;
119
120    // If set, issue acq packet @ kernel launch
121    int impl_kern_boundary_sync;
122    // If set, generate a separate packet for acquire/release on
123    // ld_acquire/st_release/atomic operations
124    int separate_acquire_release;
125    // If set, fetch returns may be coissued with instructions
126    int coissue_return;
127    // If set, always dump all 64 gprs to trace
128    int trace_vgpr_all;
129    // Number of cu units in the shader
130    int n_cu;
131    // Number of wavefront slots per cu
132    int n_wf;
133    // The size of global memory
134    int globalMemSize;
135
136    /*
137     * Bytes/work-item for call instruction
138     * The number of arguments for an hsail function will
139     * vary. We simply determine the maximum # of arguments
140     * required by any hsail function up front before the
141     * simulation (during parsing of the Brig) and record
142     * that number here.
143     */
144    int funcargs_size;
145
146    // Tracks CU that rr dispatcher should attempt scheduling
147    int nextSchedCu;
148
149    // Size of scheduled add queue
150    uint32_t sa_n;
151
152    // Pointer to value to be increments
153    std::vector<uint32_t*> sa_val;
154    // When to do the increment
155    std::vector<uint64_t> sa_when;
156    // Amount to increment by
157    std::vector<int32_t> sa_x;
158
159    // List of Compute Units (CU's)
160    std::vector<ComputeUnit*> cuList;
161
162    uint64_t tick_cnt;
163    uint64_t box_tick_cnt;
164    uint64_t start_tick_cnt;
165
166    GpuDispatcher *dispatcher;
167
168    Shader(const Params *p);
169    ~Shader();
170    virtual void init();
171
172    // Run shader
173    void exec();
174
175    // Check to see if shader is busy
176    bool busy();
177
178    // Schedule a 32-bit value to be incremented some time in the future
179    void ScheduleAdd(uint32_t *val, Tick when, int x);
180    bool processTimingPacket(PacketPtr pkt);
181
182    void AccessMem(uint64_t address, void *ptr, uint32_t size, int cu_id,
183                   MemCmd cmd, bool suppress_func_errors);
184
185    void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id);
186
187    void ReadMem(uint64_t address, void *ptr, uint32_t sz, int cu_id,
188                 bool suppress_func_errors);
189
190    void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id);
191
192    void WriteMem(uint64_t address, void *ptr, uint32_t sz, int cu_id,
193                  bool suppress_func_errors);
194
195    void doFunctionalAccess(RequestPtr req, MemCmd cmd, void *data,
196                            bool suppress_func_errors, int cu_id);
197
198    void
199    registerCU(int cu_id, ComputeUnit *compute_unit)
200    {
201        cuList[cu_id] = compute_unit;
202    }
203
204    void handshake(GpuDispatcher *dispatcher);
205    bool dispatch_workgroups(NDRange *ndr);
206    Addr mmap(int length);
207    void functionalTLBAccess(PacketPtr pkt, int cu_id, BaseTLB::Mode mode);
208    void updateContext(int cid);
209    void hostWakeUp(BaseCPU *cpu);
210};
211
212#endif // __SHADER_HH__
213