gen.py (11534:7106f550afad) gen.py (11639:2e8d4bd8108d)
1#! /usr/bin/python
2
3#
4# Copyright (c) 2015 Advanced Micro Devices, Inc.
5# All rights reserved.
6#
7# For use for simulation and test purposes only
8#
9# Redistribution and use in source and binary forms, with or without
10# modification, are permitted provided that the following conditions are met:
11#
12# 1. Redistributions of source code must retain the above copyright notice,
13# this list of conditions and the following disclaimer.
14#
15# 2. Redistributions in binary form must reproduce the above copyright notice,
16# this list of conditions and the following disclaimer in the documentation
17# and/or other materials provided with the distribution.
18#
19# 3. Neither the name of the copyright holder nor the names of its contributors
20# may be used to endorse or promote products derived from this software
21# without specific prior written permission.
22#
23# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
27# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33# POSSIBILITY OF SUCH DAMAGE.
34#
35# Author: Steve Reinhardt
36#
37
38import sys, re
39
40from m5.util import code_formatter
41
# The generator writes three output files, whose names must be given on
# the command line.
if len(sys.argv) != 4:
    # Report usage errors on stderr and exit with a nonzero status so the
    # invoking build step sees the failure instead of a "successful" run
    # that produced nothing.
    sys.stderr.write("Error: need 3 args (file names)\n")
    sys.exit(1)
45
46header_code = code_formatter()
47decoder_code = code_formatter()
48exec_code = code_formatter()
49
50###############
51#
52# Generate file prologs (includes etc.)
53#
54###############
55
56header_code('''
57#include "arch/hsail/insts/decl.hh"
58#include "base/bitfield.hh"
59#include "gpu-compute/hsail_code.hh"
60#include "gpu-compute/wavefront.hh"
61
62namespace HsailISA
63{
64''')
65header_code.indent()
66
67decoder_code('''
68#include "arch/hsail/gpu_decoder.hh"
69#include "arch/hsail/insts/branch.hh"
70#include "arch/hsail/insts/decl.hh"
71#include "arch/hsail/insts/gen_decl.hh"
72#include "arch/hsail/insts/mem.hh"
73#include "arch/hsail/insts/mem_impl.hh"
74#include "gpu-compute/brig_object.hh"
75
76namespace HsailISA
77{
78 std::vector<GPUStaticInst*> Decoder::decodedInsts;
79
80 GPUStaticInst*
81 Decoder::decode(MachInst machInst)
82 {
83 using namespace Brig;
84
85 const BrigInstBase *ib = machInst.brigInstBase;
86 const BrigObject *obj = machInst.brigObj;
87
88 switch(ib->opcode) {
89''')
90decoder_code.indent()
91decoder_code.indent()
92
93exec_code('''
94#include "arch/hsail/insts/gen_decl.hh"
95#include "base/intmath.hh"
96
97namespace HsailISA
98{
99''')
100exec_code.indent()
101
102###############
103#
104# Define code templates for class declarations (for header file)
105#
106###############
107
108# Basic header template for an instruction with no template parameters.
109header_template_nodt = '''
110class $class_name : public $base_class
111{
112 public:
113 typedef $base_class Base;
114
115 $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
116 : Base(ib, obj, "$opcode")
117 {
118 }
119
120 void execute(GPUDynInstPtr gpuDynInst);
121};
122
123'''
124
125# Basic header template for an instruction with a single DataType
126# template parameter.
127header_template_1dt = '''
128template<typename DataType>
129class $class_name : public $base_class<DataType>
130{
131 public:
132 typedef $base_class<DataType> Base;
133 typedef typename DataType::CType CType;
134
135 $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
136 : Base(ib, obj, "$opcode")
137 {
138 }
139
140 void execute(GPUDynInstPtr gpuDynInst);
141};
142
143'''
144
145header_template_1dt_noexec = '''
146template<typename DataType>
147class $class_name : public $base_class<DataType>
148{
149 public:
150 typedef $base_class<DataType> Base;
151 typedef typename DataType::CType CType;
152
153 $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
154 : Base(ib, obj, "$opcode")
155 {
156 }
157};
158
159'''
160
161# Same as header_template_1dt, except the base class has a second
162# template parameter NumSrcOperands to allow a variable number of
163# source operands. Note that since this is implemented with an array,
164# it only works for instructions where all sources are of the same
165# type (like most arithmetics).
166header_template_1dt_varsrcs = '''
167template<typename DataType>
168class $class_name : public $base_class<DataType, $num_srcs>
169{
170 public:
171 typedef $base_class<DataType, $num_srcs> Base;
172 typedef typename DataType::CType CType;
173
174 $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
175 : Base(ib, obj, "$opcode")
176 {
177 }
178
179 void execute(GPUDynInstPtr gpuDynInst);
180};
181
182'''
183
184# Header template for instruction with two DataType template
185# parameters, one for the dest and one for the source. This is used
186# by compare and convert.
187header_template_2dt = '''
188template<typename DestDataType, class SrcDataType>
189class $class_name : public $base_class<DestDataType, SrcDataType>
190{
191 public:
192 typedef $base_class<DestDataType, SrcDataType> Base;
193 typedef typename DestDataType::CType DestCType;
194 typedef typename SrcDataType::CType SrcCType;
195
196 $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
197 : Base(ib, obj, "$opcode")
198 {
199 }
200
201 void execute(GPUDynInstPtr gpuDynInst);
202};
203
204'''
205
206header_templates = {
207 'ArithInst': header_template_1dt_varsrcs,
208 'CmovInst': header_template_1dt,
209 'ClassInst': header_template_1dt,
210 'ShiftInst': header_template_1dt,
211 'ExtractInsertInst': header_template_1dt,
212 'CmpInst': header_template_2dt,
213 'CvtInst': header_template_2dt,
214 'LdInst': '',
215 'StInst': '',
216 'SpecialInstNoSrc': header_template_nodt,
217 'SpecialInst1Src': header_template_nodt,
218 'SpecialInstNoSrcNoDest': '',
219}
220
221###############
222#
223# Define code templates for exec functions
224#
225###############
226
227# exec function body
228exec_template_nodt_nosrc = '''
229void
230$class_name::execute(GPUDynInstPtr gpuDynInst)
231{
232 Wavefront *w = gpuDynInst->wavefront();
233
234 typedef Base::DestCType DestCType;
235
1#! /usr/bin/python
2
3#
4# Copyright (c) 2015 Advanced Micro Devices, Inc.
5# All rights reserved.
6#
7# For use for simulation and test purposes only
8#
9# Redistribution and use in source and binary forms, with or without
10# modification, are permitted provided that the following conditions are met:
11#
12# 1. Redistributions of source code must retain the above copyright notice,
13# this list of conditions and the following disclaimer.
14#
15# 2. Redistributions in binary form must reproduce the above copyright notice,
16# this list of conditions and the following disclaimer in the documentation
17# and/or other materials provided with the distribution.
18#
19# 3. Neither the name of the copyright holder nor the names of its contributors
20# may be used to endorse or promote products derived from this software
21# without specific prior written permission.
22#
23# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
27# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33# POSSIBILITY OF SUCH DAMAGE.
34#
35# Author: Steve Reinhardt
36#
37
38import sys, re
39
40from m5.util import code_formatter
41
# The generator writes three output files, whose names must be given on
# the command line.
if len(sys.argv) != 4:
    # Report usage errors on stderr and exit with a nonzero status so the
    # invoking build step sees the failure instead of a "successful" run
    # that produced nothing.
    sys.stderr.write("Error: need 3 args (file names)\n")
    sys.exit(1)
45
46header_code = code_formatter()
47decoder_code = code_formatter()
48exec_code = code_formatter()
49
50###############
51#
52# Generate file prologs (includes etc.)
53#
54###############
55
56header_code('''
57#include "arch/hsail/insts/decl.hh"
58#include "base/bitfield.hh"
59#include "gpu-compute/hsail_code.hh"
60#include "gpu-compute/wavefront.hh"
61
62namespace HsailISA
63{
64''')
65header_code.indent()
66
67decoder_code('''
68#include "arch/hsail/gpu_decoder.hh"
69#include "arch/hsail/insts/branch.hh"
70#include "arch/hsail/insts/decl.hh"
71#include "arch/hsail/insts/gen_decl.hh"
72#include "arch/hsail/insts/mem.hh"
73#include "arch/hsail/insts/mem_impl.hh"
74#include "gpu-compute/brig_object.hh"
75
76namespace HsailISA
77{
78 std::vector<GPUStaticInst*> Decoder::decodedInsts;
79
80 GPUStaticInst*
81 Decoder::decode(MachInst machInst)
82 {
83 using namespace Brig;
84
85 const BrigInstBase *ib = machInst.brigInstBase;
86 const BrigObject *obj = machInst.brigObj;
87
88 switch(ib->opcode) {
89''')
90decoder_code.indent()
91decoder_code.indent()
92
93exec_code('''
94#include "arch/hsail/insts/gen_decl.hh"
95#include "base/intmath.hh"
96
97namespace HsailISA
98{
99''')
100exec_code.indent()
101
102###############
103#
104# Define code templates for class declarations (for header file)
105#
106###############
107
108# Basic header template for an instruction with no template parameters.
109header_template_nodt = '''
110class $class_name : public $base_class
111{
112 public:
113 typedef $base_class Base;
114
115 $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
116 : Base(ib, obj, "$opcode")
117 {
118 }
119
120 void execute(GPUDynInstPtr gpuDynInst);
121};
122
123'''
124
125# Basic header template for an instruction with a single DataType
126# template parameter.
127header_template_1dt = '''
128template<typename DataType>
129class $class_name : public $base_class<DataType>
130{
131 public:
132 typedef $base_class<DataType> Base;
133 typedef typename DataType::CType CType;
134
135 $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
136 : Base(ib, obj, "$opcode")
137 {
138 }
139
140 void execute(GPUDynInstPtr gpuDynInst);
141};
142
143'''
144
145header_template_1dt_noexec = '''
146template<typename DataType>
147class $class_name : public $base_class<DataType>
148{
149 public:
150 typedef $base_class<DataType> Base;
151 typedef typename DataType::CType CType;
152
153 $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
154 : Base(ib, obj, "$opcode")
155 {
156 }
157};
158
159'''
160
161# Same as header_template_1dt, except the base class has a second
162# template parameter NumSrcOperands to allow a variable number of
163# source operands. Note that since this is implemented with an array,
164# it only works for instructions where all sources are of the same
165# type (like most arithmetics).
166header_template_1dt_varsrcs = '''
167template<typename DataType>
168class $class_name : public $base_class<DataType, $num_srcs>
169{
170 public:
171 typedef $base_class<DataType, $num_srcs> Base;
172 typedef typename DataType::CType CType;
173
174 $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
175 : Base(ib, obj, "$opcode")
176 {
177 }
178
179 void execute(GPUDynInstPtr gpuDynInst);
180};
181
182'''
183
184# Header template for instruction with two DataType template
185# parameters, one for the dest and one for the source. This is used
186# by compare and convert.
187header_template_2dt = '''
188template<typename DestDataType, class SrcDataType>
189class $class_name : public $base_class<DestDataType, SrcDataType>
190{
191 public:
192 typedef $base_class<DestDataType, SrcDataType> Base;
193 typedef typename DestDataType::CType DestCType;
194 typedef typename SrcDataType::CType SrcCType;
195
196 $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
197 : Base(ib, obj, "$opcode")
198 {
199 }
200
201 void execute(GPUDynInstPtr gpuDynInst);
202};
203
204'''
205
206header_templates = {
207 'ArithInst': header_template_1dt_varsrcs,
208 'CmovInst': header_template_1dt,
209 'ClassInst': header_template_1dt,
210 'ShiftInst': header_template_1dt,
211 'ExtractInsertInst': header_template_1dt,
212 'CmpInst': header_template_2dt,
213 'CvtInst': header_template_2dt,
214 'LdInst': '',
215 'StInst': '',
216 'SpecialInstNoSrc': header_template_nodt,
217 'SpecialInst1Src': header_template_nodt,
218 'SpecialInstNoSrcNoDest': '',
219}
220
221###############
222#
223# Define code templates for exec functions
224#
225###############
226
227# exec function body
228exec_template_nodt_nosrc = '''
229void
230$class_name::execute(GPUDynInstPtr gpuDynInst)
231{
232 Wavefront *w = gpuDynInst->wavefront();
233
234 typedef Base::DestCType DestCType;
235
236 const VectorMask &mask = w->get_pred();
236 const VectorMask &mask = w->getPred();
237
238 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
239 if (mask[lane]) {
240 DestCType dest_val = $expr;
241 this->dest.set(w, lane, dest_val);
242 }
243 }
244}
245
246'''
247
248exec_template_nodt_1src = '''
249void
250$class_name::execute(GPUDynInstPtr gpuDynInst)
251{
252 Wavefront *w = gpuDynInst->wavefront();
253
254 typedef Base::DestCType DestCType;
255 typedef Base::SrcCType SrcCType;
256
237
238 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
239 if (mask[lane]) {
240 DestCType dest_val = $expr;
241 this->dest.set(w, lane, dest_val);
242 }
243 }
244}
245
246'''
247
248exec_template_nodt_1src = '''
249void
250$class_name::execute(GPUDynInstPtr gpuDynInst)
251{
252 Wavefront *w = gpuDynInst->wavefront();
253
254 typedef Base::DestCType DestCType;
255 typedef Base::SrcCType SrcCType;
256
257 const VectorMask &mask = w->get_pred();
257 const VectorMask &mask = w->getPred();
258
259 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
260 if (mask[lane]) {
261 SrcCType src_val0 = this->src0.get<SrcCType>(w, lane);
262 DestCType dest_val = $expr;
263
264 this->dest.set(w, lane, dest_val);
265 }
266 }
267}
268
269'''
270
271exec_template_1dt_varsrcs = '''
272template<typename DataType>
273void
274$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
275{
276 Wavefront *w = gpuDynInst->wavefront();
277
258
259 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
260 if (mask[lane]) {
261 SrcCType src_val0 = this->src0.get<SrcCType>(w, lane);
262 DestCType dest_val = $expr;
263
264 this->dest.set(w, lane, dest_val);
265 }
266 }
267}
268
269'''
270
271exec_template_1dt_varsrcs = '''
272template<typename DataType>
273void
274$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
275{
276 Wavefront *w = gpuDynInst->wavefront();
277
278 const VectorMask &mask = w->get_pred();
278 const VectorMask &mask = w->getPred();
279
280 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
281 if (mask[lane]) {
282 CType dest_val;
283 if ($dest_is_src_flag) {
284 dest_val = this->dest.template get<CType>(w, lane);
285 }
286
287 CType src_val[$num_srcs];
288
289 for (int i = 0; i < $num_srcs; ++i) {
290 src_val[i] = this->src[i].template get<CType>(w, lane);
291 }
292
293 dest_val = (CType)($expr);
294
295 this->dest.set(w, lane, dest_val);
296 }
297 }
298}
299
300'''
301
302exec_template_1dt_3srcs = '''
303template<typename DataType>
304void
305$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
306{
307 Wavefront *w = gpuDynInst->wavefront();
308
309 typedef typename Base::Src0CType Src0T;
310 typedef typename Base::Src1CType Src1T;
311 typedef typename Base::Src2CType Src2T;
312
279
280 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
281 if (mask[lane]) {
282 CType dest_val;
283 if ($dest_is_src_flag) {
284 dest_val = this->dest.template get<CType>(w, lane);
285 }
286
287 CType src_val[$num_srcs];
288
289 for (int i = 0; i < $num_srcs; ++i) {
290 src_val[i] = this->src[i].template get<CType>(w, lane);
291 }
292
293 dest_val = (CType)($expr);
294
295 this->dest.set(w, lane, dest_val);
296 }
297 }
298}
299
300'''
301
302exec_template_1dt_3srcs = '''
303template<typename DataType>
304void
305$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
306{
307 Wavefront *w = gpuDynInst->wavefront();
308
309 typedef typename Base::Src0CType Src0T;
310 typedef typename Base::Src1CType Src1T;
311 typedef typename Base::Src2CType Src2T;
312
313 const VectorMask &mask = w->get_pred();
313 const VectorMask &mask = w->getPred();
314
315 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
316 if (mask[lane]) {
317 CType dest_val;
318
319 if ($dest_is_src_flag) {
320 dest_val = this->dest.template get<CType>(w, lane);
321 }
322
323 Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
324 Src1T src_val1 = this->src1.template get<Src1T>(w, lane);
325 Src2T src_val2 = this->src2.template get<Src2T>(w, lane);
326
327 dest_val = $expr;
328
329 this->dest.set(w, lane, dest_val);
330 }
331 }
332}
333
334'''
335
336exec_template_1dt_2src_1dest = '''
337template<typename DataType>
338void
339$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
340{
341 Wavefront *w = gpuDynInst->wavefront();
342
343 typedef typename Base::DestCType DestT;
344 typedef CType Src0T;
345 typedef typename Base::Src1CType Src1T;
346
314
315 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
316 if (mask[lane]) {
317 CType dest_val;
318
319 if ($dest_is_src_flag) {
320 dest_val = this->dest.template get<CType>(w, lane);
321 }
322
323 Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
324 Src1T src_val1 = this->src1.template get<Src1T>(w, lane);
325 Src2T src_val2 = this->src2.template get<Src2T>(w, lane);
326
327 dest_val = $expr;
328
329 this->dest.set(w, lane, dest_val);
330 }
331 }
332}
333
334'''
335
336exec_template_1dt_2src_1dest = '''
337template<typename DataType>
338void
339$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
340{
341 Wavefront *w = gpuDynInst->wavefront();
342
343 typedef typename Base::DestCType DestT;
344 typedef CType Src0T;
345 typedef typename Base::Src1CType Src1T;
346
347 const VectorMask &mask = w->get_pred();
347 const VectorMask &mask = w->getPred();
348
349 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
350 if (mask[lane]) {
351 DestT dest_val;
352 if ($dest_is_src_flag) {
353 dest_val = this->dest.template get<DestT>(w, lane);
354 }
355 Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
356 Src1T src_val1 = this->src1.template get<Src1T>(w, lane);
357
358 dest_val = $expr;
359
360 this->dest.set(w, lane, dest_val);
361 }
362 }
363}
364
365'''
366
367exec_template_shift = '''
368template<typename DataType>
369void
370$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
371{
372 Wavefront *w = gpuDynInst->wavefront();
373
348
349 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
350 if (mask[lane]) {
351 DestT dest_val;
352 if ($dest_is_src_flag) {
353 dest_val = this->dest.template get<DestT>(w, lane);
354 }
355 Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
356 Src1T src_val1 = this->src1.template get<Src1T>(w, lane);
357
358 dest_val = $expr;
359
360 this->dest.set(w, lane, dest_val);
361 }
362 }
363}
364
365'''
366
367exec_template_shift = '''
368template<typename DataType>
369void
370$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
371{
372 Wavefront *w = gpuDynInst->wavefront();
373
374 const VectorMask &mask = w->get_pred();
374 const VectorMask &mask = w->getPred();
375 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
376 if (mask[lane]) {
377 CType dest_val;
378
379 if ($dest_is_src_flag) {
380 dest_val = this->dest.template get<CType>(w, lane);
381 }
382
383 CType src_val0 = this->src0.template get<CType>(w, lane);
384 uint32_t src_val1 = this->src1.template get<uint32_t>(w, lane);
385
386 dest_val = $expr;
387
388 this->dest.set(w, lane, dest_val);
389 }
390 }
391}
392
393'''
394
395exec_template_2dt = '''
396template<typename DestDataType, class SrcDataType>
397void
398$class_name<DestDataType, SrcDataType>::execute(GPUDynInstPtr gpuDynInst)
399{
400 Wavefront *w = gpuDynInst->wavefront();
401
375 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
376 if (mask[lane]) {
377 CType dest_val;
378
379 if ($dest_is_src_flag) {
380 dest_val = this->dest.template get<CType>(w, lane);
381 }
382
383 CType src_val0 = this->src0.template get<CType>(w, lane);
384 uint32_t src_val1 = this->src1.template get<uint32_t>(w, lane);
385
386 dest_val = $expr;
387
388 this->dest.set(w, lane, dest_val);
389 }
390 }
391}
392
393'''
394
395exec_template_2dt = '''
396template<typename DestDataType, class SrcDataType>
397void
398$class_name<DestDataType, SrcDataType>::execute(GPUDynInstPtr gpuDynInst)
399{
400 Wavefront *w = gpuDynInst->wavefront();
401
402 const VectorMask &mask = w->get_pred();
402 const VectorMask &mask = w->getPred();
403
404 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
405 if (mask[lane]) {
406 DestCType dest_val;
407 SrcCType src_val[$num_srcs];
408
409 for (int i = 0; i < $num_srcs; ++i) {
410 src_val[i] = this->src[i].template get<SrcCType>(w, lane);
411 }
412
413 dest_val = $expr;
414
415 this->dest.set(w, lane, dest_val);
416 }
417 }
418}
419
420'''
421
422exec_templates = {
423 'ArithInst': exec_template_1dt_varsrcs,
424 'CmovInst': exec_template_1dt_3srcs,
425 'ExtractInsertInst': exec_template_1dt_3srcs,
426 'ClassInst': exec_template_1dt_2src_1dest,
427 'CmpInst': exec_template_2dt,
428 'CvtInst': exec_template_2dt,
429 'LdInst': '',
430 'StInst': '',
431 'SpecialInstNoSrc': exec_template_nodt_nosrc,
432 'SpecialInst1Src': exec_template_nodt_1src,
433 'SpecialInstNoSrcNoDest': '',
434}
435
436###############
437#
438# Define code templates for the decoder cases
439#
440###############
441
442# decode template for nodt-opcode case
443decode_nodt_template = '''
444 case BRIG_OPCODE_$brig_opcode_upper: return $constructor(ib, obj);'''
445
446decode_case_prolog_class_inst = '''
447 case BRIG_OPCODE_$brig_opcode_upper:
448 {
449 //const BrigOperandBase *baseOp = obj->getOperand(ib->operands[1]);
450 BrigType16_t type = ((BrigInstSourceType*)ib)->sourceType;
451 //switch (baseOp->kind) {
452 // case BRIG_OPERAND_REG:
453 // type = ((const BrigOperandReg*)baseOp)->type;
454 // break;
455 // case BRIG_OPERAND_IMMED:
456 // type = ((const BrigOperandImmed*)baseOp)->type;
457 // break;
458 // default:
459 // fatal("CLASS unrecognized kind of operand %d\\n",
460 // baseOp->kind);
461 //}
462 switch (type) {'''
463
464# common prolog for 1dt- or 2dt-opcode case: switch on data type
465decode_case_prolog = '''
466 case BRIG_OPCODE_$brig_opcode_upper:
467 {
468 switch (ib->type) {'''
469
470# single-level decode case entry (for 1dt opcodes)
471decode_case_entry = \
472' case BRIG_TYPE_$type_name: return $constructor(ib, obj);'
473
474decode_store_prolog = \
475' case BRIG_TYPE_$type_name: {'
476
477decode_store_case_epilog = '''
478 }'''
479
480decode_store_case_entry = \
481' return $constructor(ib, obj);'
482
483# common epilog for type switch
484decode_case_epilog = '''
485 default: fatal("$brig_opcode_upper: unrecognized type %d\\n",
486 ib->type);
487 }
488 }
489 break;'''
490
# Additional templates for nested decode on a second type field (for
# compare and convert).  These are used in place of the
# decode_case_entry template to create a second-level switch on the
# second type field inside each case of the first-level type switch.
# Because the name and location of the second type can vary, the Brig
# instruction type must be provided in $brig_type, and the name of the
# second type field must be provided in $type2_field.
498decode_case2_prolog = '''
499 case BRIG_TYPE_$type_name:
500 switch (((Brig$brig_type*)ib)->$type2_field) {'''
501
502decode_case2_entry = \
503' case BRIG_TYPE_$type2_name: return $constructor(ib, obj);'
504
505decode_case2_epilog = '''
506 default: fatal("$brig_opcode_upper: unrecognized $type2_field %d\\n",
507 ((Brig$brig_type*)ib)->$type2_field);
508 }
509 break;'''
510
# Figure out how many source operands an expr needs by looking for the
# highest-numbered srcN value referenced.  Since sources are numbered
# starting at 0, the return value is N+1.
def num_src_operands(expr):
    """Return the number of source operands referenced by expr.

    Operands are recognised as whole words of the form srcN (N a single
    digit), i.e. with the same pattern gen() uses when it rewrites the
    expression, so identifiers that merely contain such text (e.g.
    'xsrc2') are not counted.

    Returns 0 when no source operand is referenced, otherwise the
    highest operand index found plus one.
    """
    indices = [int(digit) for digit in re.findall(r'\bsrc(\d)\b', expr)]
    if not indices:
        return 0
    return max(indices) + 1
523
###############
#
# Define final code generation methods
#
# The gen method (and the gen_special wrapper around it) is the
# interface for generating actual instructions.
#
###############
532
# Generate class declaration, exec function, and decode switch case
# for a brig_opcode with a single-level type switch.  The 'types'
# parameter is a list or tuple of types for which the instruction
# should be instantiated.
def gen(brig_opcode, types=None, expr=None, base_class='ArithInst',
        type2_info=None, constructor_prefix='new ', is_store=False):
    """Emit header, exec and decoder code for one Brig opcode.

    brig_opcode        -- opcode name; also used as the generated class name.
    types              -- tuple/list of type names to instantiate the
                          instruction for; None (or a single string) emits a
                          single untyped decode case.
    expr               -- C++ expression over src0..srcN / dest computing the
                          result; required when base_class is 'ArithInst'.
    base_class         -- name of the C++ base class (may carry template
                          arguments), used to select the code templates.
    type2_info         -- optional (field_name, types) pair requesting a
                          nested switch on a second type field.
    constructor_prefix -- text placed in front of the class name in the
                          decode case (e.g. 'new ' or 'decode').
    is_store           -- emit the store-style decode entries.

    NOTE(review): the templates refer to $class_name, $opcode, $expr,
    $constructor, $type_name, etc. and nothing is passed to the
    code_formatter calls explicitly, so these values are presumably looked
    up in this function's local variables by name.  Do not rename the
    locals below or move the formatter calls into helper functions without
    confirming against m5.util.code_formatter.
    """
    brig_opcode_upper = brig_opcode.upper()
    class_name = brig_opcode
    opcode = class_name.lower()

    if base_class == 'ArithInst':
        # note that expr must be provided with ArithInst so we can
        # derive num_srcs for the template
        assert expr

    if expr:
        # Derive several bits of info from expr.  If expr is not used,
        # this info will be irrelevant.
        num_srcs = num_src_operands(expr)
        # if the RHS expression includes 'dest', then we're doing an RMW
        # on the reg and we need to treat it like a source
        dest_is_src = expr.find('dest') != -1
        dest_is_src_flag = str(dest_is_src).lower()  # for C++
        if base_class in ('ArithInst', 'CmpInst', 'CvtInst'):
            # these templates keep their sources in an array
            expr = re.sub(r'\bsrc(\d)\b', r'src_val[\1]', expr)
        else:
            # all other templates use individually named source values
            expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr)
        expr = re.sub(r'\bdest\b', r'dest_val', expr)

    # Strip template arguments off of base class before looking up
    # appropriate templates
    base_class_base = re.sub(r'<.*>$', '', base_class)
    header_code(header_templates[base_class_base])

    if base_class.startswith('SpecialInst'):
        exec_code(exec_templates[base_class_base])
    elif base_class.startswith('ShiftInst'):
        # ShiftInst has no entry in exec_templates; it uses its own template
        header_code(exec_template_shift)
    else:
        header_code(exec_templates[base_class_base])

    if not types or isinstance(types, str):
        # Just a single type
        constructor = constructor_prefix + class_name
        decoder_code(decode_nodt_template)
    else:
        # multiple types, need at least one level of decode
        if brig_opcode == 'Class':
            decoder_code(decode_case_prolog_class_inst)
        else:
            decoder_code(decode_case_prolog)
        if not type2_info:
            if not is_store:
                # single list of types, do basic one-level decode
                for type_name in types:
                    full_class_name = '%s<%s>' % (class_name,
                                                  type_name.upper())
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_case_entry)
            else:
                # single list of types; each store case gets its own
                # braced body and a second template argument built from
                # the type's leading letter and its bit width
                for type_name in types:
                    decoder_code(decode_store_prolog)
                    type_size = int(re.findall(r'[0-9]+', type_name)[0])
                    type_type = type_name[0]
                    full_class_name = '%s<%s,%s>' % (
                        class_name, type_name.upper(),
                        '%s%d' % (type_type.upper(), type_size))
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_store_case_entry)
                    decoder_code(decode_store_case_epilog)
        else:
            # need secondary type switch (convert, compare)
            # unpack extra info on second switch
            (type2_field, types2) = type2_info
            brig_type = 'Inst%s' % brig_opcode
            for type_name in types:
                decoder_code(decode_case2_prolog)
                fmt = '%s<%s,%%s>' % (class_name, type_name.upper())
                for type2_name in types2:
                    full_class_name = fmt % type2_name.upper()
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_case2_entry)

                decoder_code(decode_case2_epilog)

        decoder_code(decode_case_epilog)
624
625###############
626#
627# Generate instructions
628#
629###############
630
# Shorthand names for the groups of Brig types that instructions are
# instantiated for.

# bit-string types
bit_types = ('B1', 'B32', 'B64')

# arithmetic ops are typically defined only on 32- and 64-bit sizes
arith_int_types = ('S32', 'U32', 'S64', 'U64')
arith_float_types = ('F32', 'F64')
arith_types = arith_int_types + arith_float_types

# every integer width, including the sub-word ones
all_int_types = ('S8', 'U8', 'S16', 'U16') + arith_int_types

# I think you might be able to do 'f16' memory ops too, but we'll
# ignore them for now.
mem_types = all_int_types + arith_float_types
mem_atom_types = all_int_types + ('B32', 'B64')
646
##### Arithmetic & logical operations
# Each call emits the class, exec function and decode case for one
# opcode; the calls are kept in their original order so the generated
# files come out in the same order.
gen('Add', arith_types, 'src0 + src1')
gen('Sub', arith_types, 'src0 - src1')
gen('Mul', arith_types, 'src0 * src1')
gen('Div', arith_types, 'src0 / src1')
gen('Min', arith_types, 'std::min(src0, src1)')
gen('Max', arith_types, 'std::max(src0, src1)')
gen('Gcnmin', arith_types, 'std::min(src0, src1)')

gen('CopySign', arith_float_types,
    'src1 < 0 ? -std::abs(src0) : std::abs(src0)')
gen('Sqrt', arith_float_types, 'sqrt(src0)')
gen('Floor', arith_float_types, 'floor(src0)')

# "fast" sqrt... same as slow for us
gen('Nsqrt', arith_float_types, 'sqrt(src0)')
gen('Nrsqrt', arith_float_types, '1.0/sqrt(src0)')
gen('Nrcp', arith_float_types, '1.0/src0')
gen('Fract', arith_float_types,
    '(src0 >= 0.0)?(src0-floor(src0)):(floor(src0)-src0)')

gen('Ncos', arith_float_types, 'cos(src0)')
gen('Nsin', arith_float_types, 'sin(src0)')

gen('And', bit_types, 'src0 & src1')
gen('Or', bit_types, 'src0 | src1')
gen('Xor', bit_types, 'src0 ^ src1')

gen('Bitselect', bit_types, '(src1 & src0) | (src2 & ~src0)')
gen('Firstbit', bit_types, 'firstbit(src0)')
gen('Popcount', ('B32', 'B64'), '__builtin_popcount(src0)')

gen('Shl', arith_int_types, 'src0 << (unsigned)src1',
    base_class='ShiftInst')
gen('Shr', arith_int_types, 'src0 >> (unsigned)src1',
    base_class='ShiftInst')

# Not generated yet:
# gen('Mul_hi', types=('s32','u32', '??'))
# gen('Mul24', types=('s32','u32', '??'))
gen('Rem', arith_int_types, 'src0 - ((src0 / src1) * src1)')

gen('Abs', arith_types, 'std::abs(src0)')
gen('Neg', arith_types, '-src0')

gen('Mov', bit_types, 'src0')
gen('Not', bit_types, 'heynot(src0)')

# mad and fma differ only in rounding behavior, which we don't emulate
# also there's an integer form of mad, but not of fma
gen('Mad', arith_types, 'src0 * src1 + src2')
gen('Fma', arith_float_types, 'src0 * src1 + src2')

# native floating point operations
gen('Nfma', arith_float_types, 'src0 * src1 + src2')

gen('Cmov', bit_types, 'src0 ? src1 : src2', base_class='CmovInst')
gen('BitAlign', bit_types, '(src0 << src2)|(src1 >> (32 - src2))')
gen('ByteAlign', bit_types, '(src0 << 8 * src2)|(src1 >> (32 - 8 * src2))')

# see base/bitfield.hh
gen('BitExtract', arith_int_types, 'bits(src0, src1, src1 + src2 - 1)',
    base_class='ExtractInsertInst')

gen('BitInsert', arith_int_types, 'insertBits(dest, src1, src2, src0)',
    base_class='ExtractInsertInst')

##### Compare
gen('Cmp', ('B1', 'S32', 'U32', 'F32'), 'compare(src0, src1, this->cmpOp)',
    base_class='CmpInst',
    type2_info=('sourceType', arith_types + bit_types))
gen('Class', arith_float_types, 'fpclassify(src0,src1)',
    base_class='ClassInst')

##### Conversion

# Conversion operations are only defined on B1, not B32 or B64
cvt_types = ('B1',) + mem_types

gen('Cvt', cvt_types, 'src0', base_class='CvtInst',
    type2_info=('sourceType', cvt_types))


##### Load & Store
gen('Lda', mem_types, base_class='LdInst', constructor_prefix='decode')
gen('Ld', mem_types, base_class='LdInst', constructor_prefix='decode')
gen('St', mem_types, base_class='StInst', constructor_prefix='decode',
    is_store=True)
gen('Atomic', mem_atom_types, base_class='StInst',
    constructor_prefix='decode')
gen('AtomicNoRet', mem_atom_types, base_class='StInst',
    constructor_prefix='decode')

# Branches have no type list, so each gets a single untyped decode case.
gen('Cbr', base_class='LdInst', constructor_prefix='decode')
gen('Br', base_class='LdInst', constructor_prefix='decode')
735
736##### Special operations
def gen_special(brig_opcode, expr, dest_type='U32'):
    """Generate a 'special' instruction whose result is computed by expr.

    The base class is picked from the number of source operands that
    expr references: none selects SpecialInstNoSrc, one selects
    SpecialInst1Src.  dest_type becomes the template argument of that
    base class.

    Raises ValueError if expr references more than one source operand,
    since no matching SpecialInst base class is selected here.
    """
    num_srcs = num_src_operands(expr)
    if num_srcs == 0:
        base_class = 'SpecialInstNoSrc<%s>' % dest_type
    elif num_srcs == 1:
        base_class = 'SpecialInst1Src<%s>' % dest_type
    else:
        # Raise explicitly rather than assert: asserts vanish under -O,
        # which would leave base_class unbound below.
        raise ValueError(
            "special instruction '%s' uses %d source operands; "
            "only 0 or 1 are supported" % (brig_opcode, num_srcs))

    gen(brig_opcode, None, expr, base_class)
747
403
404 for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
405 if (mask[lane]) {
406 DestCType dest_val;
407 SrcCType src_val[$num_srcs];
408
409 for (int i = 0; i < $num_srcs; ++i) {
410 src_val[i] = this->src[i].template get<SrcCType>(w, lane);
411 }
412
413 dest_val = $expr;
414
415 this->dest.set(w, lane, dest_val);
416 }
417 }
418}
419
420'''
421
# Maps an instruction base class name (with any template arguments
# stripped) to the execute() code template emitted for it by gen().
# An empty string means no execute() code is emitted for that base class.
exec_templates = {
    'ArithInst': exec_template_1dt_varsrcs,
    'CmovInst': exec_template_1dt_3srcs,
    'ExtractInsertInst': exec_template_1dt_3srcs,
    'ClassInst': exec_template_1dt_2src_1dest,
    'CmpInst': exec_template_2dt,
    'CvtInst': exec_template_2dt,
    'LdInst': '',
    'StInst': '',
    'SpecialInstNoSrc': exec_template_nodt_nosrc,
    'SpecialInst1Src': exec_template_nodt_1src,
    'SpecialInstNoSrcNoDest': '',
}
435
436###############
437#
438# Define code templates for the decoder cases
439#
440###############
441
442# decode template for nodt-opcode case
443decode_nodt_template = '''
444 case BRIG_OPCODE_$brig_opcode_upper: return $constructor(ib, obj);'''
445
446decode_case_prolog_class_inst = '''
447 case BRIG_OPCODE_$brig_opcode_upper:
448 {
449 //const BrigOperandBase *baseOp = obj->getOperand(ib->operands[1]);
450 BrigType16_t type = ((BrigInstSourceType*)ib)->sourceType;
451 //switch (baseOp->kind) {
452 // case BRIG_OPERAND_REG:
453 // type = ((const BrigOperandReg*)baseOp)->type;
454 // break;
455 // case BRIG_OPERAND_IMMED:
456 // type = ((const BrigOperandImmed*)baseOp)->type;
457 // break;
458 // default:
459 // fatal("CLASS unrecognized kind of operand %d\\n",
460 // baseOp->kind);
461 //}
462 switch (type) {'''
463
464# common prolog for 1dt- or 2dt-opcode case: switch on data type
465decode_case_prolog = '''
466 case BRIG_OPCODE_$brig_opcode_upper:
467 {
468 switch (ib->type) {'''
469
470# single-level decode case entry (for 1dt opcodes)
471decode_case_entry = \
472' case BRIG_TYPE_$type_name: return $constructor(ib, obj);'
473
474decode_store_prolog = \
475' case BRIG_TYPE_$type_name: {'
476
477decode_store_case_epilog = '''
478 }'''
479
480decode_store_case_entry = \
481' return $constructor(ib, obj);'
482
483# common epilog for type switch
484decode_case_epilog = '''
485 default: fatal("$brig_opcode_upper: unrecognized type %d\\n",
486 ib->type);
487 }
488 }
489 break;'''
490
491# Additional templates for nested decode on a second type field (for
492# compare and convert). These are used in place of the
493# decode_case_entry template to create a second-level switch on the
494# second type field inside each case of the first-level type switch.
495# Because the name and location of the second type can vary, the Brig
496# instruction type must be provided in $brig_type, and the name of the
497# second type field must be provided in $type2_field.
498decode_case2_prolog = '''
499 case BRIG_TYPE_$type_name:
500 switch (((Brig$brig_type*)ib)->$type2_field) {'''
501
502decode_case2_entry = \
503' case BRIG_TYPE_$type2_name: return $constructor(ib, obj);'
504
505decode_case2_epilog = '''
506 default: fatal("$brig_opcode_upper: unrecognized $type2_field %d\\n",
507 ((Brig$brig_type*)ib)->$type2_field);
508 }
509 break;'''
510
511# Figure out how many source operands an expr needs by looking for the
512# highest-numbered srcN value referenced. Since sources are numbered
513# starting at 0, the return value is N+1.
def num_src_operands(expr):
    """Return how many source operands expr references (0 to 3).

    The check is a plain substring search for 'src2', 'src1' and 'src0',
    highest first, so the result is the highest index found plus one.
    Operands numbered above src2 are not recognized.
    """
    for count, operand in ((3, 'src2'), (2, 'src1'), (1, 'src0')):
        if operand in expr:
            return count
    return 0
523
524###############
525#
526# Define final code generation methods
527#
528# The gen_nodt, and gen_1dt, and gen_2dt methods are the interface for
529# generating actual instructions.
530#
531###############
532
533# Generate class declaration, exec function, and decode switch case
534# for a brig_opcode with a single-level type switch.  The 'types'
535# parameter is a list or tuple of types for which the instruction
536# should be instantiated.
def gen(brig_opcode, types=None, expr=None, base_class='ArithInst',
        type2_info=None, constructor_prefix='new ', is_store=False):
    """Emit the class declaration, exec code and decode case for one opcode.

    brig_opcode        -- opcode name; also used as the C++ class name.
    types              -- None or a single string for an untyped opcode
                          (one decode case, no type switch); otherwise a
                          list/tuple of type names to instantiate.
    expr               -- C++ expression computing the result in terms of
                          src0..src2 (and optionally dest).  Required when
                          base_class is 'ArithInst'.
    base_class         -- instruction base class; selects the header and
                          exec templates (template arguments are stripped
                          before the lookup).
    type2_info         -- optional (field_name, types) pair requesting a
                          nested switch on a second type field.
    constructor_prefix -- text placed before the class name in the decode
                          case ('new ' or the name of a decode helper).
    is_store           -- use the braced store decode templates, with the
                          class templated on two type arguments.

    NOTE(review): the templates reference names such as $constructor,
    $type_name and $brig_opcode_upper that match locals assigned here,
    and the formatter calls pass no arguments; presumably code_formatter
    resolves them from this function's scope.  Do not rename or remove
    seemingly unused locals without confirming that.
    """
    brig_opcode_upper = brig_opcode.upper()
    class_name = brig_opcode
    opcode = class_name.lower()

    if base_class == 'ArithInst':
        # note that expr must be provided with ArithInst so we can
        # derive num_srcs for the template
        assert expr

    if expr:
        # Derive several bits of info from expr.  If expr is not used,
        # this info will be irrelevant.
        num_srcs = num_src_operands(expr)
        # if the RHS expression includes 'dest', then we're doing an RMW
        # on the reg and we need to treat it like a source
        dest_is_src = expr.find('dest') != -1
        dest_is_src_flag = str(dest_is_src).lower() # for C++
        # Rewrite srcN to the name used inside the exec template: an
        # array element for the variable-source templates, a scalar
        # src_valN otherwise.
        if base_class in ['ShiftInst']:
            expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr)
        elif base_class in ['ArithInst', 'CmpInst', 'CvtInst']:
            expr = re.sub(r'\bsrc(\d)\b', r'src_val[\1]', expr)
        else:
            expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr)
        expr = re.sub(r'\bdest\b', r'dest_val', expr)

    # Strip template arguments off of base class before looking up
    # appropriate templates
    base_class_base = re.sub(r'<.*>$', '', base_class)
    header_code(header_templates[base_class_base])

    # Only the SpecialInst exec code goes to the exec file; every other
    # exec template is emitted into the header.
    if base_class.startswith('SpecialInst'):
        exec_code(exec_templates[base_class_base])
    elif base_class.startswith('ShiftInst'):
        header_code(exec_template_shift)
    else:
        header_code(exec_templates[base_class_base])

    if not types or isinstance(types, str):
        # Just a single type
        constructor = constructor_prefix + class_name
        decoder_code(decode_nodt_template)
    else:
        # multiple types, need at least one level of decode
        if brig_opcode == 'Class':
            decoder_code(decode_case_prolog_class_inst)
        else:
            decoder_code(decode_case_prolog)
        if not type2_info:
            if not is_store:
                # single list of types, so basic one-level decode
                for type_name in types:
                    full_class_name = '%s<%s>' % (class_name,
                                                  type_name.upper())
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_case_entry)
            else:
                # store variant of the one-level decode: each type gets
                # its own braced case and a two-argument class template
                for type_name in types:
                    decoder_code(decode_store_prolog)
                    type_size = int(re.findall(r'[0-9]+', type_name)[0])
                    src_size = 32
                    type_type = type_name[0]
                    full_class_name = '%s<%s,%s>' % (
                        class_name,
                        type_name.upper(),
                        '%s%d' % (type_type.upper(), type_size))
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_store_case_entry)
                    decoder_code(decode_store_case_epilog)
        else:
            # need secondary type switch (convert, compare)
            # unpack extra info on second switch
            (type2_field, types2) = type2_info
            brig_type = 'Inst%s' % brig_opcode
            for type_name in types:
                decoder_code(decode_case2_prolog)
                fmt = '%s<%s,%%s>' % (class_name, type_name.upper())
                for type2_name in types2:
                    full_class_name = fmt % type2_name.upper()
                    constructor = constructor_prefix + full_class_name
                    decoder_code(decode_case2_entry)

                decoder_code(decode_case2_epilog)

        decoder_code(decode_case_epilog)
624
625###############
626#
627# Generate instructions
628#
629###############
630
631# handy abbreviations for common sets of types
632
633# arithmetic ops are typically defined only on 32- and 64-bit sizes
# Integer and floating-point types on which arithmetic ops are generated.
arith_int_types = ('S32', 'U32', 'S64', 'U64')
arith_float_types = ('F32', 'F64')
arith_types = arith_int_types + arith_float_types

# Untyped bit-string types used by the logical/bitwise ops.
bit_types = ('B1', 'B32', 'B64')

# All integer widths, including the sub-32-bit ones.
all_int_types = ('S8', 'U8', 'S16', 'U16') + arith_int_types

# I think you might be able to do 'f16' memory ops too, but we'll
# ignore them for now.
mem_types = all_int_types + arith_float_types
mem_atom_types = all_int_types + ('B32', 'B64')
646
647##### Arithmetic & logical operations
648gen('Add', arith_types, 'src0 + src1')
649gen('Sub', arith_types, 'src0 - src1')
650gen('Mul', arith_types, 'src0 * src1')
651gen('Div', arith_types, 'src0 / src1')
652gen('Min', arith_types, 'std::min(src0, src1)')
653gen('Max', arith_types, 'std::max(src0, src1)')
654gen('Gcnmin', arith_types, 'std::min(src0, src1)')
655
656gen('CopySign', arith_float_types,
657 'src1 < 0 ? -std::abs(src0) : std::abs(src0)')
658gen('Sqrt', arith_float_types, 'sqrt(src0)')
659gen('Floor', arith_float_types, 'floor(src0)')
660
661# "fast" sqrt... same as slow for us
662gen('Nsqrt', arith_float_types, 'sqrt(src0)')
663gen('Nrsqrt', arith_float_types, '1.0/sqrt(src0)')
664gen('Nrcp', arith_float_types, '1.0/src0')
665gen('Fract', arith_float_types,
666 '(src0 >= 0.0)?(src0-floor(src0)):(floor(src0)-src0)')
667
668gen('Ncos', arith_float_types, 'cos(src0)');
669gen('Nsin', arith_float_types, 'sin(src0)');
670
671gen('And', bit_types, 'src0 & src1')
672gen('Or', bit_types, 'src0 | src1')
673gen('Xor', bit_types, 'src0 ^ src1')
674
675gen('Bitselect', bit_types, '(src1 & src0) | (src2 & ~src0)')
676gen('Firstbit',bit_types, 'firstbit(src0)')
677gen('Popcount', ('B32', 'B64'), '__builtin_popcount(src0)')
678
679gen('Shl', arith_int_types, 'src0 << (unsigned)src1', 'ShiftInst')
680gen('Shr', arith_int_types, 'src0 >> (unsigned)src1', 'ShiftInst')
681
682# gen('Mul_hi', types=('s32','u32', '??'))
683# gen('Mul24', types=('s32','u32', '??'))
684gen('Rem', arith_int_types, 'src0 - ((src0 / src1) * src1)')
685
686gen('Abs', arith_types, 'std::abs(src0)')
687gen('Neg', arith_types, '-src0')
688
689gen('Mov', bit_types, 'src0')
690gen('Not', bit_types, 'heynot(src0)')
691
692# mad and fma differ only in rounding behavior, which we don't emulate
693# also there's an integer form of mad, but not of fma
694gen('Mad', arith_types, 'src0 * src1 + src2')
695gen('Fma', arith_float_types, 'src0 * src1 + src2')
696
697#native floating point operations
698gen('Nfma', arith_float_types, 'src0 * src1 + src2')
699
700gen('Cmov', bit_types, 'src0 ? src1 : src2', 'CmovInst')
701gen('BitAlign', bit_types, '(src0 << src2)|(src1 >> (32 - src2))')
702gen('ByteAlign', bit_types, '(src0 << 8 * src2)|(src1 >> (32 - 8 * src2))')
703
704# see base/bitfield.hh
705gen('BitExtract', arith_int_types, 'bits(src0, src1, src1 + src2 - 1)',
706 'ExtractInsertInst')
707
708gen('BitInsert', arith_int_types, 'insertBits(dest, src1, src2, src0)',
709 'ExtractInsertInst')
710
711##### Compare
712gen('Cmp', ('B1', 'S32', 'U32', 'F32'), 'compare(src0, src1, this->cmpOp)',
713 'CmpInst', ('sourceType', arith_types + bit_types))
714gen('Class', arith_float_types, 'fpclassify(src0,src1)','ClassInst')
715
716##### Conversion
717
718# Conversion operations are only defined on B1, not B32 or B64
719cvt_types = ('B1',) + mem_types
720
721gen('Cvt', cvt_types, 'src0', 'CvtInst', ('sourceType', cvt_types))
722
723
724##### Load & Store
725gen('Lda', mem_types, base_class = 'LdInst', constructor_prefix='decode')
726gen('Ld', mem_types, base_class = 'LdInst', constructor_prefix='decode')
727gen('St', mem_types, base_class = 'StInst', constructor_prefix='decode',
728 is_store=True)
729gen('Atomic', mem_atom_types, base_class='StInst', constructor_prefix='decode')
730gen('AtomicNoRet', mem_atom_types, base_class='StInst',
731 constructor_prefix='decode')
732
733gen('Cbr', base_class = 'LdInst', constructor_prefix='decode')
734gen('Br', base_class = 'LdInst', constructor_prefix='decode')
735
736##### Special operations
def gen_special(brig_opcode, expr, dest_type='U32'):
    """Generate a 'special' instruction whose result is computed by expr.

    The base class is picked from the number of source operands that
    expr references: none selects SpecialInstNoSrc, one selects
    SpecialInst1Src.  dest_type becomes the template argument of that
    base class.

    Raises ValueError if expr references more than one source operand,
    since no matching SpecialInst base class is selected here.
    """
    num_srcs = num_src_operands(expr)
    if num_srcs == 0:
        base_class = 'SpecialInstNoSrc<%s>' % dest_type
    elif num_srcs == 1:
        base_class = 'SpecialInst1Src<%s>' % dest_type
    else:
        # Raise explicitly rather than assert: asserts vanish under -O,
        # which would leave base_class unbound below.
        raise ValueError(
            "special instruction '%s' uses %d source operands; "
            "only 0 or 1 are supported" % (brig_opcode, num_srcs))

    gen(brig_opcode, None, expr, base_class)
747
748gen_special('WorkItemId', 'w->workitemid[src0][lane]')
748gen_special('WorkItemId', 'w->workItemId[src0][lane]')
749gen_special('WorkItemAbsId',
749gen_special('WorkItemAbsId',
750 'w->workitemid[src0][lane] + (w->workgroupid[src0] * w->workgroupsz[src0])')
751gen_special('WorkGroupId', 'w->workgroupid[src0]')
752gen_special('WorkGroupSize', 'w->workgroupsz[src0]')
753gen_special('CurrentWorkGroupSize', 'w->workgroupsz[src0]')
754gen_special('GridSize', 'w->gridsz[src0]')
750 'w->workItemId[src0][lane] + (w->workGroupId[src0] * w->workGroupSz[src0])')
751gen_special('WorkGroupId', 'w->workGroupId[src0]')
752gen_special('WorkGroupSize', 'w->workGroupSz[src0]')
753gen_special('CurrentWorkGroupSize', 'w->workGroupSz[src0]')
754gen_special('GridSize', 'w->gridSz[src0]')
755gen_special('GridGroups',
755gen_special('GridGroups',
756 'divCeil(w->gridsz[src0],w->workgroupsz[src0])')
756 'divCeil(w->gridSz[src0],w->workGroupSz[src0])')
757gen_special('LaneId', 'lane')
757gen_special('LaneId', 'lane')
758gen_special('WaveId', 'w->dynwaveid')
758gen_special('WaveId', 'w->dynWaveId')
759gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64')
760
761# gen_special('CU'', ')
762
763gen('Ret', base_class='SpecialInstNoSrcNoDest')
764gen('Barrier', base_class='SpecialInstNoSrcNoDest')
765gen('MemFence', base_class='SpecialInstNoSrcNoDest')
766
767# Map magic instructions to the BrigSyscall opcode
768# Magic instructions are defined in magic.hh
769#
770# In the future, real HSA kernel system calls can be implemented and coexist
771# with magic instructions.
772gen('Call', base_class='SpecialInstNoSrcNoDest')
773
774###############
775#
776# Generate file epilogs
777#
778###############
779header_code.dedent()
780header_code('''
781} // namespace HsailISA
782''')
783
784# close off main decode switch
785decoder_code.dedent()
786decoder_code.dedent()
787decoder_code('''
788 default: fatal("unrecognized Brig opcode %d\\n", ib->opcode);
789 } // end switch(ib->opcode)
790 } // end decode()
791} // namespace HsailISA
792''')
793
794exec_code.dedent()
795exec_code('''
796} // namespace HsailISA
797''')
798
799###############
800#
801# Output accumulated code to files
802#
803###############
804header_code.write(sys.argv[1])
805decoder_code.write(sys.argv[2])
806exec_code.write(sys.argv[3])
759gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64')
760
761# gen_special('CU'', ')
762
763gen('Ret', base_class='SpecialInstNoSrcNoDest')
764gen('Barrier', base_class='SpecialInstNoSrcNoDest')
765gen('MemFence', base_class='SpecialInstNoSrcNoDest')
766
767# Map magic instructions to the BrigSyscall opcode
768# Magic instructions are defined in magic.hh
769#
770# In the future, real HSA kernel system calls can be implemented and coexist
771# with magic instructions.
772gen('Call', base_class='SpecialInstNoSrcNoDest')
773
774###############
775#
776# Generate file epilogs
777#
778###############
779header_code.dedent()
780header_code('''
781} // namespace HsailISA
782''')
783
784# close off main decode switch
785decoder_code.dedent()
786decoder_code.dedent()
787decoder_code('''
788 default: fatal("unrecognized Brig opcode %d\\n", ib->opcode);
789 } // end switch(ib->opcode)
790 } // end decode()
791} // namespace HsailISA
792''')
793
794exec_code.dedent()
795exec_code('''
796} // namespace HsailISA
797''')
798
799###############
800#
801# Output accumulated code to files
802#
803###############
804header_code.write(sys.argv[1])
805decoder_code.write(sys.argv[2])
806exec_code.write(sys.argv[3])