mediaop.isa (12707:7819f067a128) mediaop.isa (13611:c8b7847b4171)
1// Copyright (c) 2009 The Regents of The University of Michigan
2// Copyright (c) 2015 Advanced Micro Devices, Inc.
3//
4// All rights reserved.
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are
8// met: redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer;
10// redistributions in binary form must reproduce the above copyright
11// notice, this list of conditions and the following disclaimer in the
12// documentation and/or other materials provided with the distribution;
13// neither the name of the copyright holders nor the names of its
14// contributors may be used to endorse or promote products derived from
15// this software without specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28//
29// Authors: Gabe Black
30
// Template for a media microop's execute() method. It expands the op_decl
// and op_rd placeholders, then the microop's code, and expands op_wb only
// while fault is still NoFault; the fault value is what gets returned.
31def template MediaOpExecute {{
32 Fault %(class_name)s::execute(ExecContext *xc,
33 Trace::InstRecord *traceData) const
34 {
35 Fault fault = NoFault;
36
37 %(op_decl)s;
38 %(op_rd)s;
39
40 %(code)s;
41
42 //Write the resulting state to the execution context
43 if(fault == NoFault)
44 {
45 %(op_wb)s;
46 }
47 return fault;
48 }
49}};
50
// Template for the class declaration of the register form of a media
// microop: the constructor takes two source register indices (_src1, _src2)
// and a destination index, followed by the source size, destination size and
// ext value; the class also declares execute().
51def template MediaOpRegDeclare {{
52 class %(class_name)s : public %(base_class)s
53 {
54 public:
55 %(class_name)s(ExtMachInst _machInst,
56 const char * instMnem, uint64_t setFlags,
57 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
58 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
59
60 Fault execute(ExecContext *, Trace::InstRecord *) const;
61 };
62}};
63
// Template for the class declaration of the immediate form of a media
// microop: identical in shape to the register form except that the second
// operand is a uint16_t _imm8 instead of a register index.
64def template MediaOpImmDeclare {{
65
66 class %(class_name)s : public %(base_class)s
67 {
68 public:
69 %(class_name)s(ExtMachInst _machInst,
70 const char * instMnem, uint64_t setFlags,
71 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
72 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
73
74 Fault execute(ExecContext *, Trace::InstRecord *) const;
75 };
76}};
77
// Template for the constructor definition of the register form: every
// argument is forwarded to the base class together with the mnemonic string
// and the op class, then the constructor placeholder is expanded in the body.
78def template MediaOpRegConstructor {{
79 %(class_name)s::%(class_name)s(
80 ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
81 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
82 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
83 %(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
84 _src1, _src2, _dest, _srcSize, _destSize, _ext,
85 %(op_class)s)
86 {
87 %(constructor)s;
88 }
89}};
90
// Template for the constructor definition of the immediate form: same as the
// register form's constructor but forwards _imm8 in place of _src2.
91def template MediaOpImmConstructor {{
92 %(class_name)s::%(class_name)s(
93 ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
94 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
95 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
96 %(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
97 _src1, _imm8, _dest, _srcSize, _destSize, _ext,
98 %(op_class)s)
99 {
100 %(constructor)s;
101 }
102}};
103
104let {{
105 # Make these empty strings so that concatenating onto
106 # them will always work.
107 header_output = ""
108 decoder_output = ""
109 exec_output = ""
110
# Each tuple is ordered (declaration, constructor, execute).
# MediaOpMeta.buildCppClasses appends the expansion of entry 0 to
# header_output, entry 1 to decoder_output and entry 2 to exec_output.
111 immTemplates = (
112 MediaOpImmDeclare,
113 MediaOpImmConstructor,
114 MediaOpExecute)
115
# Same layout for the register form; both forms share MediaOpExecute.
116 regTemplates = (
117 MediaOpRegDeclare,
118 MediaOpRegConstructor,
119 MediaOpExecute)
120
class MediaOpMeta(type):
    # Metaclass for media microops. Creating a non-abstract class with this
    # metaclass generates its C++ classes from the class's "code" attribute
    # and registers the class in microopClasses under its lower-cased name
    # (and under name + 'i' when the code uses an op2 operand).

    # Matches a standalone "op2" or "sop2" operand reference with an optional
    # "_<type>" qualifier. buildCppClasses() and __new__() must use the same
    # pattern so that the 'i' mnemonic is registered exactly when an Imm
    # class has been generated for it.
    _op2Matcher = re.compile(
        r"(?<!\w)(?P<prefix>s?)op2(?P<typeQual>_[^\W_]+)?")
    # Matches "imm8" as a whole word.
    _imm8Matcher = re.compile(r"(?<!\w)imm8(?!\w)")

    def buildCppClasses(self, name, Name, suffix, code):
        # Expands the declaration, constructor and execute templates for one
        # microop and appends them to the output globals.
        #   name   -- mnemonic passed to InstObjParams
        #   Name   -- base C++ class name
        #   suffix -- appended to Name ("" or "Imm")
        #   code   -- C++ snippet for the execute() body

        # Globals to stick the output in
        global header_output
        global decoder_output
        global exec_output

        # If op2 is used anywhere, make register and immediate versions
        # of this code. The prefix and type qualifier of the FIRST match are
        # applied to every occurrence in the register version.
        match = MediaOpMeta._op2Matcher.search(code)
        if match:
            typeQual = ""
            if match.group("typeQual"):
                typeQual = match.group("typeQual")
            src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual)
            self.buildCppClasses(name, Name, suffix,
                    MediaOpMeta._op2Matcher.sub(src2_name, code))
            self.buildCppClasses(name + "i", Name, suffix + "Imm",
                    MediaOpMeta._op2Matcher.sub("imm8", code))
            return

        base = "X86ISA::MediaOp"

        # If imm8 shows up in the code, use the immediate templates, if
        # not, hopefully the register ones will be correct.
        if MediaOpMeta._imm8Matcher.search(code):
            base += "Imm"
            templates = immTemplates
        else:
            base += "Reg"
            templates = regTemplates

        # Get everything ready for the substitution
        opt_args = []
        if self.op_class:
            opt_args.append(self.op_class)
        iop = InstObjParams(name, Name + suffix, base, {"code" : code},
                            opt_args)

        # Generate the actual code (finally!)
        header_output += templates[0].subst(iop)
        decoder_output += templates[1].subst(iop)
        exec_output += templates[2].subst(iop)


    def __new__(mcls, Name, bases, dict):
        # Creates the class; unless its dict marks it abstract, also builds
        # its C++ classes and hooks it into the microassembler dict.
        global microopClasses

        abstract = False
        name = Name.lower()
        # "abstract" is consumed here so that it is not inherited by
        # subclasses.
        if "abstract" in dict:
            abstract = dict['abstract']
            del dict['abstract']
        if "op_class" not in dict:
            dict["op_class"] = None

        cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
        if not abstract:
            cls.className = Name
            cls.base_mnemonic = name
            code = cls.code

            # Set up the C++ classes
            mcls.buildCppClasses(cls, name, Name, "", code)

            # Hook into the microassembler dict
            microopClasses[name] = cls

            # If op2 is used anywhere, an immediate version was generated
            # as well, so register its mnemonic too.
            if MediaOpMeta._op2Matcher.search(code):
                microopClasses[name + 'i'] = cls
        return cls
197
198
class MediaOp(X86Microop):
    # Base class of all media microops. Subclasses provide "code" (and
    # optionally "op_class"); MediaOpMeta turns them into C++ classes.
    __metaclass__ = MediaOpMeta
    # This class itself doesn't act as a microop
    abstract = True

    def __init__(self, dest, src1, op2,
            size = None, destSize = None, srcSize = None, ext = None):
        # dest, src1, op2 -- operand expressions pasted into the allocator
        # size            -- sets both srcSize and destSize
        # srcSize/destSize -- override the respective size when given
        # ext             -- extension value; defaults to 0
        # Raises Exception when a source or destination size is missing.
        self.dest = dest
        self.src1 = src1
        self.op2 = op2
        if size is not None:
            self.srcSize = size
            self.destSize = size
        if srcSize is not None:
            self.srcSize = srcSize
        if destSize is not None:
            self.destSize = destSize
        # getattr: the attribute may never have been assigned above, and the
        # caller should get the descriptive error rather than an
        # AttributeError.
        if getattr(self, "srcSize", None) is None:
            raise Exception("Source size not set.")
        if getattr(self, "destSize", None) is None:
            raise Exception("Dest size not set.")
        if ext is None:
            self.ext = 0
        else:
            self.ext = ext

    def getAllocator(self, microFlags):
        # Returns the C++ "new ..." expression that constructs this microop.
        # The Imm class is selected when this instance was created through
        # the base mnemonic + 'i'.
        className = self.className
        if self.mnemonic == self.base_mnemonic + 'i':
            className += "Imm"
        allocator = '''new %(class_name)s(machInst, macrocodeBlock,
                %(flags)s, %(src1)s, %(op2)s, %(dest)s,
                %(srcSize)s, %(destSize)s, %(ext)s)''' % {
            "class_name" : className,
            "flags" : self.microFlagsText(microFlags),
            "src1" : self.src1, "op2" : self.op2,
            "dest" : self.dest,
            "srcSize" : self.srcSize,
            "destSize" : self.destSize,
            "ext" : self.ext}
        return allocator
240
241 class Mov2int(MediaOp):
242 def __init__(self, dest, src1, src2 = 0, \
243 size = None, destSize = None, srcSize = None, ext = None):
244 super(Mov2int, self).__init__(dest, src1,\
245 src2, size, destSize, srcSize, ext)
246 op_class = 'SimdMiscOp'
247 code = '''
1// Copyright (c) 2009 The Regents of The University of Michigan
2// Copyright (c) 2015 Advanced Micro Devices, Inc.
3//
4// All rights reserved.
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are
8// met: redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer;
10// redistributions in binary form must reproduce the above copyright
11// notice, this list of conditions and the following disclaimer in the
12// documentation and/or other materials provided with the distribution;
13// neither the name of the copyright holders nor the names of its
14// contributors may be used to endorse or promote products derived from
15// this software without specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28//
29// Authors: Gabe Black
30
// Template for a media microop's execute() method. It expands the op_decl
// and op_rd placeholders, then the microop's code, and expands op_wb only
// while fault is still NoFault; the fault value is what gets returned.
31def template MediaOpExecute {{
32 Fault %(class_name)s::execute(ExecContext *xc,
33 Trace::InstRecord *traceData) const
34 {
35 Fault fault = NoFault;
36
37 %(op_decl)s;
38 %(op_rd)s;
39
40 %(code)s;
41
42 //Write the resulting state to the execution context
43 if(fault == NoFault)
44 {
45 %(op_wb)s;
46 }
47 return fault;
48 }
49}};
50
// Template for the class declaration of the register form of a media
// microop: the constructor takes two source register indices (_src1, _src2)
// and a destination index, followed by the source size, destination size and
// ext value; the class also declares execute().
51def template MediaOpRegDeclare {{
52 class %(class_name)s : public %(base_class)s
53 {
54 public:
55 %(class_name)s(ExtMachInst _machInst,
56 const char * instMnem, uint64_t setFlags,
57 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
58 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
59
60 Fault execute(ExecContext *, Trace::InstRecord *) const;
61 };
62}};
63
// Template for the class declaration of the immediate form of a media
// microop: identical in shape to the register form except that the second
// operand is a uint16_t _imm8 instead of a register index.
64def template MediaOpImmDeclare {{
65
66 class %(class_name)s : public %(base_class)s
67 {
68 public:
69 %(class_name)s(ExtMachInst _machInst,
70 const char * instMnem, uint64_t setFlags,
71 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
72 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
73
74 Fault execute(ExecContext *, Trace::InstRecord *) const;
75 };
76}};
77
// Template for the constructor definition of the register form: every
// argument is forwarded to the base class together with the mnemonic string
// and the op class, then the constructor placeholder is expanded in the body.
78def template MediaOpRegConstructor {{
79 %(class_name)s::%(class_name)s(
80 ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
81 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
82 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
83 %(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
84 _src1, _src2, _dest, _srcSize, _destSize, _ext,
85 %(op_class)s)
86 {
87 %(constructor)s;
88 }
89}};
90
// Template for the constructor definition of the immediate form: same as the
// register form's constructor but forwards _imm8 in place of _src2.
91def template MediaOpImmConstructor {{
92 %(class_name)s::%(class_name)s(
93 ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
94 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
95 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
96 %(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
97 _src1, _imm8, _dest, _srcSize, _destSize, _ext,
98 %(op_class)s)
99 {
100 %(constructor)s;
101 }
102}};
103
104let {{
105 # Make these empty strings so that concatenating onto
106 # them will always work.
107 header_output = ""
108 decoder_output = ""
109 exec_output = ""
110
# Each tuple is ordered (declaration, constructor, execute).
# MediaOpMeta.buildCppClasses appends the expansion of entry 0 to
# header_output, entry 1 to decoder_output and entry 2 to exec_output.
111 immTemplates = (
112 MediaOpImmDeclare,
113 MediaOpImmConstructor,
114 MediaOpExecute)
115
# Same layout for the register form; both forms share MediaOpExecute.
116 regTemplates = (
117 MediaOpRegDeclare,
118 MediaOpRegConstructor,
119 MediaOpExecute)
120
class MediaOpMeta(type):
    # Metaclass for media microops. Creating a non-abstract class with this
    # metaclass generates its C++ classes from the class's "code" attribute
    # and registers the class in microopClasses under its lower-cased name
    # (and under name + 'i' when the code uses an op2 operand).

    # Matches a standalone "op2" or "sop2" operand reference with an optional
    # "_<type>" qualifier. buildCppClasses() and __new__() must use the same
    # pattern so that the 'i' mnemonic is registered exactly when an Imm
    # class has been generated for it.
    _op2Matcher = re.compile(
        r"(?<!\w)(?P<prefix>s?)op2(?P<typeQual>_[^\W_]+)?")
    # Matches "imm8" as a whole word.
    _imm8Matcher = re.compile(r"(?<!\w)imm8(?!\w)")

    def buildCppClasses(self, name, Name, suffix, code):
        # Expands the declaration, constructor and execute templates for one
        # microop and appends them to the output globals.
        #   name   -- mnemonic passed to InstObjParams
        #   Name   -- base C++ class name
        #   suffix -- appended to Name ("" or "Imm")
        #   code   -- C++ snippet for the execute() body

        # Globals to stick the output in
        global header_output
        global decoder_output
        global exec_output

        # If op2 is used anywhere, make register and immediate versions
        # of this code. The prefix and type qualifier of the FIRST match are
        # applied to every occurrence in the register version.
        match = MediaOpMeta._op2Matcher.search(code)
        if match:
            typeQual = ""
            if match.group("typeQual"):
                typeQual = match.group("typeQual")
            src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual)
            self.buildCppClasses(name, Name, suffix,
                    MediaOpMeta._op2Matcher.sub(src2_name, code))
            self.buildCppClasses(name + "i", Name, suffix + "Imm",
                    MediaOpMeta._op2Matcher.sub("imm8", code))
            return

        base = "X86ISA::MediaOp"

        # If imm8 shows up in the code, use the immediate templates, if
        # not, hopefully the register ones will be correct.
        if MediaOpMeta._imm8Matcher.search(code):
            base += "Imm"
            templates = immTemplates
        else:
            base += "Reg"
            templates = regTemplates

        # Get everything ready for the substitution
        opt_args = []
        if self.op_class:
            opt_args.append(self.op_class)
        iop = InstObjParams(name, Name + suffix, base, {"code" : code},
                            opt_args)

        # Generate the actual code (finally!)
        header_output += templates[0].subst(iop)
        decoder_output += templates[1].subst(iop)
        exec_output += templates[2].subst(iop)


    def __new__(mcls, Name, bases, dict):
        # Creates the class; unless its dict marks it abstract, also builds
        # its C++ classes and hooks it into the microassembler dict.
        global microopClasses

        abstract = False
        name = Name.lower()
        # "abstract" is consumed here so that it is not inherited by
        # subclasses.
        if "abstract" in dict:
            abstract = dict['abstract']
            del dict['abstract']
        if "op_class" not in dict:
            dict["op_class"] = None

        cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
        if not abstract:
            cls.className = Name
            cls.base_mnemonic = name
            code = cls.code

            # Set up the C++ classes
            mcls.buildCppClasses(cls, name, Name, "", code)

            # Hook into the microassembler dict
            microopClasses[name] = cls

            # If op2 is used anywhere, an immediate version was generated
            # as well, so register its mnemonic too.
            if MediaOpMeta._op2Matcher.search(code):
                microopClasses[name + 'i'] = cls
        return cls
197
198
class MediaOp(X86Microop):
    # Base class of all media microops. Subclasses provide "code" (and
    # optionally "op_class"); MediaOpMeta turns them into C++ classes.
    __metaclass__ = MediaOpMeta
    # This class itself doesn't act as a microop
    abstract = True

    def __init__(self, dest, src1, op2,
            size = None, destSize = None, srcSize = None, ext = None):
        # dest, src1, op2 -- operand expressions pasted into the allocator
        # size            -- sets both srcSize and destSize
        # srcSize/destSize -- override the respective size when given
        # ext             -- extension value; defaults to 0
        # Raises Exception when a source or destination size is missing.
        self.dest = dest
        self.src1 = src1
        self.op2 = op2
        if size is not None:
            self.srcSize = size
            self.destSize = size
        if srcSize is not None:
            self.srcSize = srcSize
        if destSize is not None:
            self.destSize = destSize
        # getattr: the attribute may never have been assigned above, and the
        # caller should get the descriptive error rather than an
        # AttributeError.
        if getattr(self, "srcSize", None) is None:
            raise Exception("Source size not set.")
        if getattr(self, "destSize", None) is None:
            raise Exception("Dest size not set.")
        if ext is None:
            self.ext = 0
        else:
            self.ext = ext

    def getAllocator(self, microFlags):
        # Returns the C++ "new ..." expression that constructs this microop.
        # The Imm class is selected when this instance was created through
        # the base mnemonic + 'i'.
        className = self.className
        if self.mnemonic == self.base_mnemonic + 'i':
            className += "Imm"
        allocator = '''new %(class_name)s(machInst, macrocodeBlock,
                %(flags)s, %(src1)s, %(op2)s, %(dest)s,
                %(srcSize)s, %(destSize)s, %(ext)s)''' % {
            "class_name" : className,
            "flags" : self.microFlagsText(microFlags),
            "src1" : self.src1, "op2" : self.op2,
            "dest" : self.dest,
            "srcSize" : self.srcSize,
            "destSize" : self.destSize,
            "ext" : self.ext}
        return allocator
240
class Mov2int(MediaOp):
    # Moves one srcSize-byte item of FpSrcReg1 into the integer DestReg.
    # imm8 selects the item; when bit 0 of src1 and bit 0 of ext are both
    # set, the selector is lowered by the number of items per register.
    # An out-of-range selector leaves DestReg unchanged.
    def __init__(self, dest, src1, src2 = 0, \
            size = None, destSize = None, srcSize = None, ext = None):
        # src2 defaults to 0 and is passed through as the op2 operand.
        super(Mov2int, self).__init__(dest, src1,\
                src2, size, destSize, srcSize, ext)
    op_class = 'SimdMiscOp'
    code = '''
        int items = sizeof(FloatReg) / srcSize;
        int offset = imm8;
        if (bits(src1, 0) && (ext & 0x1))
            offset -= items;
        if (offset >= 0 && offset < items) {
            uint64_t fpSrcReg1 =
                bits(FpSrcReg1_uqw,
                        (offset + 1) * srcSize * 8 - 1,
                        (offset + 0) * srcSize * 8);
            DestReg = merge(0, fpSrcReg1, destSize);
        } else {
            DestReg = DestReg;
        }
    '''
262
class Mov2fp(MediaOp):
    # Inserts the low srcSize bytes of the integer SrcReg1 into one
    # destSize-byte item of FpDestReg. imm8 selects the item; when bit 0 of
    # dest and bit 0 of ext are both set, the selector is lowered by the
    # number of items per register. An out-of-range selector leaves
    # FpDestReg unchanged.
    def __init__(self, dest, src1, src2 = 0, \
            size = None, destSize = None, srcSize = None, ext = None):
        # src2 defaults to 0 and is passed through as the op2 operand.
        super(Mov2fp, self).__init__(dest, src1,\
                src2, size, destSize, srcSize, ext)
    op_class = 'SimdMiscOp'
    code = '''
        int items = sizeof(FloatReg) / destSize;
        int offset = imm8;
        if (bits(dest, 0) && (ext & 0x1))
            offset -= items;
        if (offset >= 0 && offset < items) {
            uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
            FpDestReg_uqw =
                insertBits(FpDestReg_uqw,
                        (offset + 1) * destSize * 8 - 1,
                        (offset + 0) * destSize * 8, srcReg1);
        } else {
            FpDestReg_uqw = FpDestReg_uqw;
        }
    '''
284
class Movsign(MediaOp):
    # Collects the top bit of every srcSize-byte item of FpSrcReg1 into
    # consecutive bits of a mask and ORs that mask into DestReg. When bit 0
    # of ext is set, the mask bits are placed "items" positions higher.
    def __init__(self, dest, src, \
            size = None, destSize = None, srcSize = None, ext = None):
        # There is no second source; a fixed "InstRegIndex(0)" is supplied
        # as the op2 operand.
        super(Movsign, self).__init__(dest, src,\
                "InstRegIndex(0)", size, destSize, srcSize, ext)
    op_class = 'SimdMiscOp'
    code = '''
        int items = sizeof(FloatReg) / srcSize;
        uint64_t result = 0;
        int offset = (ext & 0x1) ? items : 0;
        for (int i = 0; i < items; i++) {
            uint64_t picked =
                bits(FpSrcReg1_uqw, (i + 1) * 8 * srcSize - 1);
            result = insertBits(result, i + offset, i + offset, picked);
        }
        DestReg = DestReg | result;
    '''
302
# Masked move: starting from the current FpDestReg value, each item of
# FpSrcReg1 is copied into the result only when the top bit of the
# corresponding item of FpSrcReg2 is set. Source and destination sizes must
# be equal (asserted in the code).
303 class Maskmov(MediaOp):
304 op_class = 'SimdMiscOp'
305 code = '''
306 assert(srcSize == destSize);
307 int size = srcSize;
308 int sizeBits = size * 8;
309 int items = numItems(size);
310 uint64_t result = FpDestReg_uqw;
311
312 for (int i = 0; i < items; i++) {
313 int hiIndex = (i + 1) * sizeBits - 1;
314 int loIndex = (i + 0) * sizeBits;
315 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
316 if (bits(FpSrcReg2_uqw, hiIndex))
317 result = insertBits(result, hiIndex, loIndex, arg1Bits);
318 }
319 FpDestReg_uqw = result;
320 '''
321
class shuffle(MediaOp):
    # Builds FpDestReg item by item. For each destination item the next
    # selector field is taken from the low bits of ext (1 bit per item when
    # size == 8, otherwise 2 bits). A selector whose byte offset falls
    # outside one register reads from FpSrcReg2 (after subtracting
    # options / 2), any other selector reads from FpSrcReg1.
    op_class = 'SimdMiscOp'
    code = '''
        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = sizeof(FloatReg) / size;
        int options;
        int optionBits;
        if (size == 8) {
            options = 2;
            optionBits = 1;
        } else {
            options = 4;
            optionBits = 2;
        }

        uint64_t result = 0;
        uint8_t sel = ext;

        for (int i = 0; i < items; i++) {
            uint64_t resBits;
            uint8_t lsel = sel & mask(optionBits);
            if (lsel * size >= sizeof(FloatReg)) {
                lsel -= options / 2;
                resBits = bits(FpSrcReg2_uqw,
                        (lsel + 1) * sizeBits - 1,
                        (lsel + 0) * sizeBits);
            } else {
                resBits = bits(FpSrcReg1_uqw,
                        (lsel + 1) * sizeBits - 1,
                        (lsel + 0) * sizeBits);
            }

            sel >>= optionBits;

            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg_uqw = result;
    '''
364
class Unpack(MediaOp):
    # Interleaves items of the two sources: destination item 2*i comes from
    # FpSrcReg1 and item 2*i + 1 from FpSrcReg2. Half a register's worth of
    # items is consumed from each source, starting at item 0 when ext is
    # zero and at item "items" otherwise.
    op_class = 'SimdMiscOp'
    code = '''
        assert(srcSize == destSize);
        int size = destSize;
        int items = (sizeof(FloatReg) / size) / 2;
        int offset = ext ? items : 0;
        uint64_t result = 0;
        for (int i = 0; i < items; i++) {
            uint64_t pickedLow =
                bits(FpSrcReg1_uqw, (i + offset + 1) * 8 * size - 1,
                                    (i + offset) * 8 * size);
            result = insertBits(result,
                                (2 * i + 1) * 8 * size - 1,
                                (2 * i + 0) * 8 * size,
                                pickedLow);
            uint64_t pickedHigh =
                bits(FpSrcReg2_uqw, (i + offset + 1) * 8 * size - 1,
                                    (i + offset) * 8 * size);
            result = insertBits(result,
                                (2 * i + 2) * 8 * size - 1,
                                (2 * i + 1) * 8 * size,
                                pickedHigh);
        }
        FpDestReg_uqw = result;
    '''
391
class Pack(MediaOp):
    # Narrows srcSize-byte items to destSize-byte items (srcSize must be
    # twice destSize). The lower half of the destination items is taken from
    # FpSrcReg1 and the upper half from FpSrcReg2, with the saturation
    # handling shown below.
    #
    # The second loop indexes FpSrcReg2 with (i - items / 2) so that its item
    # index runs from 0 upward; indexing with (i - items) yields negative bit
    # positions, and shifting by a negative count is undefined in C++.
    #
    # NOTE(review): mask(destBits - srcBits + 1) is called with a negative
    # width because srcBits == 2 * destBits; the width of "overflow" is
    # srcBits - destBits + 1. Left as is -- confirm the intended saturation
    # test for the signed and unsigned cases before changing it.
    op_class = 'SimdMiscOp'
    code = '''
        assert(srcSize == destSize * 2);
        int items = (sizeof(FloatReg) / destSize);
        int destBits = destSize * 8;
        int srcBits = srcSize * 8;
        uint64_t result = 0;
        int i;
        for (i = 0; i < items / 2; i++) {
            uint64_t picked =
                bits(FpSrcReg1_uqw, (i + 1) * srcBits - 1,
                                    (i + 0) * srcBits);
            unsigned signBit = bits(picked, srcBits - 1);
            uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);

            // Handle saturation.
            if (signBit) {
                if (overflow != mask(destBits - srcBits + 1)) {
                    if (signedOp())
                        picked = (ULL(1) << (destBits - 1));
                    else
                        picked = 0;
                }
            } else {
                if (overflow != 0) {
                    if (signedOp())
                        picked = mask(destBits - 1);
                    else
                        picked = mask(destBits);
                }
            }
            result = insertBits(result,
                                (i + 1) * destBits - 1,
                                (i + 0) * destBits,
                                picked);
        }
        for (;i < items; i++) {
            uint64_t picked =
                bits(FpSrcReg2_uqw, (i - items / 2 + 1) * srcBits - 1,
                                    (i - items / 2 + 0) * srcBits);
            unsigned signBit = bits(picked, srcBits - 1);
            uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);

            // Handle saturation.
            if (signBit) {
                if (overflow != mask(destBits - srcBits + 1)) {
                    if (signedOp())
                        picked = (ULL(1) << (destBits - 1));
                    else
                        picked = 0;
                }
            } else {
                if (overflow != 0) {
                    if (signedOp())
                        picked = mask(destBits - 1);
                    else
                        picked = mask(destBits);
                }
            }
            result = insertBits(result,
                                (i + 1) * destBits - 1,
                                (i + 0) * destBits,
                                picked);
        }
        FpDestReg_uqw = result;
    '''
459
# Bitwise XOR of the two FP source registers. The constructor takes no size
# arguments and always passes a size of 1 to MediaOp.
460 class Mxor(MediaOp):
461 def __init__(self, dest, src1, src2):
462 super(Mxor, self).__init__(dest, src1, src2, 1)
463 op_class = 'SimdAluOp'
464 code = '''
465 FpDestReg_uqw = FpSrcReg1_uqw ^ FpSrcReg2_uqw;
466 '''
467
# Bitwise OR of the two FP source registers. The constructor takes no size
# arguments and always passes a size of 1 to MediaOp.
468 class Mor(MediaOp):
469 def __init__(self, dest, src1, src2):
470 super(Mor, self).__init__(dest, src1, src2, 1)
471 op_class = 'SimdAluOp'
472 code = '''
473 FpDestReg_uqw = FpSrcReg1_uqw | FpSrcReg2_uqw;
474 '''
475
# Bitwise AND of the two FP source registers. The constructor takes no size
# arguments and always passes a size of 1 to MediaOp.
476 class Mand(MediaOp):
477 def __init__(self, dest, src1, src2):
478 super(Mand, self).__init__(dest, src1, src2, 1)
479 op_class = 'SimdAluOp'
480 code = '''
481 FpDestReg_uqw = FpSrcReg1_uqw & FpSrcReg2_uqw;
482 '''
483
# AND-NOT: the complement of the FIRST source is ANDed with the second
# source. The constructor takes no size arguments and always passes a size
# of 1 to MediaOp.
484 class Mandn(MediaOp):
485 def __init__(self, dest, src1, src2):
486 super(Mandn, self).__init__(dest, src1, src2, 1)
487 op_class = 'SimdAluOp'
488 code = '''
489 FpDestReg_uqw = ~FpSrcReg1_uqw & FpSrcReg2_uqw;
490 '''
491
# Item-wise floating point minimum. Items are 4 bytes (interpreted as float)
# or 8 bytes (interpreted as double); both are compared as double. For each
# item the first source's bits are stored when arg1 < arg2, otherwise the
# second source's bits are stored (so the second source is also the result
# whenever the comparison is false, including for equal values).
492 class Mminf(MediaOp):
493 op_class = 'SimdFloatCmpOp'
494 code = '''
495 union floatInt
496 {
497 float f;
498 uint32_t i;
499 };
500 union doubleInt
501 {
502 double d;
503 uint64_t i;
504 };
505
506 assert(srcSize == destSize);
507 int size = srcSize;
508 int sizeBits = size * 8;
509 assert(srcSize == 4 || srcSize == 8);
510 int items = numItems(size);
511 uint64_t result = FpDestReg_uqw;
512
513 for (int i = 0; i < items; i++) {
514 double arg1, arg2;
515 int hiIndex = (i + 1) * sizeBits - 1;
516 int loIndex = (i + 0) * sizeBits;
517 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
518 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
519
520 if (size == 4) {
521 floatInt fi;
522 fi.i = arg1Bits;
523 arg1 = fi.f;
524 fi.i = arg2Bits;
525 arg2 = fi.f;
526 } else {
527 doubleInt di;
528 di.i = arg1Bits;
529 arg1 = di.d;
530 di.i = arg2Bits;
531 arg2 = di.d;
532 }
533
534 if (arg1 < arg2) {
535 result = insertBits(result, hiIndex, loIndex, arg1Bits);
536 } else {
537 result = insertBits(result, hiIndex, loIndex, arg2Bits);
538 }
539 }
540 FpDestReg_uqw = result;
541 '''
542
# Item-wise floating point maximum. Items are 4 bytes (interpreted as float)
# or 8 bytes (interpreted as double); both are compared as double. For each
# item the first source's bits are stored when arg1 > arg2, otherwise the
# second source's bits are stored.
543 class Mmaxf(MediaOp):
544 op_class = 'SimdFloatCmpOp'
545 code = '''
546 union floatInt
547 {
548 float f;
549 uint32_t i;
550 };
551 union doubleInt
552 {
553 double d;
554 uint64_t i;
555 };
556
557 assert(srcSize == destSize);
558 int size = srcSize;
559 int sizeBits = size * 8;
560 assert(srcSize == 4 || srcSize == 8);
561 int items = numItems(size);
562 uint64_t result = FpDestReg_uqw;
563
564 for (int i = 0; i < items; i++) {
565 double arg1, arg2;
566 int hiIndex = (i + 1) * sizeBits - 1;
567 int loIndex = (i + 0) * sizeBits;
568 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
569 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
570
571 if (size == 4) {
572 floatInt fi;
573 fi.i = arg1Bits;
574 arg1 = fi.f;
575 fi.i = arg2Bits;
576 arg2 = fi.f;
577 } else {
578 doubleInt di;
579 di.i = arg1Bits;
580 arg1 = di.d;
581 di.i = arg2Bits;
582 arg2 = di.d;
583 }
584
585 if (arg1 > arg2) {
586 result = insertBits(result, hiIndex, loIndex, arg1Bits);
587 } else {
588 result = insertBits(result, hiIndex, loIndex, arg2Bits);
589 }
590 }
591 FpDestReg_uqw = result;
592 '''
593
# Item-wise integer minimum. Each item is extracted as raw bits and also
# sign-extended to int64_t (by ORing in the negated sign bit). signedOp()
# selects whether the sign-extended or the raw values are compared; the raw
# bits of the smaller item are stored, with the second source winning when
# the comparison is false.
594 class Mmini(MediaOp):
595 op_class = 'SimdCmpOp'
596 code = '''
597
598 assert(srcSize == destSize);
599 int size = srcSize;
600 int sizeBits = size * 8;
601 int items = numItems(size);
602 uint64_t result = FpDestReg_uqw;
603
604 for (int i = 0; i < items; i++) {
605 int hiIndex = (i + 1) * sizeBits - 1;
606 int loIndex = (i + 0) * sizeBits;
607 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
608 int64_t arg1 = arg1Bits |
609 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
610 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
611 int64_t arg2 = arg2Bits |
612 (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
613 uint64_t resBits;
614
615 if (signedOp()) {
616 if (arg1 < arg2) {
617 resBits = arg1Bits;
618 } else {
619 resBits = arg2Bits;
620 }
621 } else {
622 if (arg1Bits < arg2Bits) {
623 resBits = arg1Bits;
624 } else {
625 resBits = arg2Bits;
626 }
627 }
628 result = insertBits(result, hiIndex, loIndex, resBits);
629 }
630 FpDestReg_uqw = result;
631 '''
632
# Item-wise integer maximum. Each item is extracted as raw bits and also
# sign-extended to int64_t (by ORing in the negated sign bit). signedOp()
# selects whether the sign-extended or the raw values are compared; the raw
# bits of the larger item are stored, with the second source winning when
# the comparison is false.
633 class Mmaxi(MediaOp):
634 op_class = 'SimdCmpOp'
635 code = '''
636
637 assert(srcSize == destSize);
638 int size = srcSize;
639 int sizeBits = size * 8;
640 int items = numItems(size);
641 uint64_t result = FpDestReg_uqw;
642
643 for (int i = 0; i < items; i++) {
644 int hiIndex = (i + 1) * sizeBits - 1;
645 int loIndex = (i + 0) * sizeBits;
646 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
647 int64_t arg1 = arg1Bits |
648 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
649 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
650 int64_t arg2 = arg2Bits |
651 (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
652 uint64_t resBits;
653
654 if (signedOp()) {
655 if (arg1 > arg2) {
656 resBits = arg1Bits;
657 } else {
658 resBits = arg2Bits;
659 }
660 } else {
661 if (arg1Bits > arg2Bits) {
662 resBits = arg1Bits;
663 } else {
664 resBits = arg2Bits;
665 }
666 }
667 result = insertBits(result, hiIndex, loIndex, resBits);
668 }
669 FpDestReg_uqw = result;
670 '''
671
# Item-wise square root of FpSrcReg1 for 4-byte (float) or 8-byte (double)
# items. There is only one source; the constructor passes a fixed
# "InstRegIndex(0)" as the op2 operand. The result starts from the current
# FpDestReg value and each processed item is overwritten.
672 class Msqrt(MediaOp):
673 op_class = 'SimdFloatSqrtOp'
674 def __init__(self, dest, src, \
675 size = None, destSize = None, srcSize = None, ext = None):
676 super(Msqrt, self).__init__(dest, src,\
677 "InstRegIndex(0)", size, destSize, srcSize, ext)
678 code = '''
679 union floatInt
680 {
681 float f;
682 uint32_t i;
683 };
684 union doubleInt
685 {
686 double d;
687 uint64_t i;
688 };
689
690 assert(srcSize == destSize);
691 int size = srcSize;
692 int sizeBits = size * 8;
693 assert(srcSize == 4 || srcSize == 8);
694 int items = numItems(size);
695 uint64_t result = FpDestReg_uqw;
696
697 for (int i = 0; i < items; i++) {
698 int hiIndex = (i + 1) * sizeBits - 1;
699 int loIndex = (i + 0) * sizeBits;
700 uint64_t argBits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
701
702 if (size == 4) {
703 floatInt fi;
704 fi.i = argBits;
705 fi.f = sqrt(fi.f);
706 argBits = fi.i;
707 } else {
708 doubleInt di;
709 di.i = argBits;
710 di.d = sqrt(di.d);
711 argBits = di.i;
712 }
713 result = insertBits(result, hiIndex, loIndex, argBits);
714 }
715 FpDestReg_uqw = result;
716 '''
717
# Item-wise reciprocal (1.0 / x) of the 4-byte float items of FpSrcReg1.
# The code asserts srcSize == 4 and srcSize == destSize. There is only one
# source; the constructor passes a fixed "InstRegIndex(0)" as the op2
# operand. The division is exact rather than approximate, as the comment in
# the code notes.
718 # compute approximate reciprocal --- single-precision only
719 class Mrcp(MediaOp):
720 def __init__(self, dest, src, \
721 size = None, destSize = None, srcSize = None, ext = None):
722 super(Mrcp, self).__init__(dest, src,\
723 "InstRegIndex(0)", size, destSize, srcSize, ext)
724 op_class = 'SimdFloatAluOp'
725 code = '''
726 union floatInt
727 {
728 float f;
729 uint32_t i;
730 };
731
732 assert(srcSize == 4); // ISA defines single-precision only
733 assert(srcSize == destSize);
734 const int size = 4;
735 const int sizeBits = size * 8;
736 int items = numItems(size);
737 uint64_t result = FpDestReg_uqw;
738
739 for (int i = 0; i < items; i++) {
740 int hiIndex = (i + 1) * sizeBits - 1;
741 int loIndex = (i + 0) * sizeBits;
742 uint64_t argBits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
743
744 floatInt fi;
745 fi.i = argBits;
746 // This is more accuracy than HW provides, but oh well
747 fi.f = 1.0 / fi.f;
748 argBits = fi.i;
749 result = insertBits(result, hiIndex, loIndex, argBits);
750 }
751 FpDestReg_uqw = result;
752 '''
753
# Packed floating point add: lane by lane sum of the two sources, using
# float arithmetic for 4 byte lanes and double arithmetic for 8 byte lanes.
class Maddf(MediaOp):
    op_class = 'SimdFloatAddOp'
    code = '''
        // Overlays for viewing the raw bits of a lane as floating point.
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        assert(srcSize == 4 || srcSize == 8);
        const int laneBytes = srcSize;
        const int laneWidth = laneBytes * 8;
        const int laneCount = numItems(laneBytes);
        uint64_t result = FpDestReg_uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneWidth;
            const int msb = lsb + laneWidth - 1;
            const uint64_t lhsBits = bits(FpSrcReg1_uqw, msb, lsb);
            const uint64_t rhsBits = bits(FpSrcReg2_uqw, msb, lsb);
            uint64_t outBits;

            if (laneBytes == 4) {
                floatInt lhs, rhs, out;
                lhs.i = lhsBits;
                rhs.i = rhsBits;
                out.f = lhs.f + rhs.f;
                outBits = out.i;
            } else {
                doubleInt lhs, rhs, out;
                lhs.i = lhsBits;
                rhs.i = rhsBits;
                out.d = lhs.d + rhs.d;
                outBits = out.i;
            }

            result = insertBits(result, msb, lsb, outBits);
        }
        FpDestReg_uqw = result;
    '''
800
# Packed floating point subtract: lane by lane, first source minus second,
# using float arithmetic for 4 byte lanes and double for 8 byte lanes.
class Msubf(MediaOp):
    op_class = 'SimdFloatAddOp'
    code = '''
        // Overlays for viewing the raw bits of a lane as floating point.
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        assert(srcSize == 4 || srcSize == 8);
        const int laneBytes = srcSize;
        const int laneWidth = laneBytes * 8;
        const int laneCount = numItems(laneBytes);
        uint64_t result = FpDestReg_uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneWidth;
            const int msb = lsb + laneWidth - 1;
            const uint64_t lhsBits = bits(FpSrcReg1_uqw, msb, lsb);
            const uint64_t rhsBits = bits(FpSrcReg2_uqw, msb, lsb);
            uint64_t outBits;

            if (laneBytes == 4) {
                floatInt lhs, rhs, out;
                lhs.i = lhsBits;
                rhs.i = rhsBits;
                out.f = lhs.f - rhs.f;
                outBits = out.i;
            } else {
                doubleInt lhs, rhs, out;
                lhs.i = lhsBits;
                rhs.i = rhsBits;
                out.d = lhs.d - rhs.d;
                outBits = out.i;
            }

            result = insertBits(result, msb, lsb, outBits);
        }
        FpDestReg_uqw = result;
    '''
847
# Packed floating point multiply: lane by lane product of the two sources,
# using float arithmetic for 4 byte lanes and double for 8 byte lanes.
class Mmulf(MediaOp):
    op_class = 'SimdFloatMultOp'
    code = '''
        // Overlays for viewing the raw bits of a lane as floating point.
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        assert(srcSize == 4 || srcSize == 8);
        const int laneBytes = srcSize;
        const int laneWidth = laneBytes * 8;
        const int laneCount = numItems(laneBytes);
        uint64_t result = FpDestReg_uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneWidth;
            const int msb = lsb + laneWidth - 1;
            const uint64_t lhsBits = bits(FpSrcReg1_uqw, msb, lsb);
            const uint64_t rhsBits = bits(FpSrcReg2_uqw, msb, lsb);
            uint64_t outBits;

            if (laneBytes == 4) {
                floatInt lhs, rhs, out;
                lhs.i = lhsBits;
                rhs.i = rhsBits;
                out.f = lhs.f * rhs.f;
                outBits = out.i;
            } else {
                doubleInt lhs, rhs, out;
                lhs.i = lhsBits;
                rhs.i = rhsBits;
                out.d = lhs.d * rhs.d;
                outBits = out.i;
            }

            result = insertBits(result, msb, lsb, outBits);
        }
        FpDestReg_uqw = result;
    '''
894
# Packed floating point divide: lane by lane, first source over second,
# using float arithmetic for 4 byte lanes and double for 8 byte lanes.
class Mdivf(MediaOp):
    op_class = 'SimdFloatDivOp'
    code = '''
        // Overlays for viewing the raw bits of a lane as floating point.
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        assert(srcSize == 4 || srcSize == 8);
        const int laneBytes = srcSize;
        const int laneWidth = laneBytes * 8;
        const int laneCount = numItems(laneBytes);
        uint64_t result = FpDestReg_uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneWidth;
            const int msb = lsb + laneWidth - 1;
            const uint64_t lhsBits = bits(FpSrcReg1_uqw, msb, lsb);
            const uint64_t rhsBits = bits(FpSrcReg2_uqw, msb, lsb);
            uint64_t outBits;

            if (laneBytes == 4) {
                floatInt lhs, rhs, out;
                lhs.i = lhsBits;
                rhs.i = rhsBits;
                out.f = lhs.f / rhs.f;
                outBits = out.i;
            } else {
                doubleInt lhs, rhs, out;
                lhs.i = lhsBits;
                rhs.i = rhsBits;
                out.d = lhs.d / rhs.d;
                outBits = out.i;
            }

            result = insertBits(result, msb, lsb, outBits);
        }
        FpDestReg_uqw = result;
    '''
941
# Packed integer add. When bit 0x2 of ext is set the per-lane sum is
# clamped instead of wrapping: to the signed extremes for signed ops, or to
# all ones when an unsigned lane carries out.
class Maddi(MediaOp):
    op_class = 'SimdAddOp'
    code = '''
        assert(srcSize == destSize);
        const int laneBytes = srcSize;
        const int laneWidth = laneBytes * 8;
        const int laneCount = numItems(laneBytes);
        const bool saturating = ext & 0x2;
        uint64_t result = FpDestReg_uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneWidth;
            const int msb = lsb + laneWidth - 1;
            const uint64_t lhsBits = bits(FpSrcReg1_uqw, msb, lsb);
            const uint64_t rhsBits = bits(FpSrcReg2_uqw, msb, lsb);
            uint64_t outBits = lhsBits + rhsBits;

            if (saturating) {
                if (signedOp()) {
                    const int lhsSign = bits(lhsBits, laneWidth - 1);
                    const int rhsSign = bits(rhsBits, laneWidth - 1);
                    const int outSign = bits(outBits, laneWidth - 1);
                    // Overflow shows up as inputs that agree in sign while
                    // the sum does not; clamp toward the sign of the inputs.
                    if (lhsSign == rhsSign && lhsSign != outSign) {
                        outBits = outSign ? mask(laneWidth - 1) :
                                            (ULL(1) << (laneWidth - 1));
                    }
                } else if (findCarry(laneWidth, outBits,
                                     lhsBits, rhsBits)) {
                    outBits = mask(laneWidth);
                }
            }

            result = insertBits(result, msb, lsb, outBits);
        }
        FpDestReg_uqw = result;
    '''
979
# Packed integer subtract (first source minus second). When bit 0x2 of ext
# is set the per-lane difference is clamped instead of wrapping.
class Msubi(MediaOp):
    op_class = 'SimdAddOp'
    code = '''
        assert(srcSize == destSize);
        const int laneBytes = srcSize;
        const int laneWidth = laneBytes * 8;
        const int laneCount = numItems(laneBytes);
        const bool saturating = ext & 0x2;
        uint64_t result = FpDestReg_uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneWidth;
            const int msb = lsb + laneWidth - 1;
            const uint64_t lhsBits = bits(FpSrcReg1_uqw, msb, lsb);
            const uint64_t rhsBits = bits(FpSrcReg2_uqw, msb, lsb);
            uint64_t outBits = lhsBits - rhsBits;

            if (saturating) {
                if (signedOp()) {
                    // The subtrahend sign is inverted so the overflow test
                    // has the same shape as the one used for addition.
                    const int lhsSign = bits(lhsBits, laneWidth - 1);
                    const int rhsSign = !bits(rhsBits, laneWidth - 1);
                    const int outSign = bits(outBits, laneWidth - 1);
                    if (lhsSign == rhsSign && lhsSign != outSign) {
                        outBits = outSign ? mask(laneWidth - 1) :
                                            (ULL(1) << (laneWidth - 1));
                    }
                } else if (rhsBits > lhsBits) {
                    // An unsigned borrow clamps the lane to zero.
                    outBits = 0;
                } else if (!findCarry(laneWidth, outBits,
                                      lhsBits, ~rhsBits)) {
                    outBits = mask(laneWidth);
                }
            }

            result = insertBits(result, msb, lsb, outBits);
        }
        FpDestReg_uqw = result;
    '''
1021
# Packed integer multiply. Source lanes may be narrower than destination
# lanes; ext bits 16 and 32 select which source lanes feed each product,
# bit 0x4 adds a rounding constant, and multHi() keeps the upper part.
class Mmuli(MediaOp):
    op_class = 'SimdMultOp'
    code = '''
        const int srcWidth = srcSize * 8;
        const int dstWidth = destSize * 8;
        assert(dstWidth <= 64);
        assert(destSize >= srcSize);
        const int laneCount = numItems(destSize);
        uint64_t result = FpDestReg_uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            // Extra displacement applied to the source lane position.
            int shift = 0;
            if (ext & 16) {
                shift = lane * (dstWidth - srcWidth);
                if (!(ext & 32))
                    shift += srcWidth;
            }
            const int srcLsb = lane * srcWidth + shift;
            const int srcMsb = srcLsb + srcWidth - 1;
            const uint64_t lhsBits = bits(FpSrcReg1_uqw, srcMsb, srcLsb);
            const uint64_t rhsBits = bits(FpSrcReg2_uqw, srcMsb, srcLsb);
            uint64_t product;

            if (signedOp()) {
                // Sign extend both factors to 64 bits before multiplying.
                const int64_t lhs = lhsBits |
                    (0 - (lhsBits & (ULL(1) << (srcWidth - 1))));
                const int64_t rhs = rhsBits |
                    (0 - (rhsBits & (ULL(1) << (srcWidth - 1))));
                product = (uint64_t)(lhs * rhs);
            } else {
                product = lhsBits * rhsBits;
            }

            if (ext & 0x4)
                product += (ULL(1) << (dstWidth - 1));

            if (multHi())
                product >>= dstWidth;

            const int dstLsb = lane * dstWidth;
            const int dstMsb = dstLsb + dstWidth - 1;
            result = insertBits(result, dstMsb, dstLsb, product);
        }
        FpDestReg_uqw = result;
    '''
1068
# Packed unsigned average: each lane becomes (a + b + 1) / 2, so a half
# rounds upward.
class Mavg(MediaOp):
    op_class = 'SimdAddOp'
    code = '''
        assert(srcSize == destSize);
        const int laneBytes = srcSize;
        const int laneWidth = laneBytes * 8;
        const int laneCount = numItems(laneBytes);
        uint64_t result = FpDestReg_uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneWidth;
            const int msb = lsb + laneWidth - 1;
            const uint64_t lhsBits = bits(FpSrcReg1_uqw, msb, lsb);
            const uint64_t rhsBits = bits(FpSrcReg2_uqw, msb, lsb);
            // Halve the biased sum with a shift; the operands are unsigned.
            const uint64_t meanBits = (lhsBits + rhsBits + 1) >> 1;

            result = insertBits(result, msb, lsb, meanBits);
        }
        FpDestReg_uqw = result;
    '''
1089
1090 class Msad(MediaOp):
1091 op_class = 'SimdAddOp'
1092 code = '''
1093 int srcBits = srcSize * 8;
397 int destBits = destSize * 8;
398 int srcBits = srcSize * 8;
399 uint64_t result = 0;
400 int i;
401 for (i = 0; i < items / 2; i++) {
402 uint64_t picked =
403 bits(FpSrcReg1_uqw, (i + 1) * srcBits - 1,
404 (i + 0) * srcBits);
405 unsigned signBit = bits(picked, srcBits - 1);
406 uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
407
408 // Handle saturation.
409 if (signBit) {
410 if (overflow != mask(destBits - srcBits + 1)) {
411 if (signedOp())
412 picked = (ULL(1) << (destBits - 1));
413 else
414 picked = 0;
415 }
416 } else {
417 if (overflow != 0) {
418 if (signedOp())
419 picked = mask(destBits - 1);
420 else
421 picked = mask(destBits);
422 }
423 }
424 result = insertBits(result,
425 (i + 1) * destBits - 1,
426 (i + 0) * destBits,
427 picked);
428 }
429 for (;i < items; i++) {
430 uint64_t picked =
431 bits(FpSrcReg2_uqw, (i - items + 1) * srcBits - 1,
432 (i - items + 0) * srcBits);
433 unsigned signBit = bits(picked, srcBits - 1);
434 uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
435
436 // Handle saturation.
437 if (signBit) {
438 if (overflow != mask(destBits - srcBits + 1)) {
439 if (signedOp())
440 picked = (ULL(1) << (destBits - 1));
441 else
442 picked = 0;
443 }
444 } else {
445 if (overflow != 0) {
446 if (signedOp())
447 picked = mask(destBits - 1);
448 else
449 picked = mask(destBits);
450 }
451 }
452 result = insertBits(result,
453 (i + 1) * destBits - 1,
454 (i + 0) * destBits,
455 picked);
456 }
457 FpDestReg_uqw = result;
458 '''
459
# Bitwise XOR of the two 64 bit sources.
class Mxor(MediaOp):
    op_class = 'SimdAluOp'

    # The trailing 1 is handed to the parent constructor as its fourth
    # positional argument.
    def __init__(self, dest, src1, src2):
        super(Mxor, self).__init__(dest, src1, src2, 1)

    code = '''
        const uint64_t lhs = FpSrcReg1_uqw;
        const uint64_t rhs = FpSrcReg2_uqw;
        FpDestReg_uqw = lhs ^ rhs;
    '''
467
# Bitwise OR of the two 64 bit sources.
class Mor(MediaOp):
    op_class = 'SimdAluOp'

    # The trailing 1 is handed to the parent constructor as its fourth
    # positional argument.
    def __init__(self, dest, src1, src2):
        super(Mor, self).__init__(dest, src1, src2, 1)

    code = '''
        const uint64_t lhs = FpSrcReg1_uqw;
        const uint64_t rhs = FpSrcReg2_uqw;
        FpDestReg_uqw = lhs | rhs;
    '''
475
# Bitwise AND of the two 64 bit sources.
class Mand(MediaOp):
    op_class = 'SimdAluOp'

    # The trailing 1 is handed to the parent constructor as its fourth
    # positional argument.
    def __init__(self, dest, src1, src2):
        super(Mand, self).__init__(dest, src1, src2, 1)

    code = '''
        const uint64_t lhs = FpSrcReg1_uqw;
        const uint64_t rhs = FpSrcReg2_uqw;
        FpDestReg_uqw = lhs & rhs;
    '''
483
# Bitwise AND-NOT: the complement of the first source ANDed with the second.
class Mandn(MediaOp):
    op_class = 'SimdAluOp'

    # The trailing 1 is handed to the parent constructor as its fourth
    # positional argument.
    def __init__(self, dest, src1, src2):
        super(Mandn, self).__init__(dest, src1, src2, 1)

    code = '''
        const uint64_t invertedLhs = ~FpSrcReg1_uqw;
        const uint64_t rhs = FpSrcReg2_uqw;
        FpDestReg_uqw = invertedLhs & rhs;
    '''
491
# Packed floating point minimum. The first source lane is kept only when it
# compares strictly less than the second; otherwise the second is kept.
class Mminf(MediaOp):
    op_class = 'SimdFloatCmpOp'
    code = '''
        // Overlays for viewing the raw bits of a lane as floating point.
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        assert(srcSize == 4 || srcSize == 8);
        const int laneBytes = srcSize;
        const int laneWidth = laneBytes * 8;
        const int laneCount = numItems(laneBytes);
        uint64_t result = FpDestReg_uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneWidth;
            const int msb = lsb + laneWidth - 1;
            const uint64_t lhsBits = bits(FpSrcReg1_uqw, msb, lsb);
            const uint64_t rhsBits = bits(FpSrcReg2_uqw, msb, lsb);

            // Both lanes are compared as doubles whatever their width.
            double lhs, rhs;
            if (laneBytes == 4) {
                floatInt single;
                single.i = lhsBits;
                lhs = single.f;
                single.i = rhsBits;
                rhs = single.f;
            } else {
                doubleInt wide;
                wide.i = lhsBits;
                lhs = wide.d;
                wide.i = rhsBits;
                rhs = wide.d;
            }

            const uint64_t keptBits = (lhs < rhs) ? lhsBits : rhsBits;
            result = insertBits(result, msb, lsb, keptBits);
        }
        FpDestReg_uqw = result;
    '''
542
# Packed floating point maximum. The first source lane is kept only when it
# compares strictly greater than the second; otherwise the second is kept.
class Mmaxf(MediaOp):
    op_class = 'SimdFloatCmpOp'
    code = '''
        // Overlays for viewing the raw bits of a lane as floating point.
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        assert(srcSize == 4 || srcSize == 8);
        const int laneBytes = srcSize;
        const int laneWidth = laneBytes * 8;
        const int laneCount = numItems(laneBytes);
        uint64_t result = FpDestReg_uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneWidth;
            const int msb = lsb + laneWidth - 1;
            const uint64_t lhsBits = bits(FpSrcReg1_uqw, msb, lsb);
            const uint64_t rhsBits = bits(FpSrcReg2_uqw, msb, lsb);

            // Both lanes are compared as doubles whatever their width.
            double lhs, rhs;
            if (laneBytes == 4) {
                floatInt single;
                single.i = lhsBits;
                lhs = single.f;
                single.i = rhsBits;
                rhs = single.f;
            } else {
                doubleInt wide;
                wide.i = lhsBits;
                lhs = wide.d;
                wide.i = rhsBits;
                rhs = wide.d;
            }

            const uint64_t keptBits = (lhs > rhs) ? lhsBits : rhsBits;
            result = insertBits(result, msb, lsb, keptBits);
        }
        FpDestReg_uqw = result;
    '''
593
# Packed integer minimum, compared as signed or unsigned lanes depending on
# signedOp(). On a tie the second source lane is kept.
class Mmini(MediaOp):
    op_class = 'SimdCmpOp'
    code = '''
        assert(srcSize == destSize);
        const int laneBytes = srcSize;
        const int laneWidth = laneBytes * 8;
        const int laneCount = numItems(laneBytes);
        uint64_t result = FpDestReg_uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneWidth;
            const int msb = lsb + laneWidth - 1;
            const uint64_t lhsBits = bits(FpSrcReg1_uqw, msb, lsb);
            const uint64_t rhsBits = bits(FpSrcReg2_uqw, msb, lsb);
            // Sign extended copies, used only for the signed comparison.
            const int64_t lhs = lhsBits |
                (0 - (lhsBits & (ULL(1) << (laneWidth - 1))));
            const int64_t rhs = rhsBits |
                (0 - (rhsBits & (ULL(1) << (laneWidth - 1))));

            const bool keepFirst =
                signedOp() ? (lhs < rhs) : (lhsBits < rhsBits);
            const uint64_t keptBits = keepFirst ? lhsBits : rhsBits;
            result = insertBits(result, msb, lsb, keptBits);
        }
        FpDestReg_uqw = result;
    '''
632
# Packed integer maximum, compared as signed or unsigned lanes depending on
# signedOp(). On a tie the second source lane is kept.
class Mmaxi(MediaOp):
    op_class = 'SimdCmpOp'
    code = '''
        assert(srcSize == destSize);
        const int laneBytes = srcSize;
        const int laneWidth = laneBytes * 8;
        const int laneCount = numItems(laneBytes);
        uint64_t result = FpDestReg_uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneWidth;
            const int msb = lsb + laneWidth - 1;
            const uint64_t lhsBits = bits(FpSrcReg1_uqw, msb, lsb);
            const uint64_t rhsBits = bits(FpSrcReg2_uqw, msb, lsb);
            // Sign extended copies, used only for the signed comparison.
            const int64_t lhs = lhsBits |
                (0 - (lhsBits & (ULL(1) << (laneWidth - 1))));
            const int64_t rhs = rhsBits |
                (0 - (rhsBits & (ULL(1) << (laneWidth - 1))));

            const bool keepFirst =
                signedOp() ? (lhs > rhs) : (lhsBits > rhsBits);
            const uint64_t keptBits = keepFirst ? lhsBits : rhsBits;
            result = insertBits(result, msb, lsb, keptBits);
        }
        FpDestReg_uqw = result;
    '''
671
# Packed square root: every lane of the first source is reinterpreted as a
# float (4 byte lanes) or a double (8 byte lanes) and replaced by its root.
class Msqrt(MediaOp):
    op_class = 'SimdFloatSqrtOp'

    # Only one source is taken from the caller; the slot the two-source ops
    # use for their second source is filled with a fixed placeholder.
    def __init__(self, dest, src,
                 size=None, destSize=None, srcSize=None, ext=None):
        super(Msqrt, self).__init__(
            dest, src, "InstRegIndex(0)", size, destSize, srcSize, ext)

    code = '''
        // Overlays for viewing the raw bits of a lane as floating point.
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        assert(srcSize == 4 || srcSize == 8);
        const int laneBytes = srcSize;
        const int laneWidth = laneBytes * 8;
        const int laneCount = numItems(laneBytes);
        uint64_t result = FpDestReg_uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneWidth;
            const int msb = lsb + laneWidth - 1;
            uint64_t laneBits = bits(FpSrcReg1_uqw, msb, lsb);

            if (laneBytes == 4) {
                floatInt single;
                single.i = laneBits;
                single.f = sqrt(single.f);
                laneBits = single.i;
            } else {
                doubleInt wide;
                wide.i = laneBits;
                wide.d = sqrt(wide.d);
                laneBits = wide.i;
            }
            result = insertBits(result, msb, lsb, laneBits);
        }
        FpDestReg_uqw = result;
    '''
717
# Approximate reciprocal of every lane --- single-precision lanes only.
class Mrcp(MediaOp):
    op_class = 'SimdFloatAluOp'

    # Only one source is taken from the caller; the slot the two-source ops
    # use for their second source is filled with a fixed placeholder.
    def __init__(self, dest, src,
                 size=None, destSize=None, srcSize=None, ext=None):
        super(Mrcp, self).__init__(
            dest, src, "InstRegIndex(0)", size, destSize, srcSize, ext)

    code = '''
        // Overlay for viewing the raw bits of a lane as a float.
        union floatInt
        {
            float f;
            uint32_t i;
        };

        assert(srcSize == 4);  // ISA defines single-precision only
        assert(srcSize == destSize);
        const int laneBytes = 4;
        const int laneWidth = laneBytes * 8;
        const int laneCount = numItems(laneBytes);
        uint64_t result = FpDestReg_uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneWidth;
            const int msb = lsb + laneWidth - 1;
            uint64_t laneBits = bits(FpSrcReg1_uqw, msb, lsb);

            floatInt single;
            single.i = laneBits;
            // This is more accuracy than HW provides, but oh well
            single.f = 1.0 / single.f;
            laneBits = single.i;
            result = insertBits(result, msb, lsb, laneBits);
        }
        FpDestReg_uqw = result;
    '''
753
# Packed floating point add: lane by lane sum of the two sources, using
# float arithmetic for 4 byte lanes and double arithmetic for 8 byte lanes.
class Maddf(MediaOp):
    op_class = 'SimdFloatAddOp'
    code = '''
        // Overlays for viewing the raw bits of a lane as floating point.
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        assert(srcSize == 4 || srcSize == 8);
        const int laneBytes = srcSize;
        const int laneWidth = laneBytes * 8;
        const int laneCount = numItems(laneBytes);
        uint64_t result = FpDestReg_uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneWidth;
            const int msb = lsb + laneWidth - 1;
            const uint64_t lhsBits = bits(FpSrcReg1_uqw, msb, lsb);
            const uint64_t rhsBits = bits(FpSrcReg2_uqw, msb, lsb);
            uint64_t outBits;

            if (laneBytes == 4) {
                floatInt lhs, rhs, out;
                lhs.i = lhsBits;
                rhs.i = rhsBits;
                out.f = lhs.f + rhs.f;
                outBits = out.i;
            } else {
                doubleInt lhs, rhs, out;
                lhs.i = lhsBits;
                rhs.i = rhsBits;
                out.d = lhs.d + rhs.d;
                outBits = out.i;
            }

            result = insertBits(result, msb, lsb, outBits);
        }
        FpDestReg_uqw = result;
    '''
800
# Packed floating point subtract: lane by lane, first source minus second,
# using float arithmetic for 4 byte lanes and double for 8 byte lanes.
class Msubf(MediaOp):
    op_class = 'SimdFloatAddOp'
    code = '''
        // Overlays for viewing the raw bits of a lane as floating point.
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        assert(srcSize == 4 || srcSize == 8);
        const int laneBytes = srcSize;
        const int laneWidth = laneBytes * 8;
        const int laneCount = numItems(laneBytes);
        uint64_t result = FpDestReg_uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneWidth;
            const int msb = lsb + laneWidth - 1;
            const uint64_t lhsBits = bits(FpSrcReg1_uqw, msb, lsb);
            const uint64_t rhsBits = bits(FpSrcReg2_uqw, msb, lsb);
            uint64_t outBits;

            if (laneBytes == 4) {
                floatInt lhs, rhs, out;
                lhs.i = lhsBits;
                rhs.i = rhsBits;
                out.f = lhs.f - rhs.f;
                outBits = out.i;
            } else {
                doubleInt lhs, rhs, out;
                lhs.i = lhsBits;
                rhs.i = rhsBits;
                out.d = lhs.d - rhs.d;
                outBits = out.i;
            }

            result = insertBits(result, msb, lsb, outBits);
        }
        FpDestReg_uqw = result;
    '''
847
# Packed floating point multiply: lane by lane product of the two sources,
# using float arithmetic for 4 byte lanes and double for 8 byte lanes.
class Mmulf(MediaOp):
    op_class = 'SimdFloatMultOp'
    code = '''
        // Overlays for viewing the raw bits of a lane as floating point.
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        assert(srcSize == 4 || srcSize == 8);
        const int laneBytes = srcSize;
        const int laneWidth = laneBytes * 8;
        const int laneCount = numItems(laneBytes);
        uint64_t result = FpDestReg_uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneWidth;
            const int msb = lsb + laneWidth - 1;
            const uint64_t lhsBits = bits(FpSrcReg1_uqw, msb, lsb);
            const uint64_t rhsBits = bits(FpSrcReg2_uqw, msb, lsb);
            uint64_t outBits;

            if (laneBytes == 4) {
                floatInt lhs, rhs, out;
                lhs.i = lhsBits;
                rhs.i = rhsBits;
                out.f = lhs.f * rhs.f;
                outBits = out.i;
            } else {
                doubleInt lhs, rhs, out;
                lhs.i = lhsBits;
                rhs.i = rhsBits;
                out.d = lhs.d * rhs.d;
                outBits = out.i;
            }

            result = insertBits(result, msb, lsb, outBits);
        }
        FpDestReg_uqw = result;
    '''
894
# Packed floating point divide: lane by lane, first source over second,
# using float arithmetic for 4 byte lanes and double for 8 byte lanes.
class Mdivf(MediaOp):
    op_class = 'SimdFloatDivOp'
    code = '''
        // Overlays for viewing the raw bits of a lane as floating point.
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        assert(srcSize == 4 || srcSize == 8);
        const int laneBytes = srcSize;
        const int laneWidth = laneBytes * 8;
        const int laneCount = numItems(laneBytes);
        uint64_t result = FpDestReg_uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneWidth;
            const int msb = lsb + laneWidth - 1;
            const uint64_t lhsBits = bits(FpSrcReg1_uqw, msb, lsb);
            const uint64_t rhsBits = bits(FpSrcReg2_uqw, msb, lsb);
            uint64_t outBits;

            if (laneBytes == 4) {
                floatInt lhs, rhs, out;
                lhs.i = lhsBits;
                rhs.i = rhsBits;
                out.f = lhs.f / rhs.f;
                outBits = out.i;
            } else {
                doubleInt lhs, rhs, out;
                lhs.i = lhsBits;
                rhs.i = rhsBits;
                out.d = lhs.d / rhs.d;
                outBits = out.i;
            }

            result = insertBits(result, msb, lsb, outBits);
        }
        FpDestReg_uqw = result;
    '''
941
# Packed integer add. When bit 0x2 of ext is set the per-lane sum is
# clamped instead of wrapping: to the signed extremes for signed ops, or to
# all ones when an unsigned lane carries out.
class Maddi(MediaOp):
    op_class = 'SimdAddOp'
    code = '''
        assert(srcSize == destSize);
        const int laneBytes = srcSize;
        const int laneWidth = laneBytes * 8;
        const int laneCount = numItems(laneBytes);
        const bool saturating = ext & 0x2;
        uint64_t result = FpDestReg_uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneWidth;
            const int msb = lsb + laneWidth - 1;
            const uint64_t lhsBits = bits(FpSrcReg1_uqw, msb, lsb);
            const uint64_t rhsBits = bits(FpSrcReg2_uqw, msb, lsb);
            uint64_t outBits = lhsBits + rhsBits;

            if (saturating) {
                if (signedOp()) {
                    const int lhsSign = bits(lhsBits, laneWidth - 1);
                    const int rhsSign = bits(rhsBits, laneWidth - 1);
                    const int outSign = bits(outBits, laneWidth - 1);
                    // Overflow shows up as inputs that agree in sign while
                    // the sum does not; clamp toward the sign of the inputs.
                    if (lhsSign == rhsSign && lhsSign != outSign) {
                        outBits = outSign ? mask(laneWidth - 1) :
                                            (ULL(1) << (laneWidth - 1));
                    }
                } else if (findCarry(laneWidth, outBits,
                                     lhsBits, rhsBits)) {
                    outBits = mask(laneWidth);
                }
            }

            result = insertBits(result, msb, lsb, outBits);
        }
        FpDestReg_uqw = result;
    '''
979
# Packed integer subtract (first source minus second). When bit 0x2 of ext
# is set the per-lane difference is clamped instead of wrapping.
class Msubi(MediaOp):
    op_class = 'SimdAddOp'
    code = '''
        assert(srcSize == destSize);
        const int laneBytes = srcSize;
        const int laneWidth = laneBytes * 8;
        const int laneCount = numItems(laneBytes);
        const bool saturating = ext & 0x2;
        uint64_t result = FpDestReg_uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneWidth;
            const int msb = lsb + laneWidth - 1;
            const uint64_t lhsBits = bits(FpSrcReg1_uqw, msb, lsb);
            const uint64_t rhsBits = bits(FpSrcReg2_uqw, msb, lsb);
            uint64_t outBits = lhsBits - rhsBits;

            if (saturating) {
                if (signedOp()) {
                    // The subtrahend sign is inverted so the overflow test
                    // has the same shape as the one used for addition.
                    const int lhsSign = bits(lhsBits, laneWidth - 1);
                    const int rhsSign = !bits(rhsBits, laneWidth - 1);
                    const int outSign = bits(outBits, laneWidth - 1);
                    if (lhsSign == rhsSign && lhsSign != outSign) {
                        outBits = outSign ? mask(laneWidth - 1) :
                                            (ULL(1) << (laneWidth - 1));
                    }
                } else if (rhsBits > lhsBits) {
                    // An unsigned borrow clamps the lane to zero.
                    outBits = 0;
                } else if (!findCarry(laneWidth, outBits,
                                      lhsBits, ~rhsBits)) {
                    outBits = mask(laneWidth);
                }
            }

            result = insertBits(result, msb, lsb, outBits);
        }
        FpDestReg_uqw = result;
    '''
1021
# Packed integer multiply. Source lanes may be narrower than destination
# lanes; ext bits 16 and 32 select which source lanes feed each product,
# bit 0x4 adds a rounding constant, and multHi() keeps the upper part.
class Mmuli(MediaOp):
    op_class = 'SimdMultOp'
    code = '''
        const int srcWidth = srcSize * 8;
        const int dstWidth = destSize * 8;
        assert(dstWidth <= 64);
        assert(destSize >= srcSize);
        const int laneCount = numItems(destSize);
        uint64_t result = FpDestReg_uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            // Extra displacement applied to the source lane position.
            int shift = 0;
            if (ext & 16) {
                shift = lane * (dstWidth - srcWidth);
                if (!(ext & 32))
                    shift += srcWidth;
            }
            const int srcLsb = lane * srcWidth + shift;
            const int srcMsb = srcLsb + srcWidth - 1;
            const uint64_t lhsBits = bits(FpSrcReg1_uqw, srcMsb, srcLsb);
            const uint64_t rhsBits = bits(FpSrcReg2_uqw, srcMsb, srcLsb);
            uint64_t product;

            if (signedOp()) {
                // Sign extend both factors to 64 bits before multiplying.
                const int64_t lhs = lhsBits |
                    (0 - (lhsBits & (ULL(1) << (srcWidth - 1))));
                const int64_t rhs = rhsBits |
                    (0 - (rhsBits & (ULL(1) << (srcWidth - 1))));
                product = (uint64_t)(lhs * rhs);
            } else {
                product = lhsBits * rhsBits;
            }

            if (ext & 0x4)
                product += (ULL(1) << (dstWidth - 1));

            if (multHi())
                product >>= dstWidth;

            const int dstLsb = lane * dstWidth;
            const int dstMsb = dstLsb + dstWidth - 1;
            result = insertBits(result, dstMsb, dstLsb, product);
        }
        FpDestReg_uqw = result;
    '''
1068
# Packed unsigned average: each lane becomes (a + b + 1) / 2, so a half
# rounds upward.
class Mavg(MediaOp):
    op_class = 'SimdAddOp'
    code = '''
        assert(srcSize == destSize);
        const int laneBytes = srcSize;
        const int laneWidth = laneBytes * 8;
        const int laneCount = numItems(laneBytes);
        uint64_t result = FpDestReg_uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneWidth;
            const int msb = lsb + laneWidth - 1;
            const uint64_t lhsBits = bits(FpSrcReg1_uqw, msb, lsb);
            const uint64_t rhsBits = bits(FpSrcReg2_uqw, msb, lsb);
            // Halve the biased sum with a shift; the operands are unsigned.
            const uint64_t meanBits = (lhsBits + rhsBits + 1) >> 1;

            result = insertBits(result, msb, lsb, meanBits);
        }
        FpDestReg_uqw = result;
    '''
1089
# Sum of absolute differences: the absolute difference of every pair of
# source lanes is accumulated into one total, which is then truncated to
# the destination width.
class Msad(MediaOp):
    op_class = 'SimdAddOp'
    code = '''
        int srcBits = srcSize * 8;
        // Declared exactly once. The stale second declaration (the same
        // expression spelled with the older register type name) made this
        // snippet fail to compile with a redefinition error.
        int items = sizeof(FloatReg) / srcSize;

        uint64_t sum = 0;
        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * srcBits - 1;
            int loIndex = (i + 0) * srcBits;
            uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
            uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
            // The unsigned difference wraps; viewing it as signed recovers
            // the negative value so it can be negated.
            int64_t resBits = arg1Bits - arg2Bits;
            if (resBits < 0)
                resBits = -resBits;
            sum += resBits;
        }
        // Keep only as many bits as the destination operand holds.
        FpDestReg_uqw = sum & mask(destSize * 8);
    '''
1109
# Packed logical right shift. Every lane is shifted by the same count; a
# count of at least the lane width clears the lane.
class Msrl(MediaOp):
    op_class = 'SimdShiftOp'
    code = '''
        assert(srcSize == destSize);
        const int laneBytes = srcSize;
        const int laneWidth = laneBytes * 8;
        const int laneCount = numItems(laneBytes);
        uint64_t shiftAmt = op2_uqw;
        uint64_t result = FpDestReg_uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneWidth;
            const int msb = lsb + laneWidth - 1;
            const uint64_t laneBits = bits(FpSrcReg1_uqw, msb, lsb);

            uint64_t outBits = 0;
            if (shiftAmt < laneWidth) {
                outBits = (laneBits >> shiftAmt) &
                    mask(laneWidth - shiftAmt);
            }

            result = insertBits(result, msb, lsb, outBits);
        }
        FpDestReg_uqw = result;
    '''
1137
# Packed arithmetic right shift. Every lane is shifted by the same count
# with its sign bit replicated; a count of at least the lane width fills
# the lane with copies of the sign bit.
class Msra(MediaOp):
    op_class = 'SimdShiftOp'
    code = '''
        assert(srcSize == destSize);
        const int laneBytes = srcSize;
        const int laneWidth = laneBytes * 8;
        const int laneCount = numItems(laneBytes);
        uint64_t shiftAmt = op2_uqw;
        uint64_t result = FpDestReg_uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneWidth;
            const int msb = lsb + laneWidth - 1;
            const uint64_t laneBits = bits(FpSrcReg1_uqw, msb, lsb);

            uint64_t outBits;
            if (shiftAmt < laneWidth) {
                outBits = laneBits >> shiftAmt;
                // Replicate the shifted-down sign bit through the upper
                // bits.
                outBits |= 0 - (outBits &
                        (ULL(1) << (laneWidth - 1 - shiftAmt)));
            } else {
                outBits = bits(laneBits, laneWidth - 1) ?
                    mask(laneWidth) : 0;
            }

            result = insertBits(result, msb, lsb, outBits);
        }
        FpDestReg_uqw = result;
    '''
1169
# Packed logical left shift. Every lane is shifted by the same count; a
# count of at least the lane width clears the lane.
class Msll(MediaOp):
    op_class = 'SimdShiftOp'
    code = '''
        assert(srcSize == destSize);
        const int laneBytes = srcSize;
        const int laneWidth = laneBytes * 8;
        const int laneCount = numItems(laneBytes);
        uint64_t shiftAmt = op2_uqw;
        uint64_t result = FpDestReg_uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneWidth;
            const int msb = lsb + laneWidth - 1;
            const uint64_t laneBits = bits(FpSrcReg1_uqw, msb, lsb);

            uint64_t outBits = 0;
            if (shiftAmt < laneWidth)
                outBits = (laneBits << shiftAmt);

            result = insertBits(result, msb, lsb, outBits);
        }
        FpDestReg_uqw = result;
    '''
1196
# Packed float to integer conversion. Source and destination lanes may each
# be 4 or 8 bytes wide. When the widths differ, bit 0x2 of ext moves the
# narrower side to the lanes that start after the converted group. Bit 0x4
# biases the value by half, away from zero, before it is truncated.
class Cvtf2i(MediaOp):
    op_class = 'SimdFloatCvtOp'

    # Only one source is taken from the caller; the slot the two-source ops
    # use for their second source is filled with a fixed placeholder.
    def __init__(self, dest, src,
                 size=None, destSize=None, srcSize=None, ext=None):
        super(Cvtf2i, self).__init__(
            dest, src, "InstRegIndex(0)", size, destSize, srcSize, ext)

    code = '''
        // Overlays for viewing the raw bits of a lane as floating point.
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(destSize == 4 || destSize == 8);
        assert(srcSize == 4 || srcSize == 8);
        const int srcWidth = srcSize * 8;
        const int dstWidth = destSize * 8;
        const bool narrowing = (srcSize == 2 * destSize);
        const bool widening = (destSize == 2 * srcSize);
        // The lane count follows whichever side has the wider lanes.
        const int laneCount = numItems(narrowing ? srcSize : destSize);
        int srcOrigin = 0;
        int dstOrigin = 0;
        if (ext & 0x2) {
            if (narrowing)
                dstOrigin = dstWidth * laneCount;
            else if (widening)
                srcOrigin = srcWidth * laneCount;
        }
        uint64_t result = FpDestReg_uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int srcLsb = srcOrigin + lane * srcWidth;
            const int srcMsb = srcLsb + srcWidth - 1;
            uint64_t laneBits = bits(FpSrcReg1_uqw, srcMsb, srcLsb);

            double value;
            if (srcSize == 4) {
                floatInt single;
                single.i = laneBits;
                value = single.f;
            } else {
                doubleInt wide;
                wide.i = laneBits;
                value = wide.d;
            }

            if (ext & 0x4)
                value += (value >= 0) ? 0.5 : -0.5;

            // Truncate to a signed integer of the destination width and
            // keep its bit pattern.
            if (destSize == 4) {
                const int32_t truncated = (int32_t)value;
                laneBits = static_cast<uint32_t>(truncated);
            } else {
                const int64_t truncated = (int64_t)value;
                laneBits = static_cast<uint64_t>(truncated);
            }

            const int dstLsb = dstOrigin + lane * dstWidth;
            const int dstMsb = dstLsb + dstWidth - 1;
            result = insertBits(result, dstMsb, dstLsb, laneBits);
        }
        FpDestReg_uqw = result;
    '''
1271
# Packed signed integer to float conversion. Source and destination lanes
# may each be 4 or 8 bytes wide. When the widths differ, bit 0x2 of ext
# moves the narrower side to the lanes that start after the converted group.
class Cvti2f(MediaOp):
    op_class = 'SimdFloatCvtOp'

    # Only one source is taken from the caller; the slot the two-source ops
    # use for their second source is filled with a fixed placeholder.
    def __init__(self, dest, src,
                 size=None, destSize=None, srcSize=None, ext=None):
        super(Cvti2f, self).__init__(
            dest, src, "InstRegIndex(0)", size, destSize, srcSize, ext)

    code = '''
        // Overlays for viewing the raw bits of a lane as floating point.
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(destSize == 4 || destSize == 8);
        assert(srcSize == 4 || srcSize == 8);
        const int srcWidth = srcSize * 8;
        const int dstWidth = destSize * 8;
        const bool narrowing = (srcSize == 2 * destSize);
        const bool widening = (destSize == 2 * srcSize);
        // The lane count follows whichever side has the wider lanes.
        const int laneCount = numItems(narrowing ? srcSize : destSize);
        int srcOrigin = 0;
        int dstOrigin = 0;
        if (ext & 0x2) {
            if (narrowing)
                dstOrigin = dstWidth * laneCount;
            else if (widening)
                srcOrigin = srcWidth * laneCount;
        }
        uint64_t result = FpDestReg_uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int srcLsb = srcOrigin + lane * srcWidth;
            const int srcMsb = srcLsb + srcWidth - 1;
            uint64_t laneBits = bits(FpSrcReg1_uqw, srcMsb, srcLsb);

            // Sign extend the lane to 64 bits, then convert to double.
            const int64_t signedVal = laneBits |
                (0 - (laneBits & (ULL(1) << (srcWidth - 1))));
            const double value = signedVal;

            if (destSize == 4) {
                floatInt single;
                single.f = value;
                laneBits = single.i;
            } else {
                doubleInt wide;
                wide.d = value;
                laneBits = wide.i;
            }

            const int dstLsb = dstOrigin + lane * dstWidth;
            const int dstMsb = dstLsb + dstWidth - 1;
            result = insertBits(result, dstMsb, dstLsb, laneBits);
        }
        FpDestReg_uqw = result;
    '''
1334
# Cvtf2f: media microop that converts packed floating point elements of
# FpSrcReg1 between float and double and merges them into FpDestReg.
#
# Element sizes are in bytes and must be 4 or 8 (asserted). A source element
# is reinterpreted as a float when srcSize == 4 and as a double otherwise,
# held in a double, and written back as a float when destSize == 4 or as a
# double otherwise. Destination bits that are not written keep the previous
# FpDestReg value.
#
# When source elements are twice as wide as destination elements, bit 0x2 of
# ext offsets the destination position by destSizeBits * items. When
# destination elements are twice as wide as source elements, the same bit
# offsets the source position by srcSizeBits * items instead.
1335 class Cvtf2f(MediaOp):
# Takes a single source register; "InstRegIndex(0)" fills the third register
# slot (presumably the unused second source -- confirm against
# MediaOp.__init__, which is not visible here).
1336 def __init__(self, dest, src, \
1337 size = None, destSize = None, srcSize = None, ext = None):
1338 super(Cvtf2f, self).__init__(dest, src,\
1339 "InstRegIndex(0)", size, destSize, srcSize, ext)
1340 op_class = 'SimdFloatCvtOp'
# C++ body for this microop (presumably substituted for %(code)s in the
# MediaOpExecute template -- confirm).
1341 code = '''
1342 union floatInt
1343 {
1344 float f;
1345 uint32_t i;
1346 };
1347 union doubleInt
1348 {
1349 double d;
1350 uint64_t i;
1351 };
1352
1353 assert(destSize == 4 || destSize == 8);
1354 assert(srcSize == 4 || srcSize == 8);
1355 int srcSizeBits = srcSize * 8;
1356 int destSizeBits = destSize * 8;
1357 int items;
1358 int srcStart = 0;
1359 int destStart = 0;
1360 if (srcSize == 2 * destSize) {
1361 items = numItems(srcSize);
1362 if (ext & 0x2)
1363 destStart = destSizeBits * items;
1364 } else if (destSize == 2 * srcSize) {
1365 items = numItems(destSize);
1366 if (ext & 0x2)
1367 srcStart = srcSizeBits * items;
1368 } else {
1369 items = numItems(destSize);
1370 }
1371 uint64_t result = FpDestReg_uqw;
1372
1373 for (int i = 0; i < items; i++) {
1374 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1375 int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1376 uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
1377 double arg;
1378
1379 if (srcSize == 4) {
1380 floatInt fi;
1381 fi.i = argBits;
1382 arg = fi.f;
1383 } else {
1384 doubleInt di;
1385 di.i = argBits;
1386 arg = di.d;
1387 }
1388 if (destSize == 4) {
1389 floatInt fi;
1390 fi.f = arg;
1391 argBits = fi.i;
1392 } else {
1393 doubleInt di;
1394 di.d = arg;
1395 argBits = di.i;
1396 }
1397 int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1398 int destLoIndex = destStart + (i + 0) * destSizeBits;
1399 result = insertBits(result, destHiIndex, destLoIndex, argBits);
1400 }
1401 FpDestReg_uqw = result;
1402 '''
1403
# Mcmpi2r: media microop that compares packed signed integers of FpSrcReg1
# and FpSrcReg2 element by element and writes a per-element mask into
# FpDestReg.
#
# srcSize must equal destSize (asserted). Both elements are sign extended
# before comparing. With bit 0x2 of ext clear the test is arg1 == arg2; with
# it set the test is arg1 > arg2. A passing element becomes mask(sizeBits)
# (presumably all ones -- mask() is defined elsewhere) and a failing element
# becomes 0.
#
# NOTE(review): the floatInt and doubleInt unions declared at the top of the
# code string are never used by this microop.
1404 class Mcmpi2r(MediaOp):
1405 op_class = 'SimdCvtOp'
# C++ body for this microop (presumably substituted for %(code)s in the
# MediaOpExecute template -- confirm).
1406 code = '''
1407 union floatInt
1408 {
1409 float f;
1410 uint32_t i;
1411 };
1412 union doubleInt
1413 {
1414 double d;
1415 uint64_t i;
1416 };
1417
1418 assert(srcSize == destSize);
1419 int size = srcSize;
1420 int sizeBits = size * 8;
1421 int items = numItems(size);
1422 uint64_t result = FpDestReg_uqw;
1423
1424 for (int i = 0; i < items; i++) {
1425 int hiIndex = (i + 1) * sizeBits - 1;
1426 int loIndex = (i + 0) * sizeBits;
1427 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
1428 int64_t arg1 = arg1Bits |
1429 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
1430 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
1431 int64_t arg2 = arg2Bits |
1432 (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
1433
1434 uint64_t resBits = 0;
1435 if (((ext & 0x2) == 0 && arg1 == arg2) ||
1436 ((ext & 0x2) == 0x2 && arg1 > arg2))
1437 resBits = mask(sizeBits);
1438
1439 result = insertBits(result, hiIndex, loIndex, resBits);
1440 }
1441 FpDestReg_uqw = result;
1442 '''
1443
# Mcmpf2r: media microop that compares packed floating point elements of
# FpSrcReg1 and FpSrcReg2 and writes a per-element mask into FpDestReg.
#
# srcSize must equal destSize (asserted). Elements are read as floats when
# size == 4 and as doubles for any other size (there is no assert restricting
# size to 4 or 8 here). The predicate is chosen by ext & mask(3):
#   0: equal, and neither operand is NaN
#   1: arg1 < arg2, and neither operand is NaN
#   2: arg1 <= arg2, and neither operand is NaN
#   3: either operand is NaN
#   4: not equal, or either operand is NaN
#   5: not (arg1 < arg2), or either operand is NaN
#   6: not (arg1 <= arg2), or either operand is NaN
#   7: neither operand is NaN
# A passing element becomes mask(sizeBits) (presumably all ones -- mask() is
# defined elsewhere) and a failing element becomes 0.
1444 class Mcmpf2r(MediaOp):
1445 op_class = 'SimdFloatCvtOp'
# C++ body for this microop (presumably substituted for %(code)s in the
# MediaOpExecute template -- confirm).
1446 code = '''
1447 union floatInt
1448 {
1449 float f;
1450 uint32_t i;
1451 };
1452 union doubleInt
1453 {
1454 double d;
1455 uint64_t i;
1456 };
1457
1458 assert(srcSize == destSize);
1459 int size = srcSize;
1460 int sizeBits = size * 8;
1461 int items = numItems(size);
1462 uint64_t result = FpDestReg_uqw;
1463
1464 for (int i = 0; i < items; i++) {
1465 int hiIndex = (i + 1) * sizeBits - 1;
1466 int loIndex = (i + 0) * sizeBits;
1467 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
1468 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
1469 double arg1, arg2;
1470
1471 if (size == 4) {
1472 floatInt fi;
1473 fi.i = arg1Bits;
1474 arg1 = fi.f;
1475 fi.i = arg2Bits;
1476 arg2 = fi.f;
1477 } else {
1478 doubleInt di;
1479 di.i = arg1Bits;
1480 arg1 = di.d;
1481 di.i = arg2Bits;
1482 arg2 = di.d;
1483 }
1484
1485 uint64_t resBits = 0;
1486 bool nanop = std::isnan(arg1) || std::isnan(arg2);
1487 switch (ext & mask(3)) {
1488 case 0:
1489 if (arg1 == arg2 && !nanop)
1490 resBits = mask(sizeBits);
1491 break;
1492 case 1:
1493 if (arg1 < arg2 && !nanop)
1494 resBits = mask(sizeBits);
1495 break;
1496 case 2:
1497 if (arg1 <= arg2 && !nanop)
1498 resBits = mask(sizeBits);
1499 break;
1500 case 3:
1501 if (nanop)
1502 resBits = mask(sizeBits);
1503 break;
1504 case 4:
1505 if (arg1 != arg2 || nanop)
1506 resBits = mask(sizeBits);
1507 break;
1508 case 5:
1509 if (!(arg1 < arg2) || nanop)
1510 resBits = mask(sizeBits);
1511 break;
1512 case 6:
1513 if (!(arg1 <= arg2) || nanop)
1514 resBits = mask(sizeBits);
1515 break;
1516 case 7:
1517 if (!nanop)
1518 resBits = mask(sizeBits);
1519 break;
1520 };
1521
1522 result = insertBits(result, hiIndex, loIndex, resBits);
1523 }
1524 FpDestReg_uqw = result;
1525 '''
1526
# Mcmpf2rf: media microop that compares the lowest floating point element of
# FpSrcReg1 and FpSrcReg2 and records the outcome in the flag bits rather
# than in a register.
#
# srcSize must equal destSize and be 4 or 8 (asserted); the operands are
# read as floats for size 4 and as doubles otherwise. SF, AF, ZF and PF are
# cleared in ccFlagBits and OF and CF are cleared in cfofBits, then:
#   either operand NaN : ZF, PF and CF set
#   arg1 < arg2        : CF set
#   arg1 == arg2       : ZF set
#   otherwise          : nothing further set
1527 class Mcmpf2rf(MediaOp):
# Takes two sources and no destination; "InstRegIndex(0)" fills the first
# register slot (presumably the unused destination -- confirm against
# MediaOp.__init__, which is not visible here).
1528 def __init__(self, src1, src2,\
1529 size = None, destSize = None, srcSize = None, ext = None):
1530 super(Mcmpf2rf, self).__init__("InstRegIndex(0)", src1,\
1531 src2, size, destSize, srcSize, ext)
1532 op_class = 'SimdFloatCvtOp'
# C++ body for this microop (presumably substituted for %(code)s in the
# MediaOpExecute template -- confirm).
1533 code = '''
1534 union floatInt
1535 {
1536 float f;
1537 uint32_t i;
1538 };
1539 union doubleInt
1540 {
1541 double d;
1542 uint64_t i;
1543 };
1544
1545 assert(srcSize == destSize);
1546 assert(srcSize == 4 || srcSize == 8);
1547 int size = srcSize;
1548 int sizeBits = size * 8;
1549
1550 double arg1, arg2;
1551 uint64_t arg1Bits = bits(FpSrcReg1_uqw, sizeBits - 1, 0);
1552 uint64_t arg2Bits = bits(FpSrcReg2_uqw, sizeBits - 1, 0);
1553 if (size == 4) {
1554 floatInt fi;
1555 fi.i = arg1Bits;
1556 arg1 = fi.f;
1557 fi.i = arg2Bits;
1558 arg2 = fi.f;
1559 } else {
1560 doubleInt di;
1561 di.i = arg1Bits;
1562 arg1 = di.d;
1563 di.i = arg2Bits;
1564 arg2 = di.d;
1565 }
1566
1567 // ZF PF CF
1568 // Unordered 1 1 1
1569 // Greater than 0 0 0
1570 // Less than 0 0 1
1571 // Equal 1 0 0
1572 // OF = SF = AF = 0
1573 ccFlagBits = ccFlagBits & ~(SFBit | AFBit | ZFBit | PFBit);
1574 cfofBits = cfofBits & ~(OFBit | CFBit);
1575
1576 if (std::isnan(arg1) || std::isnan(arg2)) {
1577 ccFlagBits = ccFlagBits | (ZFBit | PFBit);
1578 cfofBits = cfofBits | CFBit;
1579 }
1580 else if(arg1 < arg2)
1581 cfofBits = cfofBits | CFBit;
1582 else if(arg1 == arg2)
1583 ccFlagBits = ccFlagBits | ZFBit;
1584 '''
1585
# Emms: operand-less microop whose generated code assigns 0xFFFF to FTW.
#
# The constructor takes no arguments: it passes InstRegIndex(MISCREG_FTW) in
# the first register slot, InstRegIndex(0) in the next two, and 2 as the
# fourth argument (presumably dest, src1, src2 and size in bytes -- confirm
# against MediaOp.__init__, which is not visible here).
1586 class Emms(MediaOp):
1587 op_class = 'FloatMiscOp'
1588 def __init__(self):
1589 super(Emms, self).__init__('InstRegIndex(MISCREG_FTW)',
1590 'InstRegIndex(0)', 'InstRegIndex(0)', 2)
1591 code = 'FTW = 0xFFFF;'
1592}};
1095
1096 uint64_t sum = 0;
1097 for (int i = 0; i < items; i++) {
1098 int hiIndex = (i + 1) * srcBits - 1;
1099 int loIndex = (i + 0) * srcBits;
1100 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
1101 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
1102 int64_t resBits = arg1Bits - arg2Bits;
1103 if (resBits < 0)
1104 resBits = -resBits;
1105 sum += resBits;
1106 }
1107 FpDestReg_uqw = sum & mask(destSize * 8);
1108 '''
1109
# Msrl: media microop that shifts every packed element of FpSrcReg1 right by
# the amount in op2, filling with zeros, and writes the result to FpDestReg.
#
# srcSize must equal destSize (asserted). The same shift amount is applied
# to every element. A shift amount of sizeBits or more yields 0 for the
# element; otherwise the shifted value is masked to the
# sizeBits - shiftAmt remaining bits.
1110 class Msrl(MediaOp):
1111 op_class = 'SimdShiftOp'
# C++ body for this microop (presumably substituted for %(code)s in the
# MediaOpExecute template -- confirm).
1112 code = '''
1113
1114 assert(srcSize == destSize);
1115 int size = srcSize;
1116 int sizeBits = size * 8;
1117 int items = numItems(size);
1118 uint64_t shiftAmt = op2_uqw;
1119 uint64_t result = FpDestReg_uqw;
1120
1121 for (int i = 0; i < items; i++) {
1122 int hiIndex = (i + 1) * sizeBits - 1;
1123 int loIndex = (i + 0) * sizeBits;
1124 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
1125 uint64_t resBits;
1126 if (shiftAmt >= sizeBits) {
1127 resBits = 0;
1128 } else {
1129 resBits = (arg1Bits >> shiftAmt) &
1130 mask(sizeBits - shiftAmt);
1131 }
1132
1133 result = insertBits(result, hiIndex, loIndex, resBits);
1134 }
1135 FpDestReg_uqw = result;
1136 '''
1137
# Msra: media microop that shifts every packed element of FpSrcReg1 right by
# the amount in op2 while replicating the element's sign bit, and writes the
# result to FpDestReg.
#
# srcSize must equal destSize (asserted). The same shift amount is applied
# to every element. A shift amount of sizeBits or more yields
# mask(sizeBits) when the element's top bit is set and 0 otherwise. For
# smaller amounts the element is shifted and then sign extended from bit
# sizeBits - 1 - shiftAmt, which is where the original sign bit lands.
1138 class Msra(MediaOp):
1139 op_class = 'SimdShiftOp'
# C++ body for this microop (presumably substituted for %(code)s in the
# MediaOpExecute template -- confirm).
1140 code = '''
1141
1142 assert(srcSize == destSize);
1143 int size = srcSize;
1144 int sizeBits = size * 8;
1145 int items = numItems(size);
1146 uint64_t shiftAmt = op2_uqw;
1147 uint64_t result = FpDestReg_uqw;
1148
1149 for (int i = 0; i < items; i++) {
1150 int hiIndex = (i + 1) * sizeBits - 1;
1151 int loIndex = (i + 0) * sizeBits;
1152 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
1153 uint64_t resBits;
1154 if (shiftAmt >= sizeBits) {
1155 if (bits(arg1Bits, sizeBits - 1))
1156 resBits = mask(sizeBits);
1157 else
1158 resBits = 0;
1159 } else {
1160 resBits = (arg1Bits >> shiftAmt);
1161 resBits = resBits |
1162 (0 - (resBits & (ULL(1) << (sizeBits - 1 - shiftAmt))));
1163 }
1164
1165 result = insertBits(result, hiIndex, loIndex, resBits);
1166 }
1167 FpDestReg_uqw = result;
1168 '''
1169
# Msll: media microop that shifts every packed element of FpSrcReg1 left by
# the amount in op2 and writes the result to FpDestReg.
#
# srcSize must equal destSize (asserted). The same shift amount is applied
# to every element. A shift amount of sizeBits or more yields 0 for the
# element. For smaller amounts the shifted value is not masked here; bits
# shifted past the element's top are presumably dropped by insertBits when
# it writes only hiIndex..loIndex -- confirm against insertBits.
1170 class Msll(MediaOp):
1171 op_class = 'SimdShiftOp'
# C++ body for this microop (presumably substituted for %(code)s in the
# MediaOpExecute template -- confirm).
1172 code = '''
1173
1174 assert(srcSize == destSize);
1175 int size = srcSize;
1176 int sizeBits = size * 8;
1177 int items = numItems(size);
1178 uint64_t shiftAmt = op2_uqw;
1179 uint64_t result = FpDestReg_uqw;
1180
1181 for (int i = 0; i < items; i++) {
1182 int hiIndex = (i + 1) * sizeBits - 1;
1183 int loIndex = (i + 0) * sizeBits;
1184 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
1185 uint64_t resBits;
1186 if (shiftAmt >= sizeBits) {
1187 resBits = 0;
1188 } else {
1189 resBits = (arg1Bits << shiftAmt);
1190 }
1191
1192 result = insertBits(result, hiIndex, loIndex, resBits);
1193 }
1194 FpDestReg_uqw = result;
1195 '''
1196
# Cvtf2i: media microop that converts packed floating point elements of
# FpSrcReg1 into signed integers merged into FpDestReg.
#
# Element sizes are in bytes and must be 4 or 8 (asserted). A source element
# is read as a float when srcSize == 4 and as a double otherwise. With bit
# 0x4 of ext set, 0.5 is added to non-negative values and subtracted from
# negative ones before the cast; the C++ cast to int32_t/int64_t then
# truncates toward zero. Destination bits that are not written keep the
# previous FpDestReg value.
#
# When source elements are twice as wide as destination elements, bit 0x2 of
# ext offsets the destination position by destSizeBits * items. When
# destination elements are twice as wide as source elements, the same bit
# offsets the source position by srcSizeBits * items instead.
#
# NOTE(review): the cast is applied without a range or NaN check, so the
# result for values that do not fit the destination integer type is
# whatever the host compiler produces -- confirm this matches the intended
# guest behavior. The +/- 0.5 step rounds ties away from zero; confirm that
# is the rounding the callers expect.
1197 class Cvtf2i(MediaOp):
# Takes a single source register; "InstRegIndex(0)" fills the third register
# slot (presumably the unused second source -- confirm against
# MediaOp.__init__, which is not visible here).
1198 def __init__(self, dest, src, \
1199 size = None, destSize = None, srcSize = None, ext = None):
1200 super(Cvtf2i, self).__init__(dest, src,\
1201 "InstRegIndex(0)", size, destSize, srcSize, ext)
1202 op_class = 'SimdFloatCvtOp'
# C++ body for this microop (presumably substituted for %(code)s in the
# MediaOpExecute template -- confirm).
1203 code = '''
1204 union floatInt
1205 {
1206 float f;
1207 uint32_t i;
1208 };
1209 union doubleInt
1210 {
1211 double d;
1212 uint64_t i;
1213 };
1214
1215 assert(destSize == 4 || destSize == 8);
1216 assert(srcSize == 4 || srcSize == 8);
1217 int srcSizeBits = srcSize * 8;
1218 int destSizeBits = destSize * 8;
1219 int items;
1220 int srcStart = 0;
1221 int destStart = 0;
1222 if (srcSize == 2 * destSize) {
1223 items = numItems(srcSize);
1224 if (ext & 0x2)
1225 destStart = destSizeBits * items;
1226 } else if (destSize == 2 * srcSize) {
1227 items = numItems(destSize);
1228 if (ext & 0x2)
1229 srcStart = srcSizeBits * items;
1230 } else {
1231 items = numItems(destSize);
1232 }
1233 uint64_t result = FpDestReg_uqw;
1234
1235 for (int i = 0; i < items; i++) {
1236 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1237 int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1238 uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
1239 double arg;
1240
1241 if (srcSize == 4) {
1242 floatInt fi;
1243 fi.i = argBits;
1244 arg = fi.f;
1245 } else {
1246 doubleInt di;
1247 di.i = argBits;
1248 arg = di.d;
1249 }
1250
1251 if (ext & 0x4) {
1252 if (arg >= 0)
1253 arg += 0.5;
1254 else
1255 arg -= 0.5;
1256 }
1257
1258 if (destSize == 4) {
1259 int32_t i_arg = (int32_t)arg;
1260 argBits = *((uint32_t*)&i_arg);
1261 } else {
1262 int64_t i_arg = (int64_t)arg;
1263 argBits = *((uint64_t*)&i_arg);
1264 }
1265 int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1266 int destLoIndex = destStart + (i + 0) * destSizeBits;
1267 result = insertBits(result, destHiIndex, destLoIndex, argBits);
1268 }
1269 FpDestReg_uqw = result;
1270 '''
1271
# Cvti2f: media microop that converts packed signed integers read from
# FpSrcReg1 into floating point values merged into FpDestReg.
#
# Element sizes are in bytes and must be 4 or 8 (asserted). Each source
# element is sign extended from its top bit, converted to double, and then
# stored as a float when destSize == 4 or as a double otherwise. Destination
# bits that are not written keep the previous FpDestReg value.
#
# When source elements are twice as wide as destination elements, bit 0x2 of
# ext offsets the destination position by destSizeBits * items. When
# destination elements are twice as wide as source elements, the same bit
# offsets the source position by srcSizeBits * items instead.
1272 class Cvti2f(MediaOp):
# Takes a single source register; "InstRegIndex(0)" fills the third register
# slot (presumably the unused second source -- confirm against
# MediaOp.__init__, which is not visible here).
1273 def __init__(self, dest, src, \
1274 size = None, destSize = None, srcSize = None, ext = None):
1275 super(Cvti2f, self).__init__(dest, src,\
1276 "InstRegIndex(0)", size, destSize, srcSize, ext)
1277 op_class = 'SimdFloatCvtOp'
# C++ body for this microop (presumably substituted for %(code)s in the
# MediaOpExecute template -- confirm).
1278 code = '''
1279 union floatInt
1280 {
1281 float f;
1282 uint32_t i;
1283 };
1284 union doubleInt
1285 {
1286 double d;
1287 uint64_t i;
1288 };
1289
1290 assert(destSize == 4 || destSize == 8);
1291 assert(srcSize == 4 || srcSize == 8);
1292 int srcSizeBits = srcSize * 8;
1293 int destSizeBits = destSize * 8;
1294 int items;
1295 int srcStart = 0;
1296 int destStart = 0;
1297 if (srcSize == 2 * destSize) {
1298 items = numItems(srcSize);
1299 if (ext & 0x2)
1300 destStart = destSizeBits * items;
1301 } else if (destSize == 2 * srcSize) {
1302 items = numItems(destSize);
1303 if (ext & 0x2)
1304 srcStart = srcSizeBits * items;
1305 } else {
1306 items = numItems(destSize);
1307 }
1308 uint64_t result = FpDestReg_uqw;
1309
1310 for (int i = 0; i < items; i++) {
1311 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1312 int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1313 uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
1314
1315 int64_t sArg = argBits |
1316 (0 - (argBits & (ULL(1) << (srcSizeBits - 1))));
1317 double arg = sArg;
1318
1319 if (destSize == 4) {
1320 floatInt fi;
1321 fi.f = arg;
1322 argBits = fi.i;
1323 } else {
1324 doubleInt di;
1325 di.d = arg;
1326 argBits = di.i;
1327 }
1328 int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1329 int destLoIndex = destStart + (i + 0) * destSizeBits;
1330 result = insertBits(result, destHiIndex, destLoIndex, argBits);
1331 }
1332 FpDestReg_uqw = result;
1333 '''
1334
# Cvtf2f: media microop that converts packed floating point elements of
# FpSrcReg1 between float and double and merges them into FpDestReg.
#
# Element sizes are in bytes and must be 4 or 8 (asserted). A source element
# is reinterpreted as a float when srcSize == 4 and as a double otherwise,
# held in a double, and written back as a float when destSize == 4 or as a
# double otherwise. Destination bits that are not written keep the previous
# FpDestReg value.
#
# When source elements are twice as wide as destination elements, bit 0x2 of
# ext offsets the destination position by destSizeBits * items. When
# destination elements are twice as wide as source elements, the same bit
# offsets the source position by srcSizeBits * items instead.
1335 class Cvtf2f(MediaOp):
# Takes a single source register; "InstRegIndex(0)" fills the third register
# slot (presumably the unused second source -- confirm against
# MediaOp.__init__, which is not visible here).
1336 def __init__(self, dest, src, \
1337 size = None, destSize = None, srcSize = None, ext = None):
1338 super(Cvtf2f, self).__init__(dest, src,\
1339 "InstRegIndex(0)", size, destSize, srcSize, ext)
1340 op_class = 'SimdFloatCvtOp'
# C++ body for this microop (presumably substituted for %(code)s in the
# MediaOpExecute template -- confirm).
1341 code = '''
1342 union floatInt
1343 {
1344 float f;
1345 uint32_t i;
1346 };
1347 union doubleInt
1348 {
1349 double d;
1350 uint64_t i;
1351 };
1352
1353 assert(destSize == 4 || destSize == 8);
1354 assert(srcSize == 4 || srcSize == 8);
1355 int srcSizeBits = srcSize * 8;
1356 int destSizeBits = destSize * 8;
1357 int items;
1358 int srcStart = 0;
1359 int destStart = 0;
1360 if (srcSize == 2 * destSize) {
1361 items = numItems(srcSize);
1362 if (ext & 0x2)
1363 destStart = destSizeBits * items;
1364 } else if (destSize == 2 * srcSize) {
1365 items = numItems(destSize);
1366 if (ext & 0x2)
1367 srcStart = srcSizeBits * items;
1368 } else {
1369 items = numItems(destSize);
1370 }
1371 uint64_t result = FpDestReg_uqw;
1372
1373 for (int i = 0; i < items; i++) {
1374 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1375 int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1376 uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
1377 double arg;
1378
1379 if (srcSize == 4) {
1380 floatInt fi;
1381 fi.i = argBits;
1382 arg = fi.f;
1383 } else {
1384 doubleInt di;
1385 di.i = argBits;
1386 arg = di.d;
1387 }
1388 if (destSize == 4) {
1389 floatInt fi;
1390 fi.f = arg;
1391 argBits = fi.i;
1392 } else {
1393 doubleInt di;
1394 di.d = arg;
1395 argBits = di.i;
1396 }
1397 int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1398 int destLoIndex = destStart + (i + 0) * destSizeBits;
1399 result = insertBits(result, destHiIndex, destLoIndex, argBits);
1400 }
1401 FpDestReg_uqw = result;
1402 '''
1403
# Mcmpi2r: media microop that compares packed signed integers of FpSrcReg1
# and FpSrcReg2 element by element and writes a per-element mask into
# FpDestReg.
#
# srcSize must equal destSize (asserted). Both elements are sign extended
# before comparing. With bit 0x2 of ext clear the test is arg1 == arg2; with
# it set the test is arg1 > arg2. A passing element becomes mask(sizeBits)
# (presumably all ones -- mask() is defined elsewhere) and a failing element
# becomes 0.
#
# NOTE(review): the floatInt and doubleInt unions declared at the top of the
# code string are never used by this microop.
1404 class Mcmpi2r(MediaOp):
1405 op_class = 'SimdCvtOp'
# C++ body for this microop (presumably substituted for %(code)s in the
# MediaOpExecute template -- confirm).
1406 code = '''
1407 union floatInt
1408 {
1409 float f;
1410 uint32_t i;
1411 };
1412 union doubleInt
1413 {
1414 double d;
1415 uint64_t i;
1416 };
1417
1418 assert(srcSize == destSize);
1419 int size = srcSize;
1420 int sizeBits = size * 8;
1421 int items = numItems(size);
1422 uint64_t result = FpDestReg_uqw;
1423
1424 for (int i = 0; i < items; i++) {
1425 int hiIndex = (i + 1) * sizeBits - 1;
1426 int loIndex = (i + 0) * sizeBits;
1427 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
1428 int64_t arg1 = arg1Bits |
1429 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
1430 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
1431 int64_t arg2 = arg2Bits |
1432 (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
1433
1434 uint64_t resBits = 0;
1435 if (((ext & 0x2) == 0 && arg1 == arg2) ||
1436 ((ext & 0x2) == 0x2 && arg1 > arg2))
1437 resBits = mask(sizeBits);
1438
1439 result = insertBits(result, hiIndex, loIndex, resBits);
1440 }
1441 FpDestReg_uqw = result;
1442 '''
1443
# Mcmpf2r: media microop that compares packed floating point elements of
# FpSrcReg1 and FpSrcReg2 and writes a per-element mask into FpDestReg.
#
# srcSize must equal destSize (asserted). Elements are read as floats when
# size == 4 and as doubles for any other size (there is no assert restricting
# size to 4 or 8 here). The predicate is chosen by ext & mask(3):
#   0: equal, and neither operand is NaN
#   1: arg1 < arg2, and neither operand is NaN
#   2: arg1 <= arg2, and neither operand is NaN
#   3: either operand is NaN
#   4: not equal, or either operand is NaN
#   5: not (arg1 < arg2), or either operand is NaN
#   6: not (arg1 <= arg2), or either operand is NaN
#   7: neither operand is NaN
# A passing element becomes mask(sizeBits) (presumably all ones -- mask() is
# defined elsewhere) and a failing element becomes 0.
1444 class Mcmpf2r(MediaOp):
1445 op_class = 'SimdFloatCvtOp'
# C++ body for this microop (presumably substituted for %(code)s in the
# MediaOpExecute template -- confirm).
1446 code = '''
1447 union floatInt
1448 {
1449 float f;
1450 uint32_t i;
1451 };
1452 union doubleInt
1453 {
1454 double d;
1455 uint64_t i;
1456 };
1457
1458 assert(srcSize == destSize);
1459 int size = srcSize;
1460 int sizeBits = size * 8;
1461 int items = numItems(size);
1462 uint64_t result = FpDestReg_uqw;
1463
1464 for (int i = 0; i < items; i++) {
1465 int hiIndex = (i + 1) * sizeBits - 1;
1466 int loIndex = (i + 0) * sizeBits;
1467 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
1468 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
1469 double arg1, arg2;
1470
1471 if (size == 4) {
1472 floatInt fi;
1473 fi.i = arg1Bits;
1474 arg1 = fi.f;
1475 fi.i = arg2Bits;
1476 arg2 = fi.f;
1477 } else {
1478 doubleInt di;
1479 di.i = arg1Bits;
1480 arg1 = di.d;
1481 di.i = arg2Bits;
1482 arg2 = di.d;
1483 }
1484
1485 uint64_t resBits = 0;
1486 bool nanop = std::isnan(arg1) || std::isnan(arg2);
1487 switch (ext & mask(3)) {
1488 case 0:
1489 if (arg1 == arg2 && !nanop)
1490 resBits = mask(sizeBits);
1491 break;
1492 case 1:
1493 if (arg1 < arg2 && !nanop)
1494 resBits = mask(sizeBits);
1495 break;
1496 case 2:
1497 if (arg1 <= arg2 && !nanop)
1498 resBits = mask(sizeBits);
1499 break;
1500 case 3:
1501 if (nanop)
1502 resBits = mask(sizeBits);
1503 break;
1504 case 4:
1505 if (arg1 != arg2 || nanop)
1506 resBits = mask(sizeBits);
1507 break;
1508 case 5:
1509 if (!(arg1 < arg2) || nanop)
1510 resBits = mask(sizeBits);
1511 break;
1512 case 6:
1513 if (!(arg1 <= arg2) || nanop)
1514 resBits = mask(sizeBits);
1515 break;
1516 case 7:
1517 if (!nanop)
1518 resBits = mask(sizeBits);
1519 break;
1520 };
1521
1522 result = insertBits(result, hiIndex, loIndex, resBits);
1523 }
1524 FpDestReg_uqw = result;
1525 '''
1526
# Mcmpf2rf: media microop that compares the lowest floating point element of
# FpSrcReg1 and FpSrcReg2 and records the outcome in the flag bits rather
# than in a register.
#
# srcSize must equal destSize and be 4 or 8 (asserted); the operands are
# read as floats for size 4 and as doubles otherwise. SF, AF, ZF and PF are
# cleared in ccFlagBits and OF and CF are cleared in cfofBits, then:
#   either operand NaN : ZF, PF and CF set
#   arg1 < arg2        : CF set
#   arg1 == arg2       : ZF set
#   otherwise          : nothing further set
1527 class Mcmpf2rf(MediaOp):
# Takes two sources and no destination; "InstRegIndex(0)" fills the first
# register slot (presumably the unused destination -- confirm against
# MediaOp.__init__, which is not visible here).
1528 def __init__(self, src1, src2,\
1529 size = None, destSize = None, srcSize = None, ext = None):
1530 super(Mcmpf2rf, self).__init__("InstRegIndex(0)", src1,\
1531 src2, size, destSize, srcSize, ext)
1532 op_class = 'SimdFloatCvtOp'
# C++ body for this microop (presumably substituted for %(code)s in the
# MediaOpExecute template -- confirm).
1533 code = '''
1534 union floatInt
1535 {
1536 float f;
1537 uint32_t i;
1538 };
1539 union doubleInt
1540 {
1541 double d;
1542 uint64_t i;
1543 };
1544
1545 assert(srcSize == destSize);
1546 assert(srcSize == 4 || srcSize == 8);
1547 int size = srcSize;
1548 int sizeBits = size * 8;
1549
1550 double arg1, arg2;
1551 uint64_t arg1Bits = bits(FpSrcReg1_uqw, sizeBits - 1, 0);
1552 uint64_t arg2Bits = bits(FpSrcReg2_uqw, sizeBits - 1, 0);
1553 if (size == 4) {
1554 floatInt fi;
1555 fi.i = arg1Bits;
1556 arg1 = fi.f;
1557 fi.i = arg2Bits;
1558 arg2 = fi.f;
1559 } else {
1560 doubleInt di;
1561 di.i = arg1Bits;
1562 arg1 = di.d;
1563 di.i = arg2Bits;
1564 arg2 = di.d;
1565 }
1566
1567 // ZF PF CF
1568 // Unordered 1 1 1
1569 // Greater than 0 0 0
1570 // Less than 0 0 1
1571 // Equal 1 0 0
1572 // OF = SF = AF = 0
1573 ccFlagBits = ccFlagBits & ~(SFBit | AFBit | ZFBit | PFBit);
1574 cfofBits = cfofBits & ~(OFBit | CFBit);
1575
1576 if (std::isnan(arg1) || std::isnan(arg2)) {
1577 ccFlagBits = ccFlagBits | (ZFBit | PFBit);
1578 cfofBits = cfofBits | CFBit;
1579 }
1580 else if(arg1 < arg2)
1581 cfofBits = cfofBits | CFBit;
1582 else if(arg1 == arg2)
1583 ccFlagBits = ccFlagBits | ZFBit;
1584 '''
1585
# Emms: operand-less microop whose generated code assigns 0xFFFF to FTW.
#
# The constructor takes no arguments: it passes InstRegIndex(MISCREG_FTW) in
# the first register slot, InstRegIndex(0) in the next two, and 2 as the
# fourth argument (presumably dest, src1, src2 and size in bytes -- confirm
# against MediaOp.__init__, which is not visible here).
1586 class Emms(MediaOp):
1587 op_class = 'FloatMiscOp'
1588 def __init__(self):
1589 super(Emms, self).__init__('InstRegIndex(MISCREG_FTW)',
1590 'InstRegIndex(0)', 'InstRegIndex(0)', 2)
1591 code = 'FTW = 0xFFFF;'
1592}};