mediaop.isa (11160:10f28b61fcb1) mediaop.isa (11320:42ecb523c64a)
1// Copyright (c) 2009 The Regents of The University of Michigan
2// Copyright (c) 2015 Advanced Micro Devices, Inc.
3//
4// All rights reserved.
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are
8// met: redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer;
10// redistributions in binary form must reproduce the above copyright
11// notice, this list of conditions and the following disclaimer in the
12// documentation and/or other materials provided with the distribution;
13// neither the name of the copyright holders nor the names of its
14// contributors may be used to endorse or promote products derived from
15// this software without specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28//
29// Authors: Gabe Black
30
31def template MediaOpExecute {{
32 Fault %(class_name)s::execute(CPU_EXEC_CONTEXT *xc,
33 Trace::InstRecord *traceData) const
34 {
35 Fault fault = NoFault;
36
37 %(op_decl)s;
38 %(op_rd)s;
39
40 %(code)s;
41
42 //Write the resulting state to the execution context
43 if(fault == NoFault)
44 {
45 %(op_wb)s;
46 }
47 return fault;
48 }
49}};
50
51def template MediaOpRegDeclare {{
52 class %(class_name)s : public %(base_class)s
53 {
54 public:
55 %(class_name)s(ExtMachInst _machInst,
56 const char * instMnem, uint64_t setFlags,
57 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
58 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
59
60 %(BasicExecDeclare)s
61 };
62}};
63
64def template MediaOpImmDeclare {{
65
66 class %(class_name)s : public %(base_class)s
67 {
68 public:
69 %(class_name)s(ExtMachInst _machInst,
70 const char * instMnem, uint64_t setFlags,
71 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
72 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
73
74 %(BasicExecDeclare)s
75 };
76}};
77
78def template MediaOpRegConstructor {{
79 %(class_name)s::%(class_name)s(
80 ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
81 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
82 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
83 %(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
84 _src1, _src2, _dest, _srcSize, _destSize, _ext,
85 %(op_class)s)
86 {
87 %(constructor)s;
88 }
89}};
90
91def template MediaOpImmConstructor {{
92 %(class_name)s::%(class_name)s(
93 ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
94 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
95 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
96 %(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
97 _src1, _imm8, _dest, _srcSize, _destSize, _ext,
98 %(op_class)s)
99 {
100 %(constructor)s;
101 }
102}};
103
104let {{
# Start every output section as an empty string so that later code can
# always append to it with +=.
header_output = decoder_output = exec_output = ""

# Template triples, indexed as [0] declaration, [1] constructor,
# [2] execute method.
immTemplates = (MediaOpImmDeclare, MediaOpImmConstructor, MediaOpExecute)
regTemplates = (MediaOpRegDeclare, MediaOpRegConstructor, MediaOpExecute)
120
class MediaOpMeta(type):
    # Metaclass for media microops. Creating a non-abstract class with it
    # generates the matching C++ classes and registers the Python class in
    # the microassembler's microopClasses dict.

    # Matches the generic second operand "op2" (optionally prefixed with
    # "s" and/or followed by a type qualifier such as "_uqw") as a whole
    # identifier. buildCppClasses and __new__ share this one pattern so
    # that an "i" mnemonic is registered exactly when an Imm C++ class
    # was generated for it.
    op2Matcher = re.compile(
        r"(?<!\w)(?P<prefix>s?)op2(?P<typeQual>_[^\W_]+)?")

    # Matches the immediate operand as a whole identifier.
    imm8Matcher = re.compile(r"(?<!\w)imm8(?!\w)")

    def buildCppClasses(self, name, Name, suffix, code):
        # Append the C++ declaration, constructor and execute method for
        # one microop to the global output strings.
        #
        #   name   - microop mnemonic ("i" is appended for the immediate
        #            variant)
        #   Name   - base name of the generated C++ class
        #   suffix - appended to Name to form the C++ class name
        #   code   - C++ body of the microop; may mention op2 or imm8

        # Globals to stick the output in
        global header_output
        global decoder_output
        global exec_output

        # If op2 is used anywhere, make register and immediate versions
        # of this code. The register name is derived from the first
        # occurrence, exactly as before.
        matcher = MediaOpMeta.op2Matcher
        match = matcher.search(code)
        if match:
            src2_name = "%sFpSrcReg2%s" % (
                match.group("prefix"), match.group("typeQual") or "")
            self.buildCppClasses(name, Name, suffix,
                                 matcher.sub(src2_name, code))
            self.buildCppClasses(name + "i", Name, suffix + "Imm",
                                 matcher.sub("imm8", code))
            return

        # If imm8 shows up in the code, use the immediate templates, if
        # not, hopefully the register ones will be correct.
        if MediaOpMeta.imm8Matcher.search(code):
            base = "X86ISA::MediaOpImm"
            templates = immTemplates
        else:
            base = "X86ISA::MediaOpReg"
            templates = regTemplates

        # Get everything ready for the substitution
        iop = InstObjParams(name, Name + suffix, base, {"code" : code})

        # Generate the actual code (finally!)
        header_output += templates[0].subst(iop)
        decoder_output += templates[1].subst(iop)
        exec_output += templates[2].subst(iop)

    def __new__(mcls, Name, bases, dict):
        # Create the class; unless it is marked abstract, also build its
        # C++ classes and hook it into the microassembler.
        global microopClasses

        # "abstract" is consumed here so it is never inherited.
        abstract = dict.pop('abstract', False)
        name = Name.lower()

        cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
        if not abstract:
            cls.className = Name
            cls.base_mnemonic = name
            code = cls.code

            # Set up the C++ classes
            mcls.buildCppClasses(cls, name, Name, "", code)

            # Hook into the microassembler dict
            microopClasses[name] = cls

            # Code that uses op2 also produced an immediate variant, so
            # make the "i" mnemonic resolve to this class too.
            if mcls.op2Matcher.search(code):
                microopClasses[name + 'i'] = cls
        return cls
191
192
193 class MediaOp(X86Microop):
194 __metaclass__ = MediaOpMeta
195 # This class itself doesn't act as a microop
196 abstract = True
197
198 def __init__(self, dest, src1, op2,
199 size = None, destSize = None, srcSize = None, ext = None):
200 self.dest = dest
201 self.src1 = src1
202 self.op2 = op2
203 if size is not None:
204 self.srcSize = size
205 self.destSize = size
206 if srcSize is not None:
207 self.srcSize = srcSize
208 if destSize is not None:
209 self.destSize = destSize
210 if self.srcSize is None:
211 raise Exception, "Source size not set."
212 if self.destSize is None:
213 raise Exception, "Dest size not set."
214 if ext is None:
215 self.ext = 0
216 else:
1// Copyright (c) 2009 The Regents of The University of Michigan
2// Copyright (c) 2015 Advanced Micro Devices, Inc.
3//
4// All rights reserved.
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are
8// met: redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer;
10// redistributions in binary form must reproduce the above copyright
11// notice, this list of conditions and the following disclaimer in the
12// documentation and/or other materials provided with the distribution;
13// neither the name of the copyright holders nor the names of its
14// contributors may be used to endorse or promote products derived from
15// this software without specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28//
29// Authors: Gabe Black
30
31def template MediaOpExecute {{
32 Fault %(class_name)s::execute(CPU_EXEC_CONTEXT *xc,
33 Trace::InstRecord *traceData) const
34 {
35 Fault fault = NoFault;
36
37 %(op_decl)s;
38 %(op_rd)s;
39
40 %(code)s;
41
42 //Write the resulting state to the execution context
43 if(fault == NoFault)
44 {
45 %(op_wb)s;
46 }
47 return fault;
48 }
49}};
50
51def template MediaOpRegDeclare {{
52 class %(class_name)s : public %(base_class)s
53 {
54 public:
55 %(class_name)s(ExtMachInst _machInst,
56 const char * instMnem, uint64_t setFlags,
57 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
58 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
59
60 %(BasicExecDeclare)s
61 };
62}};
63
64def template MediaOpImmDeclare {{
65
66 class %(class_name)s : public %(base_class)s
67 {
68 public:
69 %(class_name)s(ExtMachInst _machInst,
70 const char * instMnem, uint64_t setFlags,
71 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
72 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
73
74 %(BasicExecDeclare)s
75 };
76}};
77
78def template MediaOpRegConstructor {{
79 %(class_name)s::%(class_name)s(
80 ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
81 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
82 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
83 %(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
84 _src1, _src2, _dest, _srcSize, _destSize, _ext,
85 %(op_class)s)
86 {
87 %(constructor)s;
88 }
89}};
90
91def template MediaOpImmConstructor {{
92 %(class_name)s::%(class_name)s(
93 ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
94 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
95 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
96 %(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
97 _src1, _imm8, _dest, _srcSize, _destSize, _ext,
98 %(op_class)s)
99 {
100 %(constructor)s;
101 }
102}};
103
104let {{
# Start every output section as an empty string so that later code can
# always append to it with +=.
header_output = decoder_output = exec_output = ""

# Template triples, indexed as [0] declaration, [1] constructor,
# [2] execute method.
immTemplates = (MediaOpImmDeclare, MediaOpImmConstructor, MediaOpExecute)
regTemplates = (MediaOpRegDeclare, MediaOpRegConstructor, MediaOpExecute)
120
class MediaOpMeta(type):
    # Metaclass for media microops. Creating a non-abstract class with it
    # generates the matching C++ classes and registers the Python class in
    # the microassembler's microopClasses dict.

    # Matches the generic second operand "op2" (optionally prefixed with
    # "s" and/or followed by a type qualifier such as "_uqw") as a whole
    # identifier. buildCppClasses and __new__ share this one pattern so
    # that an "i" mnemonic is registered exactly when an Imm C++ class
    # was generated for it.
    op2Matcher = re.compile(
        r"(?<!\w)(?P<prefix>s?)op2(?P<typeQual>_[^\W_]+)?")

    # Matches the immediate operand as a whole identifier.
    imm8Matcher = re.compile(r"(?<!\w)imm8(?!\w)")

    def buildCppClasses(self, name, Name, suffix, code):
        # Append the C++ declaration, constructor and execute method for
        # one microop to the global output strings.
        #
        #   name   - microop mnemonic ("i" is appended for the immediate
        #            variant)
        #   Name   - base name of the generated C++ class
        #   suffix - appended to Name to form the C++ class name
        #   code   - C++ body of the microop; may mention op2 or imm8

        # Globals to stick the output in
        global header_output
        global decoder_output
        global exec_output

        # If op2 is used anywhere, make register and immediate versions
        # of this code. The register name is derived from the first
        # occurrence, exactly as before.
        matcher = MediaOpMeta.op2Matcher
        match = matcher.search(code)
        if match:
            src2_name = "%sFpSrcReg2%s" % (
                match.group("prefix"), match.group("typeQual") or "")
            self.buildCppClasses(name, Name, suffix,
                                 matcher.sub(src2_name, code))
            self.buildCppClasses(name + "i", Name, suffix + "Imm",
                                 matcher.sub("imm8", code))
            return

        # If imm8 shows up in the code, use the immediate templates, if
        # not, hopefully the register ones will be correct.
        if MediaOpMeta.imm8Matcher.search(code):
            base = "X86ISA::MediaOpImm"
            templates = immTemplates
        else:
            base = "X86ISA::MediaOpReg"
            templates = regTemplates

        # Get everything ready for the substitution
        iop = InstObjParams(name, Name + suffix, base, {"code" : code})

        # Generate the actual code (finally!)
        header_output += templates[0].subst(iop)
        decoder_output += templates[1].subst(iop)
        exec_output += templates[2].subst(iop)

    def __new__(mcls, Name, bases, dict):
        # Create the class; unless it is marked abstract, also build its
        # C++ classes and hook it into the microassembler.
        global microopClasses

        # "abstract" is consumed here so it is never inherited.
        abstract = dict.pop('abstract', False)
        name = Name.lower()

        cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
        if not abstract:
            cls.className = Name
            cls.base_mnemonic = name
            code = cls.code

            # Set up the C++ classes
            mcls.buildCppClasses(cls, name, Name, "", code)

            # Hook into the microassembler dict
            microopClasses[name] = cls

            # Code that uses op2 also produced an immediate variant, so
            # make the "i" mnemonic resolve to this class too.
            if mcls.op2Matcher.search(code):
                microopClasses[name + 'i'] = cls
        return cls
191
192
class MediaOp(X86Microop):
    # Python-side base class of all media microops. Instances record the
    # operands given in the microcode and produce the C++ allocation
    # expression for the generated microop class.
    __metaclass__ = MediaOpMeta
    # This class itself doesn't act as a microop
    abstract = True

    def __init__(self, dest, src1, op2,
                 size = None, destSize = None, srcSize = None, ext = None):
        # dest, src1, op2 - operand expressions, stored unchanged
        # size            - sets both source and destination size
        # srcSize/destSize - override the respective size individually
        # ext             - extension value; defaults to 0
        #
        # Raises Exception when either size ends up unset.
        self.dest = dest
        self.src1 = src1
        self.op2 = op2
        if size is not None:
            self.srcSize = size
            self.destSize = size
        if srcSize is not None:
            self.srcSize = srcSize
        if destSize is not None:
            self.destSize = destSize
        # When no size argument was passed the attribute may never have
        # been assigned, so look it up with a default; a plain attribute
        # read would raise AttributeError instead of the message below.
        if getattr(self, "srcSize", None) is None:
            raise Exception("Source size not set.")
        if getattr(self, "destSize", None) is None:
            raise Exception("Dest size not set.")
        self.ext = 0 if ext is None else ext

    def getAllocator(self, microFlags):
        # Return, as text, the C++ expression that allocates this microop.
        # The immediate mnemonic (base mnemonic + "i") maps to the C++
        # class carrying the "Imm" suffix.
        className = self.className
        if self.mnemonic == self.base_mnemonic + 'i':
            className += "Imm"
        allocator = '''new %(class_name)s(machInst, macrocodeBlock,
                %(flags)s, %(src1)s, %(op2)s, %(dest)s,
                %(srcSize)s, %(destSize)s, %(ext)s)''' % {
            "class_name" : className,
            "flags" : self.microFlagsText(microFlags),
            "src1" : self.src1, "op2" : self.op2,
            "dest" : self.dest,
            "srcSize" : self.srcSize,
            "destSize" : self.destSize,
            "ext" : self.ext}
        return allocator
234
class Mov2int(MediaOp):
    # Copies one element of a media register into an integer register.
    # The element index is passed as the second operand (default 0).
    def __init__(self, dest, src1, src2 = 0,
                 size = None, destSize = None, srcSize = None, ext = None):
        super(Mov2int, self).__init__(
            dest, src1, src2, size, destSize, srcSize, ext)

    code = '''
        // Number of source-sized elements held by one register.
        const int lanes = sizeof(FloatRegBits) / srcSize;
        int lane = imm8;
        // With ext bit 0 set, an odd source register index shifts the
        // requested element down by one register's worth of elements.
        if ((ext & 0x1) && bits(src1, 0))
            lane -= lanes;
        if (lane >= 0 && lane < lanes) {
            const int lowBit = lane * srcSize * 8;
            const uint64_t laneValue =
                bits(FpSrcReg1_uqw, lowBit + srcSize * 8 - 1, lowBit);
            DestReg = merge(0, laneValue, destSize);
        } else {
            // Element is not in this register: leave the result as is.
            DestReg = DestReg;
        }
    '''
255
class Mov2fp(MediaOp):
    # Inserts an integer register value into one element of a media
    # register. The element index is passed as the second operand
    # (default 0).
    def __init__(self, dest, src1, src2 = 0,
                 size = None, destSize = None, srcSize = None, ext = None):
        super(Mov2fp, self).__init__(
            dest, src1, src2, size, destSize, srcSize, ext)

    code = '''
        // Number of destination-sized elements held by one register.
        const int lanes = sizeof(FloatRegBits) / destSize;
        int lane = imm8;
        // With ext bit 0 set, an odd destination register index shifts
        // the requested element down by one register's worth of elements.
        if ((ext & 0x1) && bits(dest, 0))
            lane -= lanes;
        if (lane >= 0 && lane < lanes) {
            const uint64_t value = pick(SrcReg1, 0, srcSize);
            const int lowBit = lane * destSize * 8;
            FpDestReg_uqw =
                insertBits(FpDestReg_uqw,
                           lowBit + destSize * 8 - 1, lowBit, value);
        } else {
            // Element is not in this register: leave it unchanged.
            FpDestReg_uqw = FpDestReg_uqw;
        }
    '''
276
class Movsign(MediaOp):
    # Gathers the top bit of every source element into consecutive bits
    # of an integer register. There is no second operand; a placeholder
    # register index is passed to the base class.
    def __init__(self, dest, src,
                 size = None, destSize = None, srcSize = None, ext = None):
        super(Movsign, self).__init__(
            dest, src, "InstRegIndex(0)", size, destSize, srcSize, ext)

    code = '''
        const int lanes = sizeof(FloatRegBits) / srcSize;
        const int laneBits = 8 * srcSize;
        // ext bit 0 moves the output bits up by one register's worth.
        const int firstOut = (ext & 0x1) ? lanes : 0;
        uint64_t signMask = 0;
        for (int lane = 0; lane < lanes; lane++) {
            const uint64_t topBit =
                bits(FpSrcReg1_uqw, (lane + 1) * laneBits - 1);
            const int outPos = firstOut + lane;
            signMask = insertBits(signMask, outPos, outPos, topBit);
        }
        // OR into the existing value so earlier contributions are kept.
        DestReg = DestReg | signMask;
    '''
293
class Maskmov(MediaOp):
    # Per element: copy the first source into the destination when the
    # top bit of the matching second-source element is set.
    code = '''
        assert(srcSize == destSize);
        const int laneBytes = srcSize;
        const int laneBits = laneBytes * 8;
        const int lanes = numItems(laneBytes);
        uint64_t merged = FpDestReg_uqw;

        for (int lane = 0; lane < lanes; lane++) {
            const int lo = lane * laneBits;
            const int hi = lo + laneBits - 1;
            const uint64_t dataBits = bits(FpSrcReg1_uqw, hi, lo);
            // Unselected elements keep their previous destination value.
            if (bits(FpSrcReg2_uqw, hi))
                merged = insertBits(merged, hi, lo, dataBits);
        }
        FpDestReg_uqw = merged;
    '''
311
class shuffle(MediaOp):
    # Builds the destination from elements picked out of the two sources;
    # ext holds one selector field per destination element, lowest first.
    code = '''
        assert(srcSize == destSize);
        const int laneBytes = srcSize;
        const int laneBits = laneBytes * 8;
        const int lanes = sizeof(FloatRegBits) / laneBytes;
        // 8-byte elements use a 1-bit selector (2 choices); every other
        // size uses a 2-bit selector (4 choices).
        const int optionBits = (laneBytes == 8) ? 1 : 2;
        const int options = (laneBytes == 8) ? 2 : 4;

        uint64_t shuffled = 0;
        uint8_t selector = ext;

        for (int lane = 0; lane < lanes; lane++) {
            uint8_t pickIdx = selector & mask(optionBits);
            selector >>= optionBits;

            // Selections past the end of one register refer to the
            // second source; rebase them to start at zero.
            const bool fromSecond =
                pickIdx * laneBytes >= sizeof(FloatRegBits);
            if (fromSecond)
                pickIdx -= options / 2;
            const uint64_t source =
                fromSecond ? FpSrcReg2_uqw : FpSrcReg1_uqw;
            const uint64_t chosen =
                bits(source, (pickIdx + 1) * laneBits - 1,
                     pickIdx * laneBits);

            const int lo = lane * laneBits;
            shuffled = insertBits(shuffled, lo + laneBits - 1, lo, chosen);
        }
        FpDestReg_uqw = shuffled;
    '''
353
class Unpack(MediaOp):
    # Interleaves elements of the two sources: even result elements come
    # from the first source, odd ones from the second. A non-zero ext
    # selects the upper half of the sources instead of the lower half.
    code = '''
        assert(srcSize == destSize);
        const int laneBytes = destSize;
        const int laneBits = 8 * laneBytes;
        const int pairs = (sizeof(FloatRegBits) / laneBytes) / 2;
        const int firstLane = ext ? pairs : 0;
        uint64_t interleaved = 0;
        for (int pair = 0; pair < pairs; pair++) {
            const int srcLo = (pair + firstLane) * laneBits;
            const int srcHi = srcLo + laneBits - 1;
            const int evenLo = 2 * pair * laneBits;
            const int oddLo = evenLo + laneBits;

            const uint64_t fromFirst = bits(FpSrcReg1_uqw, srcHi, srcLo);
            interleaved = insertBits(interleaved,
                    evenLo + laneBits - 1, evenLo, fromFirst);

            const uint64_t fromSecond = bits(FpSrcReg2_uqw, srcHi, srcLo);
            interleaved = insertBits(interleaved,
                    oddLo + laneBits - 1, oddLo, fromSecond);
        }
        FpDestReg_uqw = interleaved;
    '''
379
class Pack(MediaOp):
    # Narrows signed source elements to elements of half the size with
    # saturation. The low half of the result is taken from the first
    # source and the high half from the second. signedOp() selects signed
    # saturation; otherwise the result saturates to the unsigned range.
    #
    # Fixes relative to the previous version:
    #  * the second source was indexed with (i - items), which is negative
    #    and produced negative bit positions; it is now (i - items / 2);
    #  * the signed overflow test used mask(destBits - srcBits + 1), a
    #    negative width; the compared field is srcBits - destBits + 1 bits
    #    wide;
    #  * unsigned saturation treated values with bit destBits-1 set as
    #    overflow although they fit in the unsigned destination.
    code = '''
        assert(srcSize == destSize * 2);
        const int items = (sizeof(FloatRegBits) / destSize);
        const int half = items / 2;
        const int destBits = destSize * 8;
        const int srcBits = srcSize * 8;
        uint64_t result = 0;
        for (int i = 0; i < items; i++) {
            // Each source supplies `half` elements, indexed from zero.
            const bool useFirst = i < half;
            const int srcIdx = useFirst ? i : i - half;
            const uint64_t srcVal =
                useFirst ? FpSrcReg1_uqw : FpSrcReg2_uqw;
            uint64_t picked =
                bits(srcVal, (srcIdx + 1) * srcBits - 1,
                             (srcIdx + 0) * srcBits);
            const bool negative = bits(picked, srcBits - 1);

            // Handle saturation. The source element is always signed.
            if (signedOp()) {
                // The value fits iff every bit from the destination's
                // sign position upward is a copy of the source sign bit.
                const uint64_t upper =
                    bits(picked, srcBits - 1, destBits - 1);
                if (negative) {
                    if (upper != mask(srcBits - destBits + 1))
                        picked = (ULL(1) << (destBits - 1));
                } else if (upper != 0) {
                    picked = mask(destBits - 1);
                }
            } else {
                // Negative values clamp to zero; positive values clamp
                // to all ones only when a bit above the destination
                // width is set.
                if (negative)
                    picked = 0;
                else if (bits(picked, srcBits - 1, destBits) != 0)
                    picked = mask(destBits);
            }
            // insertBits keeps only the low destBits bits of picked.
            result = insertBits(result,
                                (i + 1) * destBits - 1,
                                (i + 0) * destBits,
                                picked);
        }
        FpDestReg_uqw = result;
    '''
446
class Mxor(MediaOp):
    # Bitwise XOR of the two sources over the whole register.
    code = '''
        FpDestReg_uqw = FpSrcReg1_uqw ^ FpSrcReg2_uqw;
    '''

    def __init__(self, dest, src1, src2):
        # The operation is bitwise, so a nominal element size of 1 is used.
        super(Mxor, self).__init__(dest, src1, src2, size = 1)
453
class Mor(MediaOp):
    # Bitwise OR of the two sources over the whole register.
    code = '''
        FpDestReg_uqw = FpSrcReg1_uqw | FpSrcReg2_uqw;
    '''

    def __init__(self, dest, src1, src2):
        # The operation is bitwise, so a nominal element size of 1 is used.
        super(Mor, self).__init__(dest, src1, src2, size = 1)
460
class Mand(MediaOp):
    # Bitwise AND of the two sources over the whole register.
    code = '''
        FpDestReg_uqw = FpSrcReg1_uqw & FpSrcReg2_uqw;
    '''

    def __init__(self, dest, src1, src2):
        # The operation is bitwise, so a nominal element size of 1 is used.
        super(Mand, self).__init__(dest, src1, src2, size = 1)
467
class Mandn(MediaOp):
    # Bitwise AND of the complemented first source with the second source.
    code = '''
        FpDestReg_uqw = ~FpSrcReg1_uqw & FpSrcReg2_uqw;
    '''

    def __init__(self, dest, src1, src2):
        # The operation is bitwise, so a nominal element size of 1 is used.
        super(Mandn, self).__init__(dest, src1, src2, size = 1)
474
class Mminf(MediaOp):
    # Element-wise floating point minimum of the two sources for 4- or
    # 8-byte elements.
    code = '''
        // Views used to reinterpret raw element bits as floating point.
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        const int laneBytes = srcSize;
        const int laneBits = laneBytes * 8;
        assert(srcSize == 4 || srcSize == 8);
        const int lanes = numItems(laneBytes);
        uint64_t merged = FpDestReg_uqw;

        for (int lane = 0; lane < lanes; lane++) {
            const int lo = lane * laneBits;
            const int hi = lo + laneBits - 1;
            const uint64_t firstBits = bits(FpSrcReg1_uqw, hi, lo);
            const uint64_t secondBits = bits(FpSrcReg2_uqw, hi, lo);

            double first, second;
            if (laneBytes == 4) {
                floatInt conv;
                conv.i = firstBits;
                first = conv.f;
                conv.i = secondBits;
                second = conv.f;
            } else {
                doubleInt conv;
                conv.i = firstBits;
                first = conv.d;
                conv.i = secondBits;
                second = conv.d;
            }

            // Equal values and unordered (NaN) comparisons select the
            // second operand.
            const uint64_t chosen =
                (first < second) ? firstBits : secondBits;
            merged = insertBits(merged, hi, lo, chosen);
        }
        FpDestReg_uqw = merged;
    '''
524
class Mmaxf(MediaOp):
    # Element-wise floating point maximum of the two sources for 4- or
    # 8-byte elements.
    code = '''
        // Views used to reinterpret raw element bits as floating point.
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        const int laneBytes = srcSize;
        const int laneBits = laneBytes * 8;
        assert(srcSize == 4 || srcSize == 8);
        const int lanes = numItems(laneBytes);
        uint64_t merged = FpDestReg_uqw;

        for (int lane = 0; lane < lanes; lane++) {
            const int lo = lane * laneBits;
            const int hi = lo + laneBits - 1;
            const uint64_t firstBits = bits(FpSrcReg1_uqw, hi, lo);
            const uint64_t secondBits = bits(FpSrcReg2_uqw, hi, lo);

            double first, second;
            if (laneBytes == 4) {
                floatInt conv;
                conv.i = firstBits;
                first = conv.f;
                conv.i = secondBits;
                second = conv.f;
            } else {
                doubleInt conv;
                conv.i = firstBits;
                first = conv.d;
                conv.i = secondBits;
                second = conv.d;
            }

            // Equal values and unordered (NaN) comparisons select the
            // second operand.
            const uint64_t chosen =
                (first > second) ? firstBits : secondBits;
            merged = insertBits(merged, hi, lo, chosen);
        }
        FpDestReg_uqw = merged;
    '''
574
class Mmini(MediaOp):
    # Element-wise integer minimum of the two sources; signedOp() selects
    # a signed comparison, otherwise the raw bits compare as unsigned.
    code = '''

        assert(srcSize == destSize);
        const int laneBytes = srcSize;
        const int laneBits = laneBytes * 8;
        const int lanes = numItems(laneBytes);
        // Top bit of one element, used to sign-extend it to 64 bits.
        const uint64_t topBit = ULL(1) << (laneBits - 1);
        uint64_t merged = FpDestReg_uqw;

        for (int lane = 0; lane < lanes; lane++) {
            const int lo = lane * laneBits;
            const int hi = lo + laneBits - 1;
            const uint64_t firstBits = bits(FpSrcReg1_uqw, hi, lo);
            const int64_t firstSigned =
                firstBits | (0 - (firstBits & topBit));
            const uint64_t secondBits = bits(FpSrcReg2_uqw, hi, lo);
            const int64_t secondSigned =
                secondBits | (0 - (secondBits & topBit));

            // On equality the second operand is selected.
            const bool takeFirst = signedOp() ?
                (firstSigned < secondSigned) : (firstBits < secondBits);
            const uint64_t chosen = takeFirst ? firstBits : secondBits;
            merged = insertBits(merged, hi, lo, chosen);
        }
        FpDestReg_uqw = merged;
    '''
612
class Mmaxi(MediaOp):
    # Element-wise integer maximum of the two sources; signedOp() selects
    # a signed comparison, otherwise the raw bits compare as unsigned.
    code = '''

        assert(srcSize == destSize);
        const int laneBytes = srcSize;
        const int laneBits = laneBytes * 8;
        const int lanes = numItems(laneBytes);
        // Top bit of one element, used to sign-extend it to 64 bits.
        const uint64_t topBit = ULL(1) << (laneBits - 1);
        uint64_t merged = FpDestReg_uqw;

        for (int lane = 0; lane < lanes; lane++) {
            const int lo = lane * laneBits;
            const int hi = lo + laneBits - 1;
            const uint64_t firstBits = bits(FpSrcReg1_uqw, hi, lo);
            const int64_t firstSigned =
                firstBits | (0 - (firstBits & topBit));
            const uint64_t secondBits = bits(FpSrcReg2_uqw, hi, lo);
            const int64_t secondSigned =
                secondBits | (0 - (secondBits & topBit));

            // On equality the second operand is selected.
            const bool takeFirst = signedOp() ?
                (firstSigned > secondSigned) : (firstBits > secondBits);
            const uint64_t chosen = takeFirst ? firstBits : secondBits;
            merged = insertBits(merged, hi, lo, chosen);
        }
        FpDestReg_uqw = merged;
    '''
650
class Msqrt(MediaOp):
    # Element-wise square root of the source for 4- or 8-byte elements.
    # There is no second operand; a placeholder register index is passed
    # to the base class.
    def __init__(self, dest, src,
                 size = None, destSize = None, srcSize = None, ext = None):
        super(Msqrt, self).__init__(
            dest, src, "InstRegIndex(0)", size, destSize, srcSize, ext)

    code = '''
        // Views used to reinterpret raw element bits as floating point.
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        const int laneBytes = srcSize;
        const int laneBits = laneBytes * 8;
        assert(srcSize == 4 || srcSize == 8);
        const int lanes = numItems(laneBytes);
        uint64_t merged = FpDestReg_uqw;

        for (int lane = 0; lane < lanes; lane++) {
            const int lo = lane * laneBits;
            const int hi = lo + laneBits - 1;
            uint64_t laneValue = bits(FpSrcReg1_uqw, hi, lo);

            if (laneBytes == 4) {
                floatInt conv;
                conv.i = laneValue;
                conv.f = sqrt(conv.f);
                laneValue = conv.i;
            } else {
                doubleInt conv;
                conv.i = laneValue;
                conv.d = sqrt(conv.d);
                laneValue = conv.i;
            }
            merged = insertBits(merged, hi, lo, laneValue);
        }
        FpDestReg_uqw = merged;
    '''
695
# compute approximate reciprocal --- single-precision only
class Mrcp(MediaOp):
    # There is no second operand; a placeholder register index is passed
    # to the base class.
    def __init__(self, dest, src,
                 size = None, destSize = None, srcSize = None, ext = None):
        super(Mrcp, self).__init__(
            dest, src, "InstRegIndex(0)", size, destSize, srcSize, ext)

    code = '''
        // View used to reinterpret raw element bits as a float.
        union floatInt
        {
            float f;
            uint32_t i;
        };

        assert(srcSize == 4);  // ISA defines single-precision only
        assert(srcSize == destSize);
        const int laneBytes = 4;
        const int laneBits = laneBytes * 8;
        const int lanes = numItems(laneBytes);
        uint64_t merged = FpDestReg_uqw;

        for (int lane = 0; lane < lanes; lane++) {
            const int lo = lane * laneBits;
            const int hi = lo + laneBits - 1;

            floatInt conv;
            conv.i = bits(FpSrcReg1_uqw, hi, lo);
            // This is more accuracy than HW provides, but oh well
            conv.f = 1.0 / conv.f;
            const uint64_t laneValue = conv.i;
            merged = insertBits(merged, hi, lo, laneValue);
        }
        FpDestReg_uqw = merged;
    '''
730
class Maddf(MediaOp):
    # Element-wise floating point addition of the two sources for 4- or
    # 8-byte elements.
    code = '''
        // Views used to reinterpret raw element bits as floating point.
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        const int laneBytes = srcSize;
        const int laneBits = laneBytes * 8;
        assert(srcSize == 4 || srcSize == 8);
        const int lanes = numItems(laneBytes);
        uint64_t merged = FpDestReg_uqw;

        for (int lane = 0; lane < lanes; lane++) {
            const int lo = lane * laneBits;
            const int hi = lo + laneBits - 1;
            const uint64_t firstBits = bits(FpSrcReg1_uqw, hi, lo);
            const uint64_t secondBits = bits(FpSrcReg2_uqw, hi, lo);
            uint64_t outBits;

            if (laneBytes == 4) {
                floatInt lhs, rhs, out;
                lhs.i = firstBits;
                rhs.i = secondBits;
                out.f = lhs.f + rhs.f;
                outBits = out.i;
            } else {
                doubleInt lhs, rhs, out;
                lhs.i = firstBits;
                rhs.i = secondBits;
                out.d = lhs.d + rhs.d;
                outBits = out.i;
            }

            merged = insertBits(merged, hi, lo, outBits);
        }
        FpDestReg_uqw = merged;
    '''
776
class Msubf(MediaOp):
    # Element-wise floating point subtraction (first source minus second)
    # for 4- or 8-byte elements.
    code = '''
        // Views used to reinterpret raw element bits as floating point.
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        const int laneBytes = srcSize;
        const int laneBits = laneBytes * 8;
        assert(srcSize == 4 || srcSize == 8);
        const int lanes = numItems(laneBytes);
        uint64_t merged = FpDestReg_uqw;

        for (int lane = 0; lane < lanes; lane++) {
            const int lo = lane * laneBits;
            const int hi = lo + laneBits - 1;
            const uint64_t firstBits = bits(FpSrcReg1_uqw, hi, lo);
            const uint64_t secondBits = bits(FpSrcReg2_uqw, hi, lo);
            uint64_t outBits;

            if (laneBytes == 4) {
                floatInt lhs, rhs, out;
                lhs.i = firstBits;
                rhs.i = secondBits;
                out.f = lhs.f - rhs.f;
                outBits = out.i;
            } else {
                doubleInt lhs, rhs, out;
                lhs.i = firstBits;
                rhs.i = secondBits;
                out.d = lhs.d - rhs.d;
                outBits = out.i;
            }

            merged = insertBits(merged, hi, lo, outBits);
        }
        FpDestReg_uqw = merged;
    '''
822
class Mmulf(MediaOp):
    # Element-wise floating point multiplication of the two sources for
    # 4- or 8-byte elements.
    code = '''
        // Views used to reinterpret raw element bits as floating point.
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        const int laneBytes = srcSize;
        const int laneBits = laneBytes * 8;
        assert(srcSize == 4 || srcSize == 8);
        const int lanes = numItems(laneBytes);
        uint64_t merged = FpDestReg_uqw;

        for (int lane = 0; lane < lanes; lane++) {
            const int lo = lane * laneBits;
            const int hi = lo + laneBits - 1;
            const uint64_t firstBits = bits(FpSrcReg1_uqw, hi, lo);
            const uint64_t secondBits = bits(FpSrcReg2_uqw, hi, lo);
            uint64_t outBits;

            if (laneBytes == 4) {
                floatInt lhs, rhs, out;
                lhs.i = firstBits;
                rhs.i = secondBits;
                out.f = lhs.f * rhs.f;
                outBits = out.i;
            } else {
                doubleInt lhs, rhs, out;
                lhs.i = firstBits;
                rhs.i = secondBits;
                out.d = lhs.d * rhs.d;
                outBits = out.i;
            }

            merged = insertBits(merged, hi, lo, outBits);
        }
        FpDestReg_uqw = merged;
    '''
868
class Mdivf(MediaOp):
    # Element-wise floating point division (first source divided by
    # second) for 4- or 8-byte elements.
    code = '''
        // Views used to reinterpret raw element bits as floating point.
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        const int laneBytes = srcSize;
        const int laneBits = laneBytes * 8;
        assert(srcSize == 4 || srcSize == 8);
        const int lanes = numItems(laneBytes);
        uint64_t merged = FpDestReg_uqw;

        for (int lane = 0; lane < lanes; lane++) {
            const int lo = lane * laneBits;
            const int hi = lo + laneBits - 1;
            const uint64_t firstBits = bits(FpSrcReg1_uqw, hi, lo);
            const uint64_t secondBits = bits(FpSrcReg2_uqw, hi, lo);
            uint64_t outBits;

            if (laneBytes == 4) {
                floatInt lhs, rhs, out;
                lhs.i = firstBits;
                rhs.i = secondBits;
                out.f = lhs.f / rhs.f;
                outBits = out.i;
            } else {
                doubleInt lhs, rhs, out;
                lhs.i = firstBits;
                rhs.i = secondBits;
                out.d = lhs.d / rhs.d;
                outBits = out.i;
            }

            merged = insertBits(merged, hi, lo, outBits);
        }
        FpDestReg_uqw = merged;
    '''
914
915 class Maddi(MediaOp):
916 code = '''
917 assert(srcSize == destSize);
918 int size = srcSize;
919 int sizeBits = size * 8;
920 int items = numItems(size);
921 uint64_t result = FpDestReg_uqw;
922
923 for (int i = 0; i < items; i++) {
924 int hiIndex = (i + 1) * sizeBits - 1;
925 int loIndex = (i + 0) * sizeBits;
926 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
927 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
928 uint64_t resBits = arg1Bits + arg2Bits;
218
def getAllocator(self, microFlags):
    # Return, as text, the C++ expression that allocates this microop.
    # The immediate mnemonic (base mnemonic + "i") maps to the C++ class
    # carrying the "Imm" suffix.
    isImmForm = (self.mnemonic == self.base_mnemonic + 'i')
    fields = {
        "class_name" : (self.className + "Imm") if isImmForm
                       else self.className,
        "flags" : self.microFlagsText(microFlags),
        "src1" : self.src1, "op2" : self.op2,
        "dest" : self.dest,
        "srcSize" : self.srcSize,
        "destSize" : self.destSize,
        "ext" : self.ext}
    return '''new %(class_name)s(machInst, macrocodeBlock,
            %(flags)s, %(src1)s, %(op2)s, %(dest)s,
            %(srcSize)s, %(destSize)s, %(ext)s)''' % fields
234
# Mov2int: copy one srcSize-byte item of FpSrcReg1 into DestReg.
# src2 defaults to 0 and is simply forwarded to MediaOp.__init__.
# imm8 is the item index; it is lowered by the number of items per
# FloatRegBits when bit 0 of src1 and bit 0 of ext are both set. An index
# outside [0, items) leaves DestReg assigned to its own value.
# NOTE(review): merge() is defined outside this view -- confirm its semantics.
235 class Mov2int(MediaOp):
236 def __init__(self, dest, src1, src2 = 0, \
237 size = None, destSize = None, srcSize = None, ext = None):
238 super(Mov2int, self).__init__(dest, src1,\
239 src2, size, destSize, srcSize, ext)
240 code = '''
241 int items = sizeof(FloatRegBits) / srcSize;
242 int offset = imm8;
243 if (bits(src1, 0) && (ext & 0x1))
244 offset -= items;
245 if (offset >= 0 && offset < items) {
246 uint64_t fpSrcReg1 =
247 bits(FpSrcReg1_uqw,
248 (offset + 1) * srcSize * 8 - 1,
249 (offset + 0) * srcSize * 8);
250 DestReg = merge(0, fpSrcReg1, destSize);
251 } else {
252 DestReg = DestReg;
253 }
254 '''
255
# Mov2fp: insert a value picked from SrcReg1 into one destSize-byte item of
# FpDestReg. src2 defaults to 0 and is forwarded to MediaOp.__init__.
# imm8 is the item index; it is lowered by the number of items per
# FloatRegBits when bit 0 of dest and bit 0 of ext are both set. An index
# outside [0, items) leaves FpDestReg assigned to its own value.
# NOTE(review): pick() is defined outside this view -- confirm its semantics.
256 class Mov2fp(MediaOp):
257 def __init__(self, dest, src1, src2 = 0, \
258 size = None, destSize = None, srcSize = None, ext = None):
259 super(Mov2fp, self).__init__(dest, src1,\
260 src2, size, destSize, srcSize, ext)
261 code = '''
262 int items = sizeof(FloatRegBits) / destSize;
263 int offset = imm8;
264 if (bits(dest, 0) && (ext & 0x1))
265 offset -= items;
266 if (offset >= 0 && offset < items) {
267 uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
268 FpDestReg_uqw =
269 insertBits(FpDestReg_uqw,
270 (offset + 1) * destSize * 8 - 1,
271 (offset + 0) * destSize * 8, srcReg1);
272 } else {
273 FpDestReg_uqw = FpDestReg_uqw;
274 }
275 '''
276
# Movsign: gather the top bit of every srcSize-byte item of FpSrcReg1 into
# consecutive bits of an integer and OR that into DestReg.
# The constructor takes a single source and passes "InstRegIndex(0)" as the
# second source. When bit 0 of ext is set the gathered bits are placed
# `items` positions higher.
277 class Movsign(MediaOp):
278 def __init__(self, dest, src, \
279 size = None, destSize = None, srcSize = None, ext = None):
280 super(Movsign, self).__init__(dest, src,\
281 "InstRegIndex(0)", size, destSize, srcSize, ext)
282 code = '''
283 int items = sizeof(FloatRegBits) / srcSize;
284 uint64_t result = 0;
285 int offset = (ext & 0x1) ? items : 0;
286 for (int i = 0; i < items; i++) {
287 uint64_t picked =
288 bits(FpSrcReg1_uqw, (i + 1) * 8 * srcSize - 1);
289 result = insertBits(result, i + offset, i + offset, picked);
290 }
291 DestReg = DestReg | result;
292 '''
293
# Maskmov: conditional item-wise move.
# For each of the numItems(size) items, the item from FpSrcReg1 replaces the
# matching item of FpDestReg only when the top bit of the corresponding
# FpSrcReg2 item is set; all other items keep their FpDestReg value.
294 class Maskmov(MediaOp):
295 code = '''
296 assert(srcSize == destSize);
297 int size = srcSize;
298 int sizeBits = size * 8;
299 int items = numItems(size);
300 uint64_t result = FpDestReg_uqw;
301
302 for (int i = 0; i < items; i++) {
303 int hiIndex = (i + 1) * sizeBits - 1;
304 int loIndex = (i + 0) * sizeBits;
305 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
306 if (bits(FpSrcReg2_uqw, hiIndex))
307 result = insertBits(result, hiIndex, loIndex, arg1Bits);
308 }
309 FpDestReg_uqw = result;
310 '''
311
# shuffle: build FpDestReg from items selected out of the two sources.
# ext is consumed as a sequence of selector fields, lowest bits first:
# 1 bit per item when size == 8 (2 options), otherwise 2 bits (4 options).
# A selector whose byte offset (lsel * size) reaches sizeof(FloatRegBits)
# is reduced by options / 2 and reads from FpSrcReg2; any other selector
# reads from FpSrcReg1. The result starts from zero, not from FpDestReg.
312 class shuffle(MediaOp):
313 code = '''
314 assert(srcSize == destSize);
315 int size = srcSize;
316 int sizeBits = size * 8;
317 int items = sizeof(FloatRegBits) / size;
318 int options;
319 int optionBits;
320 if (size == 8) {
321 options = 2;
322 optionBits = 1;
323 } else {
324 options = 4;
325 optionBits = 2;
326 }
327
328 uint64_t result = 0;
329 uint8_t sel = ext;
330
331 for (int i = 0; i < items; i++) {
332 uint64_t resBits;
333 uint8_t lsel = sel & mask(optionBits);
334 if (lsel * size >= sizeof(FloatRegBits)) {
335 lsel -= options / 2;
336 resBits = bits(FpSrcReg2_uqw,
337 (lsel + 1) * sizeBits - 1,
338 (lsel + 0) * sizeBits);
339 } else {
340 resBits = bits(FpSrcReg1_uqw,
341 (lsel + 1) * sizeBits - 1,
342 (lsel + 0) * sizeBits);
343 }
344
345 sel >>= optionBits;
346
347 int hiIndex = (i + 1) * sizeBits - 1;
348 int loIndex = (i + 0) * sizeBits;
349 result = insertBits(result, hiIndex, loIndex, resBits);
350 }
351 FpDestReg_uqw = result;
352 '''
353
# Unpack: interleave items from the two sources.
# Half a register's worth of items is read from each source; item i of
# FpSrcReg1 lands in result slot 2*i and item i of FpSrcReg2 in slot 2*i + 1.
# A non-zero ext shifts the read window to the upper half of both sources.
# The result starts from zero, not from FpDestReg.
354 class Unpack(MediaOp):
355 code = '''
356 assert(srcSize == destSize);
357 int size = destSize;
358 int items = (sizeof(FloatRegBits) / size) / 2;
359 int offset = ext ? items : 0;
360 uint64_t result = 0;
361 for (int i = 0; i < items; i++) {
362 uint64_t pickedLow =
363 bits(FpSrcReg1_uqw, (i + offset + 1) * 8 * size - 1,
364 (i + offset) * 8 * size);
365 result = insertBits(result,
366 (2 * i + 1) * 8 * size - 1,
367 (2 * i + 0) * 8 * size,
368 pickedLow);
369 uint64_t pickedHigh =
370 bits(FpSrcReg2_uqw, (i + offset + 1) * 8 * size - 1,
371 (i + offset) * 8 * size);
372 result = insertBits(result,
373 (2 * i + 2) * 8 * size - 1,
374 (2 * i + 1) * 8 * size,
375 pickedHigh);
376 }
377 FpDestReg_uqw = result;
378 '''
379
# Pack: narrow signed srcSize-byte items into destSize-byte items with
# saturation (srcSize must be exactly twice destSize).
# The low half of the result is packed from FpSrcReg1 and the high half from
# FpSrcReg2. signedOp() selects saturation to the signed destination range;
# otherwise the signed inputs saturate to the unsigned destination range.
# The result starts from zero, not from FpDestReg.
class Pack(MediaOp):
    code = '''
        assert(srcSize == destSize * 2);
        const int items = (sizeof(FloatRegBits) / destSize);
        const int destBits = destSize * 8;
        const int srcBits = srcSize * 8;
        // Width of the field holding every bit dropped by the narrowing
        // plus the bit that becomes the destination item's top bit.
        const int overflowBits = srcBits - destBits + 1;
        uint64_t result = 0;

        for (int i = 0; i < items; i++) {
            // Index the second source from its own item 0 so that the
            // bit positions handed to bits() are never negative.
            const bool useSrc2 = (i >= items / 2);
            const int srcItem = useSrc2 ? (i - items / 2) : i;
            const uint64_t srcVal =
                useSrc2 ? FpSrcReg2_uqw : FpSrcReg1_uqw;

            uint64_t picked =
                bits(srcVal, (srcItem + 1) * srcBits - 1,
                             (srcItem + 0) * srcBits);
            const unsigned signBit = bits(picked, srcBits - 1);
            const uint64_t overflow =
                bits(picked, srcBits - 1, destBits - 1);

            // Handle saturation.
            if (signedOp()) {
                // The value fits iff the overflow field is a pure sign
                // extension (all ones or all zeros).
                if (signBit) {
                    if (overflow != mask(overflowBits))
                        picked = (ULL(1) << (destBits - 1));
                } else if (overflow != 0) {
                    picked = mask(destBits - 1);
                }
            } else {
                // Unsigned destination range: negative inputs clamp to
                // zero; positive inputs clamp only when a bit above the
                // destination width is set.
                if (signBit) {
                    picked = 0;
                } else if ((overflow >> 1) != 0) {
                    picked = mask(destBits);
                }
            }

            result = insertBits(result,
                                (i + 1) * destBits - 1,
                                (i + 0) * destBits,
                                picked);
        }
        FpDestReg_uqw = result;
    '''
446
# Mxor: FpDestReg = FpSrcReg1 XOR FpSrcReg2 over the whole 64-bit value.
# The constructor takes no size arguments and passes 1 as the fourth
# argument of MediaOp.__init__ (presumably `size` -- confirm against
# MediaOp.__init__, which is outside this view).
447 class Mxor(MediaOp):
448 def __init__(self, dest, src1, src2):
449 super(Mxor, self).__init__(dest, src1, src2, 1)
450 code = '''
451 FpDestReg_uqw = FpSrcReg1_uqw ^ FpSrcReg2_uqw;
452 '''
453
# Mor: FpDestReg = FpSrcReg1 OR FpSrcReg2 over the whole 64-bit value.
# The constructor takes no size arguments and passes 1 as the fourth
# argument of MediaOp.__init__ (presumably `size` -- confirm against
# MediaOp.__init__, which is outside this view).
454 class Mor(MediaOp):
455 def __init__(self, dest, src1, src2):
456 super(Mor, self).__init__(dest, src1, src2, 1)
457 code = '''
458 FpDestReg_uqw = FpSrcReg1_uqw | FpSrcReg2_uqw;
459 '''
460
# Mand: FpDestReg = FpSrcReg1 AND FpSrcReg2 over the whole 64-bit value.
# The constructor takes no size arguments and passes 1 as the fourth
# argument of MediaOp.__init__ (presumably `size` -- confirm against
# MediaOp.__init__, which is outside this view).
461 class Mand(MediaOp):
462 def __init__(self, dest, src1, src2):
463 super(Mand, self).__init__(dest, src1, src2, 1)
464 code = '''
465 FpDestReg_uqw = FpSrcReg1_uqw & FpSrcReg2_uqw;
466 '''
467
# Mandn: FpDestReg = (NOT FpSrcReg1) AND FpSrcReg2; only the first source
# is complemented.
# The constructor takes no size arguments and passes 1 as the fourth
# argument of MediaOp.__init__ (presumably `size` -- confirm against
# MediaOp.__init__, which is outside this view).
468 class Mandn(MediaOp):
469 def __init__(self, dest, src1, src2):
470 super(Mandn, self).__init__(dest, src1, src2, 1)
471 code = '''
472 FpDestReg_uqw = ~FpSrcReg1_uqw & FpSrcReg2_uqw;
473 '''
474
# Mminf: item-wise floating point minimum.
# Each 4- or 8-byte item of both sources is reinterpreted as float/double
# (floats are widened to double for the comparison). The first source's
# bits are kept only when arg1 < arg2; in every other case, including
# equality and comparisons that are false because of a NaN, the second
# source's bits are used. The original bit patterns are copied, not a
# recomputed value.
475 class Mminf(MediaOp):
476 code = '''
477 union floatInt
478 {
479 float f;
480 uint32_t i;
481 };
482 union doubleInt
483 {
484 double d;
485 uint64_t i;
486 };
487
488 assert(srcSize == destSize);
489 int size = srcSize;
490 int sizeBits = size * 8;
491 assert(srcSize == 4 || srcSize == 8);
492 int items = numItems(size);
493 uint64_t result = FpDestReg_uqw;
494
495 for (int i = 0; i < items; i++) {
496 double arg1, arg2;
497 int hiIndex = (i + 1) * sizeBits - 1;
498 int loIndex = (i + 0) * sizeBits;
499 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
500 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
501
502 if (size == 4) {
503 floatInt fi;
504 fi.i = arg1Bits;
505 arg1 = fi.f;
506 fi.i = arg2Bits;
507 arg2 = fi.f;
508 } else {
509 doubleInt di;
510 di.i = arg1Bits;
511 arg1 = di.d;
512 di.i = arg2Bits;
513 arg2 = di.d;
514 }
515
516 if (arg1 < arg2) {
517 result = insertBits(result, hiIndex, loIndex, arg1Bits);
518 } else {
519 result = insertBits(result, hiIndex, loIndex, arg2Bits);
520 }
521 }
522 FpDestReg_uqw = result;
523 '''
524
# Mmaxf: item-wise floating point maximum.
# Each 4- or 8-byte item of both sources is reinterpreted as float/double
# (floats are widened to double for the comparison). The first source's
# bits are kept only when arg1 > arg2; in every other case, including
# equality and comparisons that are false because of a NaN, the second
# source's bits are used. The original bit patterns are copied, not a
# recomputed value.
525 class Mmaxf(MediaOp):
526 code = '''
527 union floatInt
528 {
529 float f;
530 uint32_t i;
531 };
532 union doubleInt
533 {
534 double d;
535 uint64_t i;
536 };
537
538 assert(srcSize == destSize);
539 int size = srcSize;
540 int sizeBits = size * 8;
541 assert(srcSize == 4 || srcSize == 8);
542 int items = numItems(size);
543 uint64_t result = FpDestReg_uqw;
544
545 for (int i = 0; i < items; i++) {
546 double arg1, arg2;
547 int hiIndex = (i + 1) * sizeBits - 1;
548 int loIndex = (i + 0) * sizeBits;
549 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
550 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
551
552 if (size == 4) {
553 floatInt fi;
554 fi.i = arg1Bits;
555 arg1 = fi.f;
556 fi.i = arg2Bits;
557 arg2 = fi.f;
558 } else {
559 doubleInt di;
560 di.i = arg1Bits;
561 arg1 = di.d;
562 di.i = arg2Bits;
563 arg2 = di.d;
564 }
565
566 if (arg1 > arg2) {
567 result = insertBits(result, hiIndex, loIndex, arg1Bits);
568 } else {
569 result = insertBits(result, hiIndex, loIndex, arg2Bits);
570 }
571 }
572 FpDestReg_uqw = result;
573 '''
574
# Mmini: item-wise integer minimum.
# Each item is extracted from both sources and also sign-extended to 64
# bits. When signedOp() is true the sign-extended values are compared,
# otherwise the raw (unsigned) bit fields are. The first source wins only
# on a strict "less than"; ties select the second source.
575 class Mmini(MediaOp):
576 code = '''
577
578 assert(srcSize == destSize);
579 int size = srcSize;
580 int sizeBits = size * 8;
581 int items = numItems(size);
582 uint64_t result = FpDestReg_uqw;
583
584 for (int i = 0; i < items; i++) {
585 int hiIndex = (i + 1) * sizeBits - 1;
586 int loIndex = (i + 0) * sizeBits;
587 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
588 int64_t arg1 = arg1Bits |
589 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
590 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
591 int64_t arg2 = arg2Bits |
592 (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
593 uint64_t resBits;
594
595 if (signedOp()) {
596 if (arg1 < arg2) {
597 resBits = arg1Bits;
598 } else {
599 resBits = arg2Bits;
600 }
601 } else {
602 if (arg1Bits < arg2Bits) {
603 resBits = arg1Bits;
604 } else {
605 resBits = arg2Bits;
606 }
607 }
608 result = insertBits(result, hiIndex, loIndex, resBits);
609 }
610 FpDestReg_uqw = result;
611 '''
612
# Mmaxi: item-wise integer maximum.
# Each item is extracted from both sources and also sign-extended to 64
# bits. When signedOp() is true the sign-extended values are compared,
# otherwise the raw (unsigned) bit fields are. The first source wins only
# on a strict "greater than"; ties select the second source.
613 class Mmaxi(MediaOp):
614 code = '''
615
616 assert(srcSize == destSize);
617 int size = srcSize;
618 int sizeBits = size * 8;
619 int items = numItems(size);
620 uint64_t result = FpDestReg_uqw;
621
622 for (int i = 0; i < items; i++) {
623 int hiIndex = (i + 1) * sizeBits - 1;
624 int loIndex = (i + 0) * sizeBits;
625 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
626 int64_t arg1 = arg1Bits |
627 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
628 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
629 int64_t arg2 = arg2Bits |
630 (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
631 uint64_t resBits;
632
633 if (signedOp()) {
634 if (arg1 > arg2) {
635 resBits = arg1Bits;
636 } else {
637 resBits = arg2Bits;
638 }
639 } else {
640 if (arg1Bits > arg2Bits) {
641 resBits = arg1Bits;
642 } else {
643 resBits = arg2Bits;
644 }
645 }
646 result = insertBits(result, hiIndex, loIndex, resBits);
647 }
648 FpDestReg_uqw = result;
649 '''
650
# Msqrt: item-wise floating point square root of FpSrcReg1.
# The constructor takes a single source and passes "InstRegIndex(0)" as the
# second source. Items are 4 bytes (float) or 8 bytes (double), enforced by
# assert; each is reinterpreted through a union, passed to sqrt(), and the
# result's bits are inserted at the same position.
651 class Msqrt(MediaOp):
652 def __init__(self, dest, src, \
653 size = None, destSize = None, srcSize = None, ext = None):
654 super(Msqrt, self).__init__(dest, src,\
655 "InstRegIndex(0)", size, destSize, srcSize, ext)
656 code = '''
657 union floatInt
658 {
659 float f;
660 uint32_t i;
661 };
662 union doubleInt
663 {
664 double d;
665 uint64_t i;
666 };
667
668 assert(srcSize == destSize);
669 int size = srcSize;
670 int sizeBits = size * 8;
671 assert(srcSize == 4 || srcSize == 8);
672 int items = numItems(size);
673 uint64_t result = FpDestReg_uqw;
674
675 for (int i = 0; i < items; i++) {
676 int hiIndex = (i + 1) * sizeBits - 1;
677 int loIndex = (i + 0) * sizeBits;
678 uint64_t argBits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
679
680 if (size == 4) {
681 floatInt fi;
682 fi.i = argBits;
683 fi.f = sqrt(fi.f);
684 argBits = fi.i;
685 } else {
686 doubleInt di;
687 di.i = argBits;
688 di.d = sqrt(di.d);
689 argBits = di.i;
690 }
691 result = insertBits(result, hiIndex, loIndex, argBits);
692 }
693 FpDestReg_uqw = result;
694 '''
695
696 # compute approximate reciprocal --- single-precision only
# The constructor takes a single source and passes "InstRegIndex(0)" as the
# second source. The code asserts srcSize == destSize == 4, then replaces
# each 32-bit item of FpSrcReg1 with 1.0 / item; the division is exact
# host arithmetic rather than a hardware-style approximation.
697 class Mrcp(MediaOp):
698 def __init__(self, dest, src, \
699 size = None, destSize = None, srcSize = None, ext = None):
700 super(Mrcp, self).__init__(dest, src,\
701 "InstRegIndex(0)", size, destSize, srcSize, ext)
702 code = '''
703 union floatInt
704 {
705 float f;
706 uint32_t i;
707 };
708
709 assert(srcSize == 4); // ISA defines single-precision only
710 assert(srcSize == destSize);
711 const int size = 4;
712 const int sizeBits = size * 8;
713 int items = numItems(size);
714 uint64_t result = FpDestReg_uqw;
715
716 for (int i = 0; i < items; i++) {
717 int hiIndex = (i + 1) * sizeBits - 1;
718 int loIndex = (i + 0) * sizeBits;
719 uint64_t argBits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
720
721 floatInt fi;
722 fi.i = argBits;
723 // This is more accuracy than HW provides, but oh well
724 fi.f = 1.0 / fi.f;
725 argBits = fi.i;
726 result = insertBits(result, hiIndex, loIndex, argBits);
727 }
728 FpDestReg_uqw = result;
729 '''
730
# Maddf: item-wise floating point add.
# The embedded code walks numItems(size) items of `size` bytes (asserted to
# be 4 or 8, with srcSize == destSize), reinterprets the matching bit fields
# of FpSrcReg1 and FpSrcReg2 as float or double through a union, adds them,
# and inserts the sum's bits at the same position of the result, which
# starts from the current FpDestReg value.
731 class Maddf(MediaOp):
732 code = '''
733 union floatInt
734 {
735 float f;
736 uint32_t i;
737 };
738 union doubleInt
739 {
740 double d;
741 uint64_t i;
742 };
743
744 assert(srcSize == destSize);
745 int size = srcSize;
746 int sizeBits = size * 8;
747 assert(srcSize == 4 || srcSize == 8);
748 int items = numItems(size);
749 uint64_t result = FpDestReg_uqw;
750
751 for (int i = 0; i < items; i++) {
752 int hiIndex = (i + 1) * sizeBits - 1;
753 int loIndex = (i + 0) * sizeBits;
754 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
755 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
756 uint64_t resBits;
757
758 if (size == 4) {
759 floatInt arg1, arg2, res;
760 arg1.i = arg1Bits;
761 arg2.i = arg2Bits;
762 res.f = arg1.f + arg2.f;
763 resBits = res.i;
764 } else {
765 doubleInt arg1, arg2, res;
766 arg1.i = arg1Bits;
767 arg2.i = arg2Bits;
768 res.d = arg1.d + arg2.d;
769 resBits = res.i;
770 }
771
772 result = insertBits(result, hiIndex, loIndex, resBits);
773 }
774 FpDestReg_uqw = result;
775 '''
776
# Msubf: item-wise floating point subtract (first source minus second).
# The embedded code walks numItems(size) items of `size` bytes (asserted to
# be 4 or 8, with srcSize == destSize), reinterprets the matching bit fields
# of FpSrcReg1 and FpSrcReg2 as float or double through a union, subtracts,
# and inserts the difference's bits at the same position of the result,
# which starts from the current FpDestReg value.
777 class Msubf(MediaOp):
778 code = '''
779 union floatInt
780 {
781 float f;
782 uint32_t i;
783 };
784 union doubleInt
785 {
786 double d;
787 uint64_t i;
788 };
789
790 assert(srcSize == destSize);
791 int size = srcSize;
792 int sizeBits = size * 8;
793 assert(srcSize == 4 || srcSize == 8);
794 int items = numItems(size);
795 uint64_t result = FpDestReg_uqw;
796
797 for (int i = 0; i < items; i++) {
798 int hiIndex = (i + 1) * sizeBits - 1;
799 int loIndex = (i + 0) * sizeBits;
800 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
801 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
802 uint64_t resBits;
803
804 if (size == 4) {
805 floatInt arg1, arg2, res;
806 arg1.i = arg1Bits;
807 arg2.i = arg2Bits;
808 res.f = arg1.f - arg2.f;
809 resBits = res.i;
810 } else {
811 doubleInt arg1, arg2, res;
812 arg1.i = arg1Bits;
813 arg2.i = arg2Bits;
814 res.d = arg1.d - arg2.d;
815 resBits = res.i;
816 }
817
818 result = insertBits(result, hiIndex, loIndex, resBits);
819 }
820 FpDestReg_uqw = result;
821 '''
822
# Mmulf: item-wise floating point multiply.
# The embedded code walks numItems(size) items of `size` bytes (asserted to
# be 4 or 8, with srcSize == destSize), reinterprets the matching bit fields
# of FpSrcReg1 and FpSrcReg2 as float or double through a union, multiplies
# them, and inserts the product's bits at the same position of the result,
# which starts from the current FpDestReg value.
823 class Mmulf(MediaOp):
824 code = '''
825 union floatInt
826 {
827 float f;
828 uint32_t i;
829 };
830 union doubleInt
831 {
832 double d;
833 uint64_t i;
834 };
835
836 assert(srcSize == destSize);
837 int size = srcSize;
838 int sizeBits = size * 8;
839 assert(srcSize == 4 || srcSize == 8);
840 int items = numItems(size);
841 uint64_t result = FpDestReg_uqw;
842
843 for (int i = 0; i < items; i++) {
844 int hiIndex = (i + 1) * sizeBits - 1;
845 int loIndex = (i + 0) * sizeBits;
846 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
847 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
848 uint64_t resBits;
849
850 if (size == 4) {
851 floatInt arg1, arg2, res;
852 arg1.i = arg1Bits;
853 arg2.i = arg2Bits;
854 res.f = arg1.f * arg2.f;
855 resBits = res.i;
856 } else {
857 doubleInt arg1, arg2, res;
858 arg1.i = arg1Bits;
859 arg2.i = arg2Bits;
860 res.d = arg1.d * arg2.d;
861 resBits = res.i;
862 }
863
864 result = insertBits(result, hiIndex, loIndex, resBits);
865 }
866 FpDestReg_uqw = result;
867 '''
868
# Mdivf: item-wise floating point divide (first source over second).
# The embedded code walks numItems(size) items of `size` bytes (asserted to
# be 4 or 8, with srcSize == destSize), reinterprets the matching bit fields
# of FpSrcReg1 and FpSrcReg2 as float or double through a union, divides,
# and inserts the quotient's bits at the same position of the result, which
# starts from the current FpDestReg value.
869 class Mdivf(MediaOp):
870 code = '''
871 union floatInt
872 {
873 float f;
874 uint32_t i;
875 };
876 union doubleInt
877 {
878 double d;
879 uint64_t i;
880 };
881
882 assert(srcSize == destSize);
883 int size = srcSize;
884 int sizeBits = size * 8;
885 assert(srcSize == 4 || srcSize == 8);
886 int items = numItems(size);
887 uint64_t result = FpDestReg_uqw;
888
889 for (int i = 0; i < items; i++) {
890 int hiIndex = (i + 1) * sizeBits - 1;
891 int loIndex = (i + 0) * sizeBits;
892 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
893 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
894 uint64_t resBits;
895
896 if (size == 4) {
897 floatInt arg1, arg2, res;
898 arg1.i = arg1Bits;
899 arg2.i = arg2Bits;
900 res.f = arg1.f / arg2.f;
901 resBits = res.i;
902 } else {
903 doubleInt arg1, arg2, res;
904 arg1.i = arg1Bits;
905 arg2.i = arg2Bits;
906 res.d = arg1.d / arg2.d;
907 resBits = res.i;
908 }
909
910 result = insertBits(result, hiIndex, loIndex, resBits);
911 }
912 FpDestReg_uqw = result;
913 '''
914
# Maddi: item-wise integer add with optional saturation.
# The raw sum of each pair of items is used unless bit 1 of ext (0x2) is
# set. With that bit set and signedOp() true, a result whose sign differs
# from two same-signed inputs is clamped: to 1 << (sizeBits - 1) when the
# result sign is 0, else to mask(sizeBits - 1). With signedOp() false the
# result becomes mask(sizeBits) when findCarry() reports a carry.
# NOTE(review): findCarry() and signedOp() are defined outside this view.
915 class Maddi(MediaOp):
916 code = '''
917 assert(srcSize == destSize);
918 int size = srcSize;
919 int sizeBits = size * 8;
920 int items = numItems(size);
921 uint64_t result = FpDestReg_uqw;
922
923 for (int i = 0; i < items; i++) {
924 int hiIndex = (i + 1) * sizeBits - 1;
925 int loIndex = (i + 0) * sizeBits;
926 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
927 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
928 uint64_t resBits = arg1Bits + arg2Bits;
929
929
930 if (ext & 0x2) {
931 if (signedOp()) {
932 int arg1Sign = bits(arg1Bits, sizeBits - 1);
933 int arg2Sign = bits(arg2Bits, sizeBits - 1);
934 int resSign = bits(resBits, sizeBits - 1);
935 if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
936 if (resSign == 0)
937 resBits = (ULL(1) << (sizeBits - 1));
938 else
939 resBits = mask(sizeBits - 1);
940 }
941 } else {
942 if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
943 resBits = mask(sizeBits);
944 }
945 }
946
947 result = insertBits(result, hiIndex, loIndex, resBits);
948 }
949 FpDestReg_uqw = result;
950 '''
951
952 class Msubi(MediaOp):
953 code = '''
954 assert(srcSize == destSize);
955 int size = srcSize;
956 int sizeBits = size * 8;
957 int items = numItems(size);
958 uint64_t result = FpDestReg_uqw;
959
960 for (int i = 0; i < items; i++) {
961 int hiIndex = (i + 1) * sizeBits - 1;
962 int loIndex = (i + 0) * sizeBits;
963 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
964 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
965 uint64_t resBits = arg1Bits - arg2Bits;
930 if (ext & 0x2) {
931 if (signedOp()) {
932 int arg1Sign = bits(arg1Bits, sizeBits - 1);
933 int arg2Sign = bits(arg2Bits, sizeBits - 1);
934 int resSign = bits(resBits, sizeBits - 1);
935 if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
936 if (resSign == 0)
937 resBits = (ULL(1) << (sizeBits - 1));
938 else
939 resBits = mask(sizeBits - 1);
940 }
941 } else {
942 if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
943 resBits = mask(sizeBits);
944 }
945 }
946
947 result = insertBits(result, hiIndex, loIndex, resBits);
948 }
949 FpDestReg_uqw = result;
950 '''
951
# Msubi: item-wise integer subtract (first source minus second) with
# optional saturation.
# The raw difference is used unless bit 1 of ext (0x2) is set. In the signed
# case (signedOp() true) the second operand's sign is inverted before the
# same overflow test used for addition, and the result is clamped to
# 1 << (sizeBits - 1) or mask(sizeBits - 1). In the unsigned case the result
# is 0 when arg2Bits > arg1Bits, or mask(sizeBits) when findCarry() on
# (arg1Bits, ~arg2Bits) reports no carry.
# NOTE(review): findCarry() and signedOp() are defined outside this view.
952 class Msubi(MediaOp):
953 code = '''
954 assert(srcSize == destSize);
955 int size = srcSize;
956 int sizeBits = size * 8;
957 int items = numItems(size);
958 uint64_t result = FpDestReg_uqw;
959
960 for (int i = 0; i < items; i++) {
961 int hiIndex = (i + 1) * sizeBits - 1;
962 int loIndex = (i + 0) * sizeBits;
963 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
964 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
965 uint64_t resBits = arg1Bits - arg2Bits;
966
966
967 if (ext & 0x2) {
968 if (signedOp()) {
969 int arg1Sign = bits(arg1Bits, sizeBits - 1);
970 int arg2Sign = !bits(arg2Bits, sizeBits - 1);
971 int resSign = bits(resBits, sizeBits - 1);
972 if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
973 if (resSign == 0)
974 resBits = (ULL(1) << (sizeBits - 1));
975 else
976 resBits = mask(sizeBits - 1);
977 }
978 } else {
979 if (arg2Bits > arg1Bits) {
980 resBits = 0;
981 } else if (!findCarry(sizeBits, resBits,
982 arg1Bits, ~arg2Bits)) {
983 resBits = mask(sizeBits);
984 }
985 }
986 }
987
988 result = insertBits(result, hiIndex, loIndex, resBits);
989 }
990 FpDestReg_uqw = result;
991 '''
992
993 class Mmuli(MediaOp):
994 code = '''
995 int srcBits = srcSize * 8;
996 int destBits = destSize * 8;
997 assert(destBits <= 64);
998 assert(destSize >= srcSize);
999 int items = numItems(destSize);
1000 uint64_t result = FpDestReg_uqw;
1001
1002 for (int i = 0; i < items; i++) {
1003 int offset = 0;
1004 if (ext & 16) {
1005 if (ext & 32)
1006 offset = i * (destBits - srcBits);
1007 else
1008 offset = i * (destBits - srcBits) + srcBits;
1009 }
1010 int srcHiIndex = (i + 1) * srcBits - 1 + offset;
1011 int srcLoIndex = (i + 0) * srcBits + offset;
1012 uint64_t arg1Bits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
1013 uint64_t arg2Bits = bits(FpSrcReg2_uqw, srcHiIndex, srcLoIndex);
1014 uint64_t resBits;
1015
1016 if (signedOp()) {
1017 int64_t arg1 = arg1Bits |
1018 (0 - (arg1Bits & (ULL(1) << (srcBits - 1))));
1019 int64_t arg2 = arg2Bits |
1020 (0 - (arg2Bits & (ULL(1) << (srcBits - 1))));
1021 resBits = (uint64_t)(arg1 * arg2);
1022 } else {
1023 resBits = arg1Bits * arg2Bits;
1024 }
1025
1026 if (ext & 0x4)
1027 resBits += (ULL(1) << (destBits - 1));
967 if (ext & 0x2) {
968 if (signedOp()) {
969 int arg1Sign = bits(arg1Bits, sizeBits - 1);
970 int arg2Sign = !bits(arg2Bits, sizeBits - 1);
971 int resSign = bits(resBits, sizeBits - 1);
972 if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
973 if (resSign == 0)
974 resBits = (ULL(1) << (sizeBits - 1));
975 else
976 resBits = mask(sizeBits - 1);
977 }
978 } else {
979 if (arg2Bits > arg1Bits) {
980 resBits = 0;
981 } else if (!findCarry(sizeBits, resBits,
982 arg1Bits, ~arg2Bits)) {
983 resBits = mask(sizeBits);
984 }
985 }
986 }
987
988 result = insertBits(result, hiIndex, loIndex, resBits);
989 }
990 FpDestReg_uqw = result;
991 '''
992
# Mmuli: item-wise integer multiply of srcSize-byte inputs into
# destSize-byte results (destSize >= srcSize, at most 64 result bits).
# ext & 16 moves the source window for item i by i * (destBits - srcBits),
# plus an extra srcBits unless ext & 32 is also set. signedOp() selects a
# multiply of sign-extended inputs instead of the raw bit fields.
# ext & 0x4 adds 1 << (destBits - 1) to the product, and multHi() shifts the
# product right by destBits before it is inserted.
# NOTE(review): multHi() and signedOp() are defined outside this view.
993 class Mmuli(MediaOp):
994 code = '''
995 int srcBits = srcSize * 8;
996 int destBits = destSize * 8;
997 assert(destBits <= 64);
998 assert(destSize >= srcSize);
999 int items = numItems(destSize);
1000 uint64_t result = FpDestReg_uqw;
1001
1002 for (int i = 0; i < items; i++) {
1003 int offset = 0;
1004 if (ext & 16) {
1005 if (ext & 32)
1006 offset = i * (destBits - srcBits);
1007 else
1008 offset = i * (destBits - srcBits) + srcBits;
1009 }
1010 int srcHiIndex = (i + 1) * srcBits - 1 + offset;
1011 int srcLoIndex = (i + 0) * srcBits + offset;
1012 uint64_t arg1Bits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
1013 uint64_t arg2Bits = bits(FpSrcReg2_uqw, srcHiIndex, srcLoIndex);
1014 uint64_t resBits;
1015
1016 if (signedOp()) {
1017 int64_t arg1 = arg1Bits |
1018 (0 - (arg1Bits & (ULL(1) << (srcBits - 1))));
1019 int64_t arg2 = arg2Bits |
1020 (0 - (arg2Bits & (ULL(1) << (srcBits - 1))));
1021 resBits = (uint64_t)(arg1 * arg2);
1022 } else {
1023 resBits = arg1Bits * arg2Bits;
1024 }
1025
1026 if (ext & 0x4)
1027 resBits += (ULL(1) << (destBits - 1));
1028
1028
1029 if (multHi())
1030 resBits >>= destBits;
1031
1032 int destHiIndex = (i + 1) * destBits - 1;
1033 int destLoIndex = (i + 0) * destBits;
1034 result = insertBits(result, destHiIndex, destLoIndex, resBits);
1035 }
1036 FpDestReg_uqw = result;
1037 '''
1038
1039 class Mavg(MediaOp):
1040 code = '''
1041 assert(srcSize == destSize);
1042 int size = srcSize;
1043 int sizeBits = size * 8;
1044 int items = numItems(size);
1045 uint64_t result = FpDestReg_uqw;
1046
1047 for (int i = 0; i < items; i++) {
1048 int hiIndex = (i + 1) * sizeBits - 1;
1049 int loIndex = (i + 0) * sizeBits;
1050 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
1051 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
1052 uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;
1029 if (multHi())
1030 resBits >>= destBits;
1031
1032 int destHiIndex = (i + 1) * destBits - 1;
1033 int destLoIndex = (i + 0) * destBits;
1034 result = insertBits(result, destHiIndex, destLoIndex, resBits);
1035 }
1036 FpDestReg_uqw = result;
1037 '''
1038
# Mavg: item-wise rounded average of the two sources.
# Each pair of items is treated as unsigned bit fields and combined as
# (a + b + 1) / 2, so a half is rounded up; the sum is formed in 64 bits
# before the division.
1039 class Mavg(MediaOp):
1040 code = '''
1041 assert(srcSize == destSize);
1042 int size = srcSize;
1043 int sizeBits = size * 8;
1044 int items = numItems(size);
1045 uint64_t result = FpDestReg_uqw;
1046
1047 for (int i = 0; i < items; i++) {
1048 int hiIndex = (i + 1) * sizeBits - 1;
1049 int loIndex = (i + 0) * sizeBits;
1050 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
1051 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
1052 uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;
1053
1053
1054 result = insertBits(result, hiIndex, loIndex, resBits);
1055 }
1056 FpDestReg_uqw = result;
1057 '''
1058
# Msad: sum of absolute differences.
# For every srcSize-byte item in one FloatRegBits, the difference of the two
# sources' items is made non-negative and accumulated. FpDestReg receives
# the total masked to destSize bytes; unlike the item-wise ops, the previous
# FpDestReg value is not read.
1059 class Msad(MediaOp):
1060 code = '''
1061 int srcBits = srcSize * 8;
1062 int items = sizeof(FloatRegBits) / srcSize;
1063
1064 uint64_t sum = 0;
1065 for (int i = 0; i < items; i++) {
1066 int hiIndex = (i + 1) * srcBits - 1;
1067 int loIndex = (i + 0) * srcBits;
1068 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
1069 uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
1070 int64_t resBits = arg1Bits - arg2Bits;
1071 if (resBits < 0)
1072 resBits = -resBits;
1073 sum += resBits;
1074 }
1075 FpDestReg_uqw = sum & mask(destSize * 8);
1076 '''
1077
# Msrl: item-wise logical right shift of FpSrcReg1.
# The same shift amount, read from op2_uqw, applies to every item. A shift
# of sizeBits or more produces 0; otherwise the shifted item is masked to
# its remaining (sizeBits - shiftAmt) bits.
1078 class Msrl(MediaOp):
1079 code = '''
1080
1081 assert(srcSize == destSize);
1082 int size = srcSize;
1083 int sizeBits = size * 8;
1084 int items = numItems(size);
1085 uint64_t shiftAmt = op2_uqw;
1086 uint64_t result = FpDestReg_uqw;
1087
1088 for (int i = 0; i < items; i++) {
1089 int hiIndex = (i + 1) * sizeBits - 1;
1090 int loIndex = (i + 0) * sizeBits;
1091 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
1092 uint64_t resBits;
1093 if (shiftAmt >= sizeBits) {
1094 resBits = 0;
1095 } else {
1096 resBits = (arg1Bits >> shiftAmt) &
1097 mask(sizeBits - shiftAmt);
1098 }
1099
1100 result = insertBits(result, hiIndex, loIndex, resBits);
1101 }
1102 FpDestReg_uqw = result;
1103 '''
1104
# Msra: item-wise arithmetic right shift of FpSrcReg1.
# The same shift amount, read from op2_uqw, applies to every item. A shift
# of sizeBits or more fills the item with copies of its top bit (all ones
# or zero). Otherwise the item is shifted and then sign-extended from bit
# (sizeBits - 1 - shiftAmt), which is where the original top bit ends up.
1105 class Msra(MediaOp):
1106 code = '''
1107
1108 assert(srcSize == destSize);
1109 int size = srcSize;
1110 int sizeBits = size * 8;
1111 int items = numItems(size);
1112 uint64_t shiftAmt = op2_uqw;
1113 uint64_t result = FpDestReg_uqw;
1114
1115 for (int i = 0; i < items; i++) {
1116 int hiIndex = (i + 1) * sizeBits - 1;
1117 int loIndex = (i + 0) * sizeBits;
1118 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
1119 uint64_t resBits;
1120 if (shiftAmt >= sizeBits) {
1121 if (bits(arg1Bits, sizeBits - 1))
1122 resBits = mask(sizeBits);
1123 else
1124 resBits = 0;
1125 } else {
1126 resBits = (arg1Bits >> shiftAmt);
1127 resBits = resBits |
1128 (0 - (resBits & (ULL(1) << (sizeBits - 1 - shiftAmt))));
1129 }
1130
1131 result = insertBits(result, hiIndex, loIndex, resBits);
1132 }
1133 FpDestReg_uqw = result;
1134 '''
1135
# Msll: item-wise logical left shift of FpSrcReg1.
# The same shift amount, read from op2_uqw, applies to every item. A shift
# of sizeBits or more produces 0. Bits shifted past the item's width are not
# masked here; the code passes the wider value straight to insertBits().
1136 class Msll(MediaOp):
1137 code = '''
1138
1139 assert(srcSize == destSize);
1140 int size = srcSize;
1141 int sizeBits = size * 8;
1142 int items = numItems(size);
1143 uint64_t shiftAmt = op2_uqw;
1144 uint64_t result = FpDestReg_uqw;
1145
1146 for (int i = 0; i < items; i++) {
1147 int hiIndex = (i + 1) * sizeBits - 1;
1148 int loIndex = (i + 0) * sizeBits;
1149 uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
1150 uint64_t resBits;
1151 if (shiftAmt >= sizeBits) {
1152 resBits = 0;
1153 } else {
1154 resBits = (arg1Bits << shiftAmt);
1155 }
1156
1157 result = insertBits(result, hiIndex, loIndex, resBits);
1158 }
1159 FpDestReg_uqw = result;
1160 '''
1161
# Cvtf2i: convert floating point items of FpSrcReg1 to integer items.
# The constructor takes a single source and passes "InstRegIndex(0)" as the
# second source. Source and destination items are each 4 or 8 bytes. When
# their sizes differ by a factor of two, ext & 0x2 selects the upper half of
# the narrower operand (the destination when narrowing, the source when
# widening). ext & 0x4 adds +/-0.5 (by sign) before the truncating
# conversion.
class Cvtf2i(MediaOp):
    def __init__(self, dest, src, \
            size = None, destSize = None, srcSize = None, ext = None):
        super(Cvtf2i, self).__init__(dest, src,\
                "InstRegIndex(0)", size, destSize, srcSize, ext)
    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(destSize == 4 || destSize == 8);
        assert(srcSize == 4 || srcSize == 8);
        int srcSizeBits = srcSize * 8;
        int destSizeBits = destSize * 8;
        int items;
        int srcStart = 0;
        int destStart = 0;
        if (srcSize == 2 * destSize) {
            items = numItems(srcSize);
            if (ext & 0x2)
                destStart = destSizeBits * items;
        } else if (destSize == 2 * srcSize) {
            items = numItems(destSize);
            if (ext & 0x2)
                srcStart = srcSizeBits * items;
        } else {
            items = numItems(destSize);
        }
        uint64_t result = FpDestReg_uqw;

        for (int i = 0; i < items; i++) {
            int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
            int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
            uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
            double arg;

            if (srcSize == 4) {
                floatInt fi;
                fi.i = argBits;
                arg = fi.f;
            } else {
                doubleInt di;
                di.i = argBits;
                arg = di.d;
            }

            if (ext & 0x4) {
                if (arg >= 0)
                    arg += 0.5;
                else
                    arg -= 0.5;
            }

            // Convert through the signed type of the destination width.
            // Casting a negative floating point value straight to an
            // unsigned type is undefined; going through the signed type
            // yields the two's complement pattern for in-range negatives.
            if (destSize == 4) {
                argBits = static_cast<uint32_t>(static_cast<int32_t>(arg));
            } else {
                argBits = static_cast<uint64_t>(static_cast<int64_t>(arg));
            }
            int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
            int destLoIndex = destStart + (i + 0) * destSizeBits;
            result = insertBits(result, destHiIndex, destLoIndex, argBits);
        }
        FpDestReg_uqw = result;
    '''
1233
# Cvti2f: convert integer items of FpSrcReg1 to floating point items.
# The constructor takes a single source and passes "InstRegIndex(0)" as the
# second source. Each source item is sign-extended to 64 bits, converted to
# double, and stored as float (destSize 4) or double (destSize 8). When the
# sizes differ by a factor of two, ext & 0x2 selects the upper half of the
# narrower operand (the destination when narrowing, the source when
# widening).
1234 class Cvti2f(MediaOp):
1235 def __init__(self, dest, src, \
1236 size = None, destSize = None, srcSize = None, ext = None):
1237 super(Cvti2f, self).__init__(dest, src,\
1238 "InstRegIndex(0)", size, destSize, srcSize, ext)
1239 code = '''
1240 union floatInt
1241 {
1242 float f;
1243 uint32_t i;
1244 };
1245 union doubleInt
1246 {
1247 double d;
1248 uint64_t i;
1249 };
1250
1251 assert(destSize == 4 || destSize == 8);
1252 assert(srcSize == 4 || srcSize == 8);
1253 int srcSizeBits = srcSize * 8;
1254 int destSizeBits = destSize * 8;
1255 int items;
1256 int srcStart = 0;
1257 int destStart = 0;
1258 if (srcSize == 2 * destSize) {
1259 items = numItems(srcSize);
1260 if (ext & 0x2)
1261 destStart = destSizeBits * items;
1262 } else if (destSize == 2 * srcSize) {
1263 items = numItems(destSize);
1264 if (ext & 0x2)
1265 srcStart = srcSizeBits * items;
1266 } else {
1267 items = numItems(destSize);
1268 }
1269 uint64_t result = FpDestReg_uqw;
1270
1271 for (int i = 0; i < items; i++) {
1272 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1273 int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1274 uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
1275
1276 int64_t sArg = argBits |
1277 (0 - (argBits & (ULL(1) << (srcSizeBits - 1))));
1278 double arg = sArg;
1279
1280 if (destSize == 4) {
1281 floatInt fi;
1282 fi.f = arg;
1283 argBits = fi.i;
1284 } else {
1285 doubleInt di;
1286 di.d = arg;
1287 argBits = di.i;
1288 }
1289 int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1290 int destLoIndex = destStart + (i + 0) * destSizeBits;
1291 result = insertBits(result, destHiIndex, destLoIndex, argBits);
1292 }
1293 FpDestReg_uqw = result;
1294 '''
1295
# Cvtf2f: convert floating point items of FpSrcReg1 between float and
# double.
# The constructor takes a single source and passes "InstRegIndex(0)" as the
# second source. Each item is read as float (srcSize 4) or double
# (srcSize 8) into a double and written back as float (destSize 4) or double
# (destSize 8). When the sizes differ by a factor of two, ext & 0x2 selects
# the upper half of the narrower operand (the destination when narrowing,
# the source when widening).
1296 class Cvtf2f(MediaOp):
1297 def __init__(self, dest, src, \
1298 size = None, destSize = None, srcSize = None, ext = None):
1299 super(Cvtf2f, self).__init__(dest, src,\
1300 "InstRegIndex(0)", size, destSize, srcSize, ext)
1301 code = '''
1302 union floatInt
1303 {
1304 float f;
1305 uint32_t i;
1306 };
1307 union doubleInt
1308 {
1309 double d;
1310 uint64_t i;
1311 };
1312
1313 assert(destSize == 4 || destSize == 8);
1314 assert(srcSize == 4 || srcSize == 8);
1315 int srcSizeBits = srcSize * 8;
1316 int destSizeBits = destSize * 8;
1317 int items;
1318 int srcStart = 0;
1319 int destStart = 0;
1320 if (srcSize == 2 * destSize) {
1321 items = numItems(srcSize);
1322 if (ext & 0x2)
1323 destStart = destSizeBits * items;
1324 } else if (destSize == 2 * srcSize) {
1325 items = numItems(destSize);
1326 if (ext & 0x2)
1327 srcStart = srcSizeBits * items;
1328 } else {
1329 items = numItems(destSize);
1330 }
1331 uint64_t result = FpDestReg_uqw;
1332
1333 for (int i = 0; i < items; i++) {
1334 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1335 int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1336 uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
1337 double arg;
1338
1339 if (srcSize == 4) {
1340 floatInt fi;
1341 fi.i = argBits;
1342 arg = fi.f;
1343 } else {
1344 doubleInt di;
1345 di.i = argBits;
1346 arg = di.d;
1347 }
1348 if (destSize == 4) {
1349 floatInt fi;
1350 fi.f = arg;
1351 argBits = fi.i;
1352 } else {
1353 doubleInt di;
1354 di.d = arg;
1355 argBits = di.i;
1356 }
1357 int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1358 int destLoIndex = destStart + (i + 0) * destSizeBits;
1359 result = insertBits(result, destHiIndex, destLoIndex, argBits);
1360 }
1361 FpDestReg_uqw = result;
1362 '''
1363
class Mcmpi2r(MediaOp):
    # Packed signed-integer compare producing a per-lane mask.
    #
    # Source and destination element sizes must match.  Every lane of both
    # sources is sign extended to 64 bits and compared:
    #   ext bit 1 clear -> lane is all ones when src1 == src2
    #   ext bit 1 set   -> lane is all ones when src1 >  src2 (signed)
    # Lanes whose comparison fails are written as zero.  This microop is
    # integer only, so no float/double reinterpretation helpers are declared.
    code = '''
        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = numItems(size);
        uint64_t result = FpDestReg_uqw;

        // Loop invariants: the sign bit of one lane and the predicate choice.
        const uint64_t signBit = ULL(1) << (sizeBits - 1);
        const bool greaterThan = (ext & 0x2) == 0x2;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;

            // OR-ing in (0 - sign bit) replicates the sign through bit 63.
            uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
            int64_t arg1 = arg1Bits | (0 - (arg1Bits & signBit));
            uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
            int64_t arg2 = arg2Bits | (0 - (arg2Bits & signBit));

            uint64_t resBits = 0;
            if (greaterThan ? (arg1 > arg2) : (arg1 == arg2))
                resBits = mask(sizeBits);

            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg_uqw = result;
    '''
1402
class Mcmpf2r(MediaOp):
    # Packed floating-point compare producing a per-lane mask.
    #
    # The low three bits of ext choose the predicate.  A lane whose predicate
    # holds is written as all ones, any other lane as zero:
    #   0: equal              4: not equal, or unordered
    #   1: less than          5: not less than, or unordered
    #   2: less or equal      6: not less-or-equal, or unordered
    #   3: unordered          7: ordered
    # "Unordered" means at least one of the two lane values is a NaN.
    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        const int laneBytes = srcSize;
        const int laneBits = laneBytes * 8;
        const int lanes = numItems(laneBytes);
        uint64_t merged = FpDestReg_uqw;

        for (int lane = 0; lane < lanes; lane++) {
            const int lo = lane * laneBits;
            const int hi = lo + laneBits - 1;
            const uint64_t lhsRaw = bits(FpSrcReg1_uqw, hi, lo);
            const uint64_t rhsRaw = bits(FpSrcReg2_uqw, hi, lo);

            // Reinterpret both lanes as floating point values.
            double lhs, rhs;
            if (laneBytes == 4) {
                floatInt fi;
                fi.i = lhsRaw;
                lhs = fi.f;
                fi.i = rhsRaw;
                rhs = fi.f;
            } else {
                doubleInt di;
                di.i = lhsRaw;
                lhs = di.d;
                di.i = rhsRaw;
                rhs = di.d;
            }

            const bool unordered = std::isnan(lhs) || std::isnan(rhs);
            bool hit = false;
            switch (ext & mask(3)) {
              case 0:
                hit = !unordered && lhs == rhs;
                break;
              case 1:
                hit = !unordered && lhs < rhs;
                break;
              case 2:
                hit = !unordered && lhs <= rhs;
                break;
              case 3:
                hit = unordered;
                break;
              case 4:
                hit = unordered || lhs != rhs;
                break;
              case 5:
                hit = unordered || !(lhs < rhs);
                break;
              case 6:
                hit = unordered || !(lhs <= rhs);
                break;
              case 7:
                hit = !unordered;
                break;
            }

            uint64_t laneMask = 0;
            if (hit)
                laneMask = mask(laneBits);
            merged = insertBits(merged, hi, lo, laneMask);
        }
        FpDestReg_uqw = merged;
    '''
1484
class Mcmpf2rf(MediaOp):
    # Scalar floating-point compare that reports its outcome in the flags.
    #
    # Callers supply the two sources only; the destination slot of MediaOp
    # is filled with "InstRegIndex(0)".  Only the lowest element of each
    # source is compared, and the element size must be 4 or 8 bytes.
    def __init__(self, src1, src2,
                 size=None, destSize=None, srcSize=None, ext=None):
        super(Mcmpf2rf, self).__init__(
            "InstRegIndex(0)", src1, src2, size, destSize, srcSize, ext)

    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        assert(srcSize == 4 || srcSize == 8);
        const int laneBytes = srcSize;
        const int laneBits = laneBytes * 8;

        // Only the lowest element of each source takes part.
        const uint64_t lhsRaw = bits(FpSrcReg1_uqw, laneBits - 1, 0);
        const uint64_t rhsRaw = bits(FpSrcReg2_uqw, laneBits - 1, 0);
        double lhs, rhs;
        if (laneBytes == 4) {
            floatInt fi;
            fi.i = lhsRaw;
            lhs = fi.f;
            fi.i = rhsRaw;
            rhs = fi.f;
        } else {
            doubleInt di;
            di.i = lhsRaw;
            lhs = di.d;
            di.i = rhsRaw;
            rhs = di.d;
        }

        const bool unordered = std::isnan(lhs) || std::isnan(rhs);
        const bool below = !unordered && lhs < rhs;
        const bool same = !unordered && lhs == rhs;

        //               ZF PF CF
        // Unordered     1  1  1
        // Greater than  0  0  0
        // Less than     0  0  1
        // Equal         1  0  0
        // OF = SF = AF = 0
        ccFlagBits = ccFlagBits & ~(SFBit | AFBit | ZFBit | PFBit);
        cfofBits = cfofBits & ~(OFBit | CFBit);

        if (unordered)
            ccFlagBits = ccFlagBits | (ZFBit | PFBit);
        else if (same)
            ccFlagBits = ccFlagBits | ZFBit;

        if (unordered || below)
            cfofBits = cfofBits | CFBit;
    '''
1542
class Emms(MediaOp):
    # Operand-less microop whose only effect is to write 0xFFFF to FTW.
    # NOTE(review): presumably this marks every x87 register as empty;
    # confirm against the FTW encoding used by this model.
    #
    # The destination is the FTW misc register, both source slots are
    # "InstRegIndex(0)", and the fourth argument (2) lands in the slot that
    # sibling classes use for size.
    def __init__(self):
        super(Emms, self).__init__(
            'InstRegIndex(MISCREG_FTW)',
            'InstRegIndex(0)',
            'InstRegIndex(0)',
            2)

    code = 'FTW = 0xFFFF;'
1548}};
1054 result = insertBits(result, hiIndex, loIndex, resBits);
1055 }
1056 FpDestReg_uqw = result;
1057 '''
1058
class Msad(MediaOp):
    # Sum of absolute differences: accumulates |src1 - src2| over every
    # srcSize-wide element of the two sources and writes the total,
    # truncated to destSize bytes, to the destination.
    code = '''
        const int laneBits = srcSize * 8;
        const int lanes = sizeof(FloatRegBits) / srcSize;

        uint64_t total = 0;
        for (int lane = 0; lane < lanes; lane++) {
            const int lo = lane * laneBits;
            const int hi = lo + laneBits - 1;
            const uint64_t lhs = bits(FpSrcReg1_uqw, hi, lo);
            const uint64_t rhs = bits(FpSrcReg2_uqw, hi, lo);

            // The difference is taken as a signed 64-bit value so that its
            // magnitude can be recovered when lhs is the smaller operand.
            const int64_t diff = lhs - rhs;
            total += (diff < 0) ? -diff : diff;
        }
        FpDestReg_uqw = total & mask(destSize * 8);
    '''
1077
class Msrl(MediaOp):
    # Packed logical shift right.  Every lane of the first source is shifted
    # right by the same amount, taken from the second operand, with zeros
    # shifted in.  A shift amount of at least the lane width clears the lane.
    code = '''

        assert(srcSize == destSize);
        const int laneBits = srcSize * 8;
        const int lanes = numItems(srcSize);
        const uint64_t count = op2_uqw;
        const bool shiftedOut = (count >= laneBits);
        uint64_t merged = FpDestReg_uqw;

        for (int lane = 0; lane < lanes; lane++) {
            const int lo = lane * laneBits;
            const int hi = lo + laneBits - 1;
            const uint64_t raw = bits(FpSrcReg1_uqw, hi, lo);

            uint64_t shifted = 0;
            if (!shiftedOut) {
                // Keep only the bits that remain inside the lane.
                shifted = (raw >> count) & mask(laneBits - count);
            }

            merged = insertBits(merged, hi, lo, shifted);
        }
        FpDestReg_uqw = merged;
    '''
1104
class Msra(MediaOp):
    # Packed arithmetic shift right.  Every lane of the first source is
    # shifted right by the same amount, taken from the second operand, and
    # the lane's sign bit is replicated into the vacated positions.  A shift
    # amount of at least the lane width leaves all ones for a negative lane
    # and zero otherwise.
    code = '''

        assert(srcSize == destSize);
        const int laneBits = srcSize * 8;
        const int lanes = numItems(srcSize);
        const uint64_t count = op2_uqw;
        uint64_t merged = FpDestReg_uqw;

        for (int lane = 0; lane < lanes; lane++) {
            const int lo = lane * laneBits;
            const int hi = lo + laneBits - 1;
            const uint64_t raw = bits(FpSrcReg1_uqw, hi, lo);

            uint64_t shifted;
            if (count >= laneBits) {
                // Everything is shifted out; only the sign survives.
                shifted = bits(raw, laneBits - 1) ? mask(laneBits) : 0;
            } else {
                // After the shift the old sign bit sits at this position;
                // OR-ing in (0 - that bit) smears it across the upper bits.
                const uint64_t movedSign = ULL(1) << (laneBits - 1 - count);
                shifted = (raw >> count);
                shifted = shifted | (0 - (shifted & movedSign));
            }

            merged = insertBits(merged, hi, lo, shifted);
        }
        FpDestReg_uqw = merged;
    '''
1135
class Msll(MediaOp):
    # Packed logical shift left.  Every lane of the first source is shifted
    # left by the same amount, taken from the second operand.  A shift amount
    # of at least the lane width clears the lane; bits pushed past the top of
    # a lane are discarded when the lane is inserted into the result.
    code = '''

        assert(srcSize == destSize);
        const int laneBits = srcSize * 8;
        const int lanes = numItems(srcSize);
        const uint64_t count = op2_uqw;
        const bool shiftedOut = (count >= laneBits);
        uint64_t merged = FpDestReg_uqw;

        for (int lane = 0; lane < lanes; lane++) {
            const int lo = lane * laneBits;
            const int hi = lo + laneBits - 1;
            const uint64_t raw = bits(FpSrcReg1_uqw, hi, lo);

            uint64_t shifted = 0;
            if (!shiftedOut)
                shifted = (raw << count);

            merged = insertBits(merged, hi, lo, shifted);
        }
        FpDestReg_uqw = merged;
    '''
1161
class Cvtf2i(MediaOp):
    # Packed float-to-signed-integer conversion.
    #
    # Callers supply a destination and a single source; the second source
    # slot of MediaOp is filled with "InstRegIndex(0)".
    def __init__(self, dest, src, \
            size = None, destSize = None, srcSize = None, ext = None):
        super(Cvtf2i, self).__init__(dest, src,\
                "InstRegIndex(0)", size, destSize, srcSize, ext)

    # Both element sizes must be 4 or 8 bytes.  When they differ, the lane
    # count comes from the wider size and bit 1 of ext shifts the window of
    # the narrower side up by (lanes * narrow width) bits.  Bit 2 of ext
    # selects round-half-away-from-zero (add or subtract 0.5 before
    # truncating); otherwise the value is truncated toward zero.
    #
    # The conversion goes through the signed integer type of the destination
    # width: casting a negative floating point value straight to an unsigned
    # type is undefined behaviour in C++.  NaNs and values outside the signed
    # range produce the x86 "integer indefinite" pattern (only the sign bit
    # set) instead of invoking undefined behaviour.
    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(destSize == 4 || destSize == 8);
        assert(srcSize == 4 || srcSize == 8);
        int srcSizeBits = srcSize * 8;
        int destSizeBits = destSize * 8;
        int items;
        int srcStart = 0;
        int destStart = 0;
        if (srcSize == 2 * destSize) {
            items = numItems(srcSize);
            if (ext & 0x2)
                destStart = destSizeBits * items;
        } else if (destSize == 2 * srcSize) {
            items = numItems(destSize);
            if (ext & 0x2)
                srcStart = srcSizeBits * items;
        } else {
            items = numItems(destSize);
        }
        uint64_t result = FpDestReg_uqw;

        for (int i = 0; i < items; i++) {
            int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
            int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
            uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
            double arg;

            if (srcSize == 4) {
                floatInt fi;
                fi.i = argBits;
                arg = fi.f;
            } else {
                doubleInt di;
                di.i = argBits;
                arg = di.d;
            }

            if (ext & 0x4) {
                if (arg >= 0)
                    arg += 0.5;
                else
                    arg -= 0.5;
            }

            // A NaN fails both range comparisons and so takes the
            // indefinite path.  Values just below the lower bound that
            // would still truncate to the minimum integer also take it,
            // which is harmless: the indefinite pattern and the minimum
            // integer share the same encoding.
            if (destSize == 4) {
                if (arg >= -2147483648.0 && arg < 2147483648.0)
                    argBits = (uint32_t)(int32_t)arg;
                else
                    argBits = 0x80000000;
            } else {
                if (arg >= -9223372036854775808.0 &&
                        arg < 9223372036854775808.0)
                    argBits = (uint64_t)(int64_t)arg;
                else
                    argBits = ULL(0x8000000000000000);
            }
            int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
            int destLoIndex = destStart + (i + 0) * destSizeBits;
            result = insertBits(result, destHiIndex, destLoIndex, argBits);
        }
        FpDestReg_uqw = result;
    '''
1233
class Cvti2f(MediaOp):
    # Packed signed-integer-to-float conversion.
    #
    # Callers supply a destination and a single source; the second source
    # slot of MediaOp is filled with "InstRegIndex(0)".
    def __init__(self, dest, src,
                 size=None, destSize=None, srcSize=None, ext=None):
        super(Cvti2f, self).__init__(
            dest, src, "InstRegIndex(0)", size, destSize, srcSize, ext)

    # Both element sizes must be 4 or 8 bytes.  When they differ, the lane
    # count comes from the wider size and bit 1 of ext shifts the window of
    # the narrower side up by (lanes * narrow width) bits.
    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(destSize == 4 || destSize == 8);
        assert(srcSize == 4 || srcSize == 8);
        const int inBits = srcSize * 8;
        const int outBits = destSize * 8;
        const bool upperHalf = (ext & 0x2) != 0;
        const bool narrowing = (srcSize == 2 * destSize);
        const bool widening = (destSize == 2 * srcSize);

        // The lane count follows the wider of the two element sizes.
        const int lanes = numItems(narrowing ? srcSize : destSize);
        const int inStart = (widening && upperHalf) ? inBits * lanes : 0;
        const int outStart = (narrowing && upperHalf) ? outBits * lanes : 0;

        // Sign bit of one source element, used for sign extension below.
        const uint64_t signBit = ULL(1) << (inBits - 1);

        uint64_t merged = FpDestReg_uqw;
        for (int lane = 0; lane < lanes; lane++) {
            const int inLo = inStart + lane * inBits;
            const int inHi = inLo + inBits - 1;
            uint64_t raw = bits(FpSrcReg1_uqw, inHi, inLo);

            // OR-ing in (0 - sign bit) replicates the sign through bit 63.
            const int64_t signedValue = raw | (0 - (raw & signBit));
            const double value = signedValue;

            // Encode the value at the destination precision.
            if (destSize == 4) {
                floatInt fi;
                fi.f = value;
                raw = fi.i;
            } else {
                doubleInt di;
                di.d = value;
                raw = di.i;
            }

            const int outLo = outStart + lane * outBits;
            const int outHi = outLo + outBits - 1;
            merged = insertBits(merged, outHi, outLo, raw);
        }
        FpDestReg_uqw = merged;
    '''
1295
class Cvtf2f(MediaOp):
    # Packed float-to-float conversion (4-byte <-> 8-byte elements).
    #
    # Callers supply a destination and a single source; the second source
    # slot of MediaOp is filled with "InstRegIndex(0)".
    def __init__(self, dest, src,
                 size=None, destSize=None, srcSize=None, ext=None):
        super(Cvtf2f, self).__init__(
            dest, src, "InstRegIndex(0)", size, destSize, srcSize, ext)

    # Both element sizes must be 4 or 8 bytes.  When they differ, the lane
    # count comes from the wider size and bit 1 of ext shifts the window of
    # the narrower side up by (lanes * narrow width) bits.
    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(destSize == 4 || destSize == 8);
        assert(srcSize == 4 || srcSize == 8);
        const int inBits = srcSize * 8;
        const int outBits = destSize * 8;
        const bool upperHalf = (ext & 0x2) != 0;
        const bool narrowing = (srcSize == 2 * destSize);
        const bool widening = (destSize == 2 * srcSize);

        // The lane count follows the wider of the two element sizes.
        const int lanes = numItems(narrowing ? srcSize : destSize);
        const int inStart = (widening && upperHalf) ? inBits * lanes : 0;
        const int outStart = (narrowing && upperHalf) ? outBits * lanes : 0;

        uint64_t merged = FpDestReg_uqw;
        for (int lane = 0; lane < lanes; lane++) {
            const int inLo = inStart + lane * inBits;
            const int inHi = inLo + inBits - 1;
            uint64_t raw = bits(FpSrcReg1_uqw, inHi, inLo);

            // Reinterpret the raw bits at the source precision.
            double value;
            if (srcSize == 4) {
                floatInt fi;
                fi.i = raw;
                value = fi.f;
            } else {
                doubleInt di;
                di.i = raw;
                value = di.d;
            }

            // Encode the value again at the destination precision.
            if (destSize == 4) {
                floatInt fi;
                fi.f = value;
                raw = fi.i;
            } else {
                doubleInt di;
                di.d = value;
                raw = di.i;
            }

            const int outLo = outStart + lane * outBits;
            const int outHi = outLo + outBits - 1;
            merged = insertBits(merged, outHi, outLo, raw);
        }
        FpDestReg_uqw = merged;
    '''
1363
class Mcmpi2r(MediaOp):
    # Packed signed-integer compare producing a per-lane mask.
    #
    # Source and destination element sizes must match.  Every lane of both
    # sources is sign extended to 64 bits and compared:
    #   ext bit 1 clear -> lane is all ones when src1 == src2
    #   ext bit 1 set   -> lane is all ones when src1 >  src2 (signed)
    # Lanes whose comparison fails are written as zero.  This microop is
    # integer only, so no float/double reinterpretation helpers are declared.
    code = '''
        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = numItems(size);
        uint64_t result = FpDestReg_uqw;

        // Loop invariants: the sign bit of one lane and the predicate choice.
        const uint64_t signBit = ULL(1) << (sizeBits - 1);
        const bool greaterThan = (ext & 0x2) == 0x2;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;

            // OR-ing in (0 - sign bit) replicates the sign through bit 63.
            uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
            int64_t arg1 = arg1Bits | (0 - (arg1Bits & signBit));
            uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
            int64_t arg2 = arg2Bits | (0 - (arg2Bits & signBit));

            uint64_t resBits = 0;
            if (greaterThan ? (arg1 > arg2) : (arg1 == arg2))
                resBits = mask(sizeBits);

            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg_uqw = result;
    '''
1402
class Mcmpf2r(MediaOp):
    # Packed floating-point compare producing a per-lane mask.
    #
    # The low three bits of ext choose the predicate.  A lane whose predicate
    # holds is written as all ones, any other lane as zero:
    #   0: equal              4: not equal, or unordered
    #   1: less than          5: not less than, or unordered
    #   2: less or equal      6: not less-or-equal, or unordered
    #   3: unordered          7: ordered
    # "Unordered" means at least one of the two lane values is a NaN.
    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        const int laneBytes = srcSize;
        const int laneBits = laneBytes * 8;
        const int lanes = numItems(laneBytes);
        uint64_t merged = FpDestReg_uqw;

        for (int lane = 0; lane < lanes; lane++) {
            const int lo = lane * laneBits;
            const int hi = lo + laneBits - 1;
            const uint64_t lhsRaw = bits(FpSrcReg1_uqw, hi, lo);
            const uint64_t rhsRaw = bits(FpSrcReg2_uqw, hi, lo);

            // Reinterpret both lanes as floating point values.
            double lhs, rhs;
            if (laneBytes == 4) {
                floatInt fi;
                fi.i = lhsRaw;
                lhs = fi.f;
                fi.i = rhsRaw;
                rhs = fi.f;
            } else {
                doubleInt di;
                di.i = lhsRaw;
                lhs = di.d;
                di.i = rhsRaw;
                rhs = di.d;
            }

            const bool unordered = std::isnan(lhs) || std::isnan(rhs);
            bool hit = false;
            switch (ext & mask(3)) {
              case 0:
                hit = !unordered && lhs == rhs;
                break;
              case 1:
                hit = !unordered && lhs < rhs;
                break;
              case 2:
                hit = !unordered && lhs <= rhs;
                break;
              case 3:
                hit = unordered;
                break;
              case 4:
                hit = unordered || lhs != rhs;
                break;
              case 5:
                hit = unordered || !(lhs < rhs);
                break;
              case 6:
                hit = unordered || !(lhs <= rhs);
                break;
              case 7:
                hit = !unordered;
                break;
            }

            uint64_t laneMask = 0;
            if (hit)
                laneMask = mask(laneBits);
            merged = insertBits(merged, hi, lo, laneMask);
        }
        FpDestReg_uqw = merged;
    '''
1484
class Mcmpf2rf(MediaOp):
    # Scalar floating-point compare that reports its outcome in the flags.
    #
    # Callers supply the two sources only; the destination slot of MediaOp
    # is filled with "InstRegIndex(0)".  Only the lowest element of each
    # source is compared, and the element size must be 4 or 8 bytes.
    def __init__(self, src1, src2,
                 size=None, destSize=None, srcSize=None, ext=None):
        super(Mcmpf2rf, self).__init__(
            "InstRegIndex(0)", src1, src2, size, destSize, srcSize, ext)

    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        assert(srcSize == 4 || srcSize == 8);
        const int laneBytes = srcSize;
        const int laneBits = laneBytes * 8;

        // Only the lowest element of each source takes part.
        const uint64_t lhsRaw = bits(FpSrcReg1_uqw, laneBits - 1, 0);
        const uint64_t rhsRaw = bits(FpSrcReg2_uqw, laneBits - 1, 0);
        double lhs, rhs;
        if (laneBytes == 4) {
            floatInt fi;
            fi.i = lhsRaw;
            lhs = fi.f;
            fi.i = rhsRaw;
            rhs = fi.f;
        } else {
            doubleInt di;
            di.i = lhsRaw;
            lhs = di.d;
            di.i = rhsRaw;
            rhs = di.d;
        }

        const bool unordered = std::isnan(lhs) || std::isnan(rhs);
        const bool below = !unordered && lhs < rhs;
        const bool same = !unordered && lhs == rhs;

        //               ZF PF CF
        // Unordered     1  1  1
        // Greater than  0  0  0
        // Less than     0  0  1
        // Equal         1  0  0
        // OF = SF = AF = 0
        ccFlagBits = ccFlagBits & ~(SFBit | AFBit | ZFBit | PFBit);
        cfofBits = cfofBits & ~(OFBit | CFBit);

        if (unordered)
            ccFlagBits = ccFlagBits | (ZFBit | PFBit);
        else if (same)
            ccFlagBits = ccFlagBits | ZFBit;

        if (unordered || below)
            cfofBits = cfofBits | CFBit;
    '''
1542
class Emms(MediaOp):
    # Operand-less microop whose only effect is to write 0xFFFF to FTW.
    # NOTE(review): presumably this marks every x87 register as empty;
    # confirm against the FTW encoding used by this model.
    #
    # The destination is the FTW misc register, both source slots are
    # "InstRegIndex(0)", and the fourth argument (2) lands in the slot that
    # sibling classes use for size.
    def __init__(self):
        super(Emms, self).__init__(
            'InstRegIndex(MISCREG_FTW)',
            'InstRegIndex(0)',
            'InstRegIndex(0)',
            2)

    code = 'FTW = 0xFFFF;'
1548}};