Deleted Added
sdiff udiff text old ( 7620:3d8a23caa1ef ) new ( 7626:bdd926760470 )
full compact
1/// Copyright (c) 2009 The Regents of The University of Michigan
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met: redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer;
8// redistributions in binary form must reproduce the above copyright
9// notice, this list of conditions and the following disclaimer in the
10// documentation and/or other materials provided with the distribution;
11// neither the name of the copyright holders nor the names of its
12// contributors may be used to endorse or promote products derived from
13// this software without specific prior written permission.
14//
15// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26//
27// Authors: Gabe Black
28
// Template for the execute() method of a media microop. The operand
// declaration, operand read and microop code snippets are substituted in
// order; the operand write-back snippet only runs when fault is still
// NoFault, and the fault is returned to the caller.
29def template MediaOpExecute {{
30 Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
31 Trace::InstRecord *traceData) const
32 {
33 Fault fault = NoFault;
34
35 %(op_decl)s;
36 %(op_rd)s;
37
38 %(code)s;
39
40 //Write the resulting state to the execution context
41 if(fault == NoFault)
42 {
43 %(op_wb)s;
44 }
45 return fault;
46 }
47}};
48
// Template for the class declaration of the register form of a media
// microop: the constructor takes two InstRegIndex sources (_src1, _src2),
// an InstRegIndex destination, the source and destination sizes and an
// extension value. The execute declaration comes from BasicExecDeclare.
49def template MediaOpRegDeclare {{
50 class %(class_name)s : public %(base_class)s
51 {
52 public:
53 %(class_name)s(ExtMachInst _machInst,
54 const char * instMnem, uint64_t setFlags,
55 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
56 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
57
58 %(BasicExecDeclare)s
59 };
60}};
61
// Template for the class declaration of the immediate form of a media
// microop. It mirrors the register form except that the second operand is
// a uint16_t immediate (_imm8) instead of an InstRegIndex.
62def template MediaOpImmDeclare {{
63
64 class %(class_name)s : public %(base_class)s
65 {
66 public:
67 %(class_name)s(ExtMachInst _machInst,
68 const char * instMnem, uint64_t setFlags,
69 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
70 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
71
72 %(BasicExecDeclare)s
73 };
74}};
75
// Template for the inline constructor of the register form. It forwards
// its arguments, together with the substituted mnemonic and op class, to
// the base class constructor and then runs the %(constructor)s snippet.
76def template MediaOpRegConstructor {{
77 inline %(class_name)s::%(class_name)s(
78 ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
79 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
80 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
81 %(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
82 _src1, _src2, _dest, _srcSize, _destSize, _ext,
83 %(op_class)s)
84 {
85 %(constructor)s;
86 }
87}};
88
// Template for the inline constructor of the immediate form. Identical to
// the register constructor except that _imm8 is passed to the base class
// in place of the second source register.
89def template MediaOpImmConstructor {{
90 inline %(class_name)s::%(class_name)s(
91 ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
92 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
93 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
94 %(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
95 _src1, _imm8, _dest, _srcSize, _destSize, _ext,
96 %(op_class)s)
97 {
98 %(constructor)s;
99 }
100}};
101
102let {{
103 # Make these empty strings so that concatenating onto
104 # them will always work.
105 header_output = ""
106 decoder_output = ""
107 exec_output = ""
108
# Template triples for the immediate form. The order matters: index 0 is
# appended to header_output, index 1 to decoder_output and index 2 to
# exec_output by MediaOpMeta.buildCppClasses.
109 immTemplates = (
110 MediaOpImmDeclare,
111 MediaOpImmConstructor,
112 MediaOpExecute)
113
# Same layout for the register form; both forms share MediaOpExecute.
114 regTemplates = (
115 MediaOpRegDeclare,
116 MediaOpRegConstructor,
117 MediaOpExecute)
118
class MediaOpMeta(type):
    # Metaclass for media microops. Creating a non-abstract class with this
    # metaclass generates the C++ declaration, constructor and execute code
    # for it and registers the class in the microassembler dictionary.

    # Whole-word "op2" or "sop2" reference with an optional ".type"
    # qualifier. buildCppClasses and __new__ both use this one pattern so
    # that they always agree on which microops get an immediate ("i")
    # variant.
    op2Matcher = re.compile(
            r"(?<!\w)(?P<prefix>s?)op2(?P<typeQual>\.\w+)?")

    # Whole-word "imm8" reference; its presence selects the immediate
    # templates.
    imm8Matcher = re.compile(r"(?<!\w)imm8(?!\w)")

    def buildCppClasses(self, name, Name, suffix, code):
        # Append the generated C++ for one microop to the global output
        # strings.
        #   name   - lower case mnemonic handed to InstObjParams
        #   Name   - base name of the generated C++ class
        #   suffix - appended to Name for the class name ("" or "Imm")
        #   code   - C++ snippet implementing the microop

        # Globals to stick the output in
        global header_output
        global decoder_output
        global exec_output

        # If op2 is used anywhere, make register and immediate versions
        # of this code. The qualifiers of the first match are applied to
        # every occurrence, as before.
        match = MediaOpMeta.op2Matcher.search(code)
        if match:
            typeQual = match.group("typeQual") or ""
            src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual)
            self.buildCppClasses(name, Name, suffix,
                    MediaOpMeta.op2Matcher.sub(src2_name, code))
            self.buildCppClasses(name + "i", Name, suffix + "Imm",
                    MediaOpMeta.op2Matcher.sub("imm8", code))
            return

        base = "X86ISA::MediaOp"

        # If imm8 shows up in the code, use the immediate templates, if
        # not, hopefully the register ones will be correct.
        if MediaOpMeta.imm8Matcher.search(code):
            base += "Imm"
            templates = immTemplates
        else:
            base += "Reg"
            templates = regTemplates

        # Get everything ready for the substitution
        iop = InstObjParams(name, Name + suffix, base, {"code" : code})

        # Generate the actual code (finally!)
        header_output += templates[0].subst(iop)
        decoder_output += templates[1].subst(iop)
        exec_output += templates[2].subst(iop)

    def __new__(mcls, Name, bases, dict):
        # Create the Python class. Unless the class body sets a true
        # "abstract" entry, also build its C++ classes and register it
        # under its lower case name. The "abstract" entry is removed from
        # the class dictionary so that subclasses do not inherit it.
        abstract = False
        name = Name.lower()
        if "abstract" in dict:
            abstract = dict['abstract']
            del dict['abstract']

        cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
        if not abstract:
            cls.className = Name
            cls.base_mnemonic = name
            code = cls.code

            # Set up the C++ classes
            mcls.buildCppClasses(cls, name, Name, "", code)

            # Hook into the microassembler dict
            global microopClasses
            microopClasses[name] = cls

            # If op2 is used anywhere, an immediate version of the C++
            # class was generated as well, so register the "i" mnemonic.
            # This must use the same pattern as buildCppClasses; a looser
            # one could register a mnemonic with no matching C++ class.
            if MediaOpMeta.op2Matcher.search(code):
                microopClasses[name + 'i'] = cls
        return cls
189
190
class MediaOp(X86Microop):
    # Base class of all media microops. Subclasses supply a "code" string
    # and MediaOpMeta generates the C++ classes for them.
    __metaclass__ = MediaOpMeta
    # This class itself doesn't act as a microop
    abstract = True

    def __init__(self, dest, src1, op2,
            size = None, destSize = None, srcSize = None, ext = None):
        # dest, src1, op2 - operand text pasted into the allocator call
        # size            - sets both the source and destination size
        # srcSize/destSize - override the respective size individually
        # ext             - extension value, 0 when omitted
        # Raises Exception when either size ends up unset.
        self.dest = dest
        self.src1 = src1
        self.op2 = op2
        if size is not None:
            self.srcSize = size
            self.destSize = size
        if srcSize is not None:
            self.srcSize = srcSize
        if destSize is not None:
            self.destSize = destSize
        # When no size was given the attribute may not exist at all, so
        # look it up defensively; a plain attribute read would raise
        # AttributeError before the intended message could be reported.
        if getattr(self, "srcSize", None) is None:
            raise Exception("Source size not set.")
        if getattr(self, "destSize", None) is None:
            raise Exception("Dest size not set.")
        if ext is None:
            self.ext = 0
        else:
            self.ext = ext

    def getAllocator(self, microFlags):
        # Return the C++ "new" expression that constructs this microop.
        # The "Imm" class is used when the microop was assembled under the
        # mnemonic with the trailing 'i'.
        className = self.className
        if self.mnemonic == self.base_mnemonic + 'i':
            className += "Imm"
        allocator = '''new %(class_name)s(machInst, macrocodeBlock,
                %(flags)s, %(src1)s, %(op2)s, %(dest)s,
                %(srcSize)s, %(destSize)s, %(ext)s)''' % {
            "class_name" : className,
            "flags" : self.microFlagsText(microFlags),
            "src1" : self.src1, "op2" : self.op2,
            "dest" : self.dest,
            "srcSize" : self.srcSize,
            "destSize" : self.destSize,
            "ext" : self.ext}
        return allocator
232
# Mov2int: copies one srcSize-byte element of the FP source register into
# the integer destination. imm8 selects the element; when bit 0 of src1 and
# bit 0 of ext are both set the offset is lowered by the item count first.
# If the offset falls outside the register the destination keeps its value.
# The second operand defaults to 0.
233 class Mov2int(MediaOp):
234 def __init__(self, dest, src1, src2 = 0, \
235 size = None, destSize = None, srcSize = None, ext = None):
236 super(Mov2int, self).__init__(dest, src1,\
237 src2, size, destSize, srcSize, ext)
238 code = '''
239 int items = sizeof(FloatRegBits) / srcSize;
240 int offset = imm8;
241 if (bits(src1, 0) && (ext & 0x1))
242 offset -= items;
243 if (offset >= 0 && offset < items) {
244 uint64_t fpSrcReg1 =
245 bits(FpSrcReg1.uqw,
246 (offset + 1) * srcSize * 8 - 1,
247 (offset + 0) * srcSize * 8);
248 DestReg = merge(0, fpSrcReg1, destSize);
249 } else {
250 DestReg = DestReg;
251 }
252 '''
253
# Mov2fp: inserts the value picked from the integer source (srcSize bytes)
# into one destSize-byte element of the FP destination. imm8 selects the
# element; when bit 0 of dest and bit 0 of ext are both set the offset is
# lowered by the item count first. If the offset falls outside the register
# the destination keeps its value. The second operand defaults to 0.
254 class Mov2fp(MediaOp):
255 def __init__(self, dest, src1, src2 = 0, \
256 size = None, destSize = None, srcSize = None, ext = None):
257 super(Mov2fp, self).__init__(dest, src1,\
258 src2, size, destSize, srcSize, ext)
259 code = '''
260 int items = sizeof(FloatRegBits) / destSize;
261 int offset = imm8;
262 if (bits(dest, 0) && (ext & 0x1))
263 offset -= items;
264 if (offset >= 0 && offset < items) {
265 uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
266 FpDestReg.uqw =
267 insertBits(FpDestReg.uqw,
268 (offset + 1) * destSize * 8 - 1,
269 (offset + 0) * destSize * 8, srcReg1);
270 } else {
271 FpDestReg.uqw = FpDestReg.uqw;
272 }
273 '''
274
# Movsign: gathers the top bit of every srcSize-byte element of the FP
# source into consecutive bits of a mask (starting at bit "items" instead
# of bit 0 when bit 0 of ext is set) and ORs the mask into the integer
# destination. Takes a single source; the second operand is fixed to
# "InstRegIndex(0)".
275 class Movsign(MediaOp):
276 def __init__(self, dest, src, \
277 size = None, destSize = None, srcSize = None, ext = None):
278 super(Movsign, self).__init__(dest, src,\
279 "InstRegIndex(0)", size, destSize, srcSize, ext)
280 code = '''
281 int items = sizeof(FloatRegBits) / srcSize;
282 uint64_t result = 0;
283 int offset = (ext & 0x1) ? items : 0;
284 for (int i = 0; i < items; i++) {
285 uint64_t picked =
286 bits(FpSrcReg1.uqw, (i + 1) * 8 * srcSize - 1);
287 result = insertBits(result, i + offset, i + offset, picked);
288 }
289 DestReg = DestReg | result;
290 '''
291
# Maskmov: element-wise conditional move. An element of the first source
# replaces the matching destination element only when the top bit of the
# corresponding element of the second source is set; other destination
# elements keep their previous value.
292 class Maskmov(MediaOp):
293 code = '''
294 assert(srcSize == destSize);
295 int size = srcSize;
296 int sizeBits = size * 8;
297 int items = numItems(size);
298 uint64_t result = FpDestReg.uqw;
299
300 for (int i = 0; i < items; i++) {
301 int hiIndex = (i + 1) * sizeBits - 1;
302 int loIndex = (i + 0) * sizeBits;
303 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
304 if (bits(FpSrcReg2.uqw, hiIndex))
305 result = insertBits(result, hiIndex, loIndex, arg1Bits);
306 }
307 FpDestReg.uqw = result;
308 '''
309
# shuffle: builds each destination element from a source element chosen by
# successive optionBits-wide fields of ext (1 bit per field for 8 byte
# elements, 2 bits otherwise). A selector whose byte offset reaches past
# the first source register is reduced by options / 2 and picks from the
# second source instead.
310 class shuffle(MediaOp):
311 code = '''
312 assert(srcSize == destSize);
313 int size = srcSize;
314 int sizeBits = size * 8;
315 int items = sizeof(FloatRegBits) / size;
316 int options;
317 int optionBits;
318 if (size == 8) {
319 options = 2;
320 optionBits = 1;
321 } else {
322 options = 4;
323 optionBits = 2;
324 }
325
326 uint64_t result = 0;
327 uint8_t sel = ext;
328
329 for (int i = 0; i < items; i++) {
330 uint64_t resBits;
331 uint8_t lsel = sel & mask(optionBits);
332 if (lsel * size >= sizeof(FloatRegBits)) {
333 lsel -= options / 2;
334 resBits = bits(FpSrcReg2.uqw,
335 (lsel + 1) * sizeBits - 1,
336 (lsel + 0) * sizeBits);
337 } else {
338 resBits = bits(FpSrcReg1.uqw,
339 (lsel + 1) * sizeBits - 1,
340 (lsel + 0) * sizeBits);
341 }
342
343 sel >>= optionBits;
344
345 int hiIndex = (i + 1) * sizeBits - 1;
346 int loIndex = (i + 0) * sizeBits;
347 result = insertBits(result, hiIndex, loIndex, resBits);
348 }
349 FpDestReg.uqw = result;
350 '''
351
# Unpack: interleaves elements of the two sources. For each i, element
# (i + offset) of the first source goes to result slot 2i and the same
# element of the second source to slot 2i + 1. offset is half the element
# count when ext is nonzero and 0 otherwise.
352 class Unpack(MediaOp):
353 code = '''
354 assert(srcSize == destSize);
355 int size = destSize;
356 int items = (sizeof(FloatRegBits) / size) / 2;
357 int offset = ext ? items : 0;
358 uint64_t result = 0;
359 for (int i = 0; i < items; i++) {
360 uint64_t pickedLow =
361 bits(FpSrcReg1.uqw, (i + offset + 1) * 8 * size - 1,
362 (i + offset) * 8 * size);
363 result = insertBits(result,
364 (2 * i + 1) * 8 * size - 1,
365 (2 * i + 0) * 8 * size,
366 pickedLow);
367 uint64_t pickedHigh =
368 bits(FpSrcReg2.uqw, (i + offset + 1) * 8 * size - 1,
369 (i + offset) * 8 * size);
370 result = insertBits(result,
371 (2 * i + 2) * 8 * size - 1,
372 (2 * i + 1) * 8 * size,
373 pickedHigh);
374 }
375 FpDestReg.uqw = result;
376 '''
377
class Pack(MediaOp):
    # Pack: narrows srcSize-byte elements to destSize-byte elements with
    # saturation (srcSize must be twice destSize). The low half of the
    # result is packed from the first source and the high half from the
    # second.
    code = '''
        assert(srcSize == destSize * 2);
        int items = (sizeof(FloatRegBits) / destSize);
        // Number of elements each source register contributes.
        int perSrc = items / 2;
        int destBits = destSize * 8;
        int srcBits = srcSize * 8;
        uint64_t result = 0;
        for (int i = 0; i < items; i++) {
            // The element index restarts at zero for the second source;
            // indexing it with (i - items) would produce negative bit
            // positions.
            uint64_t picked;
            if (i < perSrc) {
                picked = bits(FpSrcReg1.uqw, (i + 1) * srcBits - 1,
                                             (i + 0) * srcBits);
            } else {
                int j = i - perSrc;
                picked = bits(FpSrcReg2.uqw, (j + 1) * srcBits - 1,
                                             (j + 0) * srcBits);
            }
            unsigned signBit = bits(picked, srcBits - 1);

            // Handle saturation.
            if (signedOp()) {
                // The value fits when every bit from the destination sign
                // bit upwards equals the source sign bit. That field is
                // srcBits - destBits + 1 bits wide.
                uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
                if (signBit) {
                    if (overflow != mask(srcBits - destBits + 1))
                        picked = (ULL(1) << (destBits - 1));
                } else {
                    if (overflow != 0)
                        picked = mask(destBits - 1);
                }
            } else {
                // NOTE(review): assumes the unsigned form saturates a
                // signed source into an unsigned destination, as x86
                // PACKUSWB does - confirm against the callers.
                // Negative values clamp to zero; values with any bit set
                // above the destination width clamp to the maximum.
                if (signBit)
                    picked = 0;
                else if (bits(picked, srcBits - 1, destBits) != 0)
                    picked = mask(destBits);
            }
            result = insertBits(result,
                                (i + 1) * destBits - 1,
                                (i + 0) * destBits,
                                picked);
        }
        FpDestReg.uqw = result;
    '''
444
# Mxor: bitwise XOR of the two FP sources. The constructor passes 1 as the
# size because the operation does not depend on the element width.
445 class Mxor(MediaOp):
446 def __init__(self, dest, src1, src2):
447 super(Mxor, self).__init__(dest, src1, src2, 1)
448 code = '''
449 FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw;
450 '''
451
# Mor: bitwise OR of the two FP sources. The constructor passes 1 as the
# size because the operation does not depend on the element width.
452 class Mor(MediaOp):
453 def __init__(self, dest, src1, src2):
454 super(Mor, self).__init__(dest, src1, src2, 1)
455 code = '''
456 FpDestReg.uqw = FpSrcReg1.uqw | FpSrcReg2.uqw;
457 '''
458
# Mand: bitwise AND of the two FP sources. The constructor passes 1 as the
# size because the operation does not depend on the element width.
459 class Mand(MediaOp):
460 def __init__(self, dest, src1, src2):
461 super(Mand, self).__init__(dest, src1, src2, 1)
462 code = '''
463 FpDestReg.uqw = FpSrcReg1.uqw & FpSrcReg2.uqw;
464 '''
465
# Mandn: bitwise AND of the complement of the first FP source with the
# second FP source. The constructor passes 1 as the size because the
# operation does not depend on the element width.
466 class Mandn(MediaOp):
467 def __init__(self, dest, src1, src2):
468 super(Mandn, self).__init__(dest, src1, src2, 1)
469 code = '''
470 FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw;
471 '''
472
# Mminf: element-wise floating point minimum for 4 byte (float) or 8 byte
# (double) elements. The element bits are reinterpreted through a union and
# compared as doubles; the first operand's bits are kept only when
# arg1 < arg2 holds, otherwise the second operand's bits are used.
473 class Mminf(MediaOp):
474 code = '''
475 union floatInt
476 {
477 float f;
478 uint32_t i;
479 };
480 union doubleInt
481 {
482 double d;
483 uint64_t i;
484 };
485
486 assert(srcSize == destSize);
487 int size = srcSize;
488 int sizeBits = size * 8;
489 assert(srcSize == 4 || srcSize == 8);
490 int items = numItems(size);
491 uint64_t result = FpDestReg.uqw;
492
493 for (int i = 0; i < items; i++) {
494 double arg1, arg2;
495 int hiIndex = (i + 1) * sizeBits - 1;
496 int loIndex = (i + 0) * sizeBits;
497 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
498 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
499
500 if (size == 4) {
501 floatInt fi;
502 fi.i = arg1Bits;
503 arg1 = fi.f;
504 fi.i = arg2Bits;
505 arg2 = fi.f;
506 } else {
507 doubleInt di;
508 di.i = arg1Bits;
509 arg1 = di.d;
510 di.i = arg2Bits;
511 arg2 = di.d;
512 }
513
514 if (arg1 < arg2) {
515 result = insertBits(result, hiIndex, loIndex, arg1Bits);
516 } else {
517 result = insertBits(result, hiIndex, loIndex, arg2Bits);
518 }
519 }
520 FpDestReg.uqw = result;
521 '''
522
# Mmaxf: element-wise floating point maximum for 4 byte (float) or 8 byte
# (double) elements. The element bits are reinterpreted through a union and
# compared as doubles; the first operand's bits are kept only when
# arg1 > arg2 holds, otherwise the second operand's bits are used.
523 class Mmaxf(MediaOp):
524 code = '''
525 union floatInt
526 {
527 float f;
528 uint32_t i;
529 };
530 union doubleInt
531 {
532 double d;
533 uint64_t i;
534 };
535
536 assert(srcSize == destSize);
537 int size = srcSize;
538 int sizeBits = size * 8;
539 assert(srcSize == 4 || srcSize == 8);
540 int items = numItems(size);
541 uint64_t result = FpDestReg.uqw;
542
543 for (int i = 0; i < items; i++) {
544 double arg1, arg2;
545 int hiIndex = (i + 1) * sizeBits - 1;
546 int loIndex = (i + 0) * sizeBits;
547 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
548 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
549
550 if (size == 4) {
551 floatInt fi;
552 fi.i = arg1Bits;
553 arg1 = fi.f;
554 fi.i = arg2Bits;
555 arg2 = fi.f;
556 } else {
557 doubleInt di;
558 di.i = arg1Bits;
559 arg1 = di.d;
560 di.i = arg2Bits;
561 arg2 = di.d;
562 }
563
564 if (arg1 > arg2) {
565 result = insertBits(result, hiIndex, loIndex, arg1Bits);
566 } else {
567 result = insertBits(result, hiIndex, loIndex, arg2Bits);
568 }
569 }
570 FpDestReg.uqw = result;
571 '''
572
# Mmini: element-wise integer minimum. When signedOp() is true the elements
# are sign extended to 64 bits and compared as signed values; otherwise the
# raw element bits are compared as unsigned values.
573 class Mmini(MediaOp):
574 code = '''
575
576 assert(srcSize == destSize);
577 int size = srcSize;
578 int sizeBits = size * 8;
579 int items = numItems(size);
580 uint64_t result = FpDestReg.uqw;
581
582 for (int i = 0; i < items; i++) {
583 int hiIndex = (i + 1) * sizeBits - 1;
584 int loIndex = (i + 0) * sizeBits;
585 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
586 int64_t arg1 = arg1Bits |
587 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
588 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
589 int64_t arg2 = arg2Bits |
590 (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
591 uint64_t resBits;
592
593 if (signedOp()) {
594 if (arg1 < arg2) {
595 resBits = arg1Bits;
596 } else {
597 resBits = arg2Bits;
598 }
599 } else {
600 if (arg1Bits < arg2Bits) {
601 resBits = arg1Bits;
602 } else {
603 resBits = arg2Bits;
604 }
605 }
606 result = insertBits(result, hiIndex, loIndex, resBits);
607 }
608 FpDestReg.uqw = result;
609 '''
610
# Mmaxi: element-wise integer maximum. When signedOp() is true the elements
# are sign extended to 64 bits and compared as signed values; otherwise the
# raw element bits are compared as unsigned values.
611 class Mmaxi(MediaOp):
612 code = '''
613
614 assert(srcSize == destSize);
615 int size = srcSize;
616 int sizeBits = size * 8;
617 int items = numItems(size);
618 uint64_t result = FpDestReg.uqw;
619
620 for (int i = 0; i < items; i++) {
621 int hiIndex = (i + 1) * sizeBits - 1;
622 int loIndex = (i + 0) * sizeBits;
623 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
624 int64_t arg1 = arg1Bits |
625 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
626 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
627 int64_t arg2 = arg2Bits |
628 (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
629 uint64_t resBits;
630
631 if (signedOp()) {
632 if (arg1 > arg2) {
633 resBits = arg1Bits;
634 } else {
635 resBits = arg2Bits;
636 }
637 } else {
638 if (arg1Bits > arg2Bits) {
639 resBits = arg1Bits;
640 } else {
641 resBits = arg2Bits;
642 }
643 }
644 result = insertBits(result, hiIndex, loIndex, resBits);
645 }
646 FpDestReg.uqw = result;
647 '''
648
# Msqrt: element-wise square root of 4 byte (float) or 8 byte (double)
# elements of the first source. Takes a single source; the second operand
# is fixed to "InstRegIndex(0)".
649 class Msqrt(MediaOp):
650 def __init__(self, dest, src, \
651 size = None, destSize = None, srcSize = None, ext = None):
652 super(Msqrt, self).__init__(dest, src,\
653 "InstRegIndex(0)", size, destSize, srcSize, ext)
654 code = '''
655 union floatInt
656 {
657 float f;
658 uint32_t i;
659 };
660 union doubleInt
661 {
662 double d;
663 uint64_t i;
664 };
665
666 assert(srcSize == destSize);
667 int size = srcSize;
668 int sizeBits = size * 8;
669 assert(srcSize == 4 || srcSize == 8);
670 int items = numItems(size);
671 uint64_t result = FpDestReg.uqw;
672
673 for (int i = 0; i < items; i++) {
674 int hiIndex = (i + 1) * sizeBits - 1;
675 int loIndex = (i + 0) * sizeBits;
676 uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
677
678 if (size == 4) {
679 floatInt fi;
680 fi.i = argBits;
681 fi.f = sqrt(fi.f);
682 argBits = fi.i;
683 } else {
684 doubleInt di;
685 di.i = argBits;
686 di.d = sqrt(di.d);
687 argBits = di.i;
688 }
689 result = insertBits(result, hiIndex, loIndex, argBits);
690 }
691 FpDestReg.uqw = result;
692 '''
693
# Maddf: element-wise floating point addition of 4 byte (float) or 8 byte
# (double) elements. The arithmetic is done in the element's own precision
# by reinterpreting the element bits through a union.
694 class Maddf(MediaOp):
695 code = '''
696 union floatInt
697 {
698 float f;
699 uint32_t i;
700 };
701 union doubleInt
702 {
703 double d;
704 uint64_t i;
705 };
706
707 assert(srcSize == destSize);
708 int size = srcSize;
709 int sizeBits = size * 8;
710 assert(srcSize == 4 || srcSize == 8);
711 int items = numItems(size);
712 uint64_t result = FpDestReg.uqw;
713
714 for (int i = 0; i < items; i++) {
715 int hiIndex = (i + 1) * sizeBits - 1;
716 int loIndex = (i + 0) * sizeBits;
717 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
718 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
719 uint64_t resBits;
720
721 if (size == 4) {
722 floatInt arg1, arg2, res;
723 arg1.i = arg1Bits;
724 arg2.i = arg2Bits;
725 res.f = arg1.f + arg2.f;
726 resBits = res.i;
727 } else {
728 doubleInt arg1, arg2, res;
729 arg1.i = arg1Bits;
730 arg2.i = arg2Bits;
731 res.d = arg1.d + arg2.d;
732 resBits = res.i;
733 }
734
735 result = insertBits(result, hiIndex, loIndex, resBits);
736 }
737 FpDestReg.uqw = result;
738 '''
739
# Msubf: element-wise floating point subtraction (first source minus
# second) of 4 byte (float) or 8 byte (double) elements, done in the
# element's own precision through a union.
740 class Msubf(MediaOp):
741 code = '''
742 union floatInt
743 {
744 float f;
745 uint32_t i;
746 };
747 union doubleInt
748 {
749 double d;
750 uint64_t i;
751 };
752
753 assert(srcSize == destSize);
754 int size = srcSize;
755 int sizeBits = size * 8;
756 assert(srcSize == 4 || srcSize == 8);
757 int items = numItems(size);
758 uint64_t result = FpDestReg.uqw;
759
760 for (int i = 0; i < items; i++) {
761 int hiIndex = (i + 1) * sizeBits - 1;
762 int loIndex = (i + 0) * sizeBits;
763 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
764 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
765 uint64_t resBits;
766
767 if (size == 4) {
768 floatInt arg1, arg2, res;
769 arg1.i = arg1Bits;
770 arg2.i = arg2Bits;
771 res.f = arg1.f - arg2.f;
772 resBits = res.i;
773 } else {
774 doubleInt arg1, arg2, res;
775 arg1.i = arg1Bits;
776 arg2.i = arg2Bits;
777 res.d = arg1.d - arg2.d;
778 resBits = res.i;
779 }
780
781 result = insertBits(result, hiIndex, loIndex, resBits);
782 }
783 FpDestReg.uqw = result;
784 '''
785
# Mmulf: element-wise floating point multiplication of 4 byte (float) or
# 8 byte (double) elements, done in the element's own precision through a
# union.
786 class Mmulf(MediaOp):
787 code = '''
788 union floatInt
789 {
790 float f;
791 uint32_t i;
792 };
793 union doubleInt
794 {
795 double d;
796 uint64_t i;
797 };
798
799 assert(srcSize == destSize);
800 int size = srcSize;
801 int sizeBits = size * 8;
802 assert(srcSize == 4 || srcSize == 8);
803 int items = numItems(size);
804 uint64_t result = FpDestReg.uqw;
805
806 for (int i = 0; i < items; i++) {
807 int hiIndex = (i + 1) * sizeBits - 1;
808 int loIndex = (i + 0) * sizeBits;
809 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
810 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
811 uint64_t resBits;
812
813 if (size == 4) {
814 floatInt arg1, arg2, res;
815 arg1.i = arg1Bits;
816 arg2.i = arg2Bits;
817 res.f = arg1.f * arg2.f;
818 resBits = res.i;
819 } else {
820 doubleInt arg1, arg2, res;
821 arg1.i = arg1Bits;
822 arg2.i = arg2Bits;
823 res.d = arg1.d * arg2.d;
824 resBits = res.i;
825 }
826
827 result = insertBits(result, hiIndex, loIndex, resBits);
828 }
829 FpDestReg.uqw = result;
830 '''
831
# Mdivf: element-wise floating point division (first source divided by
# second) of 4 byte (float) or 8 byte (double) elements, done in the
# element's own precision through a union.
832 class Mdivf(MediaOp):
833 code = '''
834 union floatInt
835 {
836 float f;
837 uint32_t i;
838 };
839 union doubleInt
840 {
841 double d;
842 uint64_t i;
843 };
844
845 assert(srcSize == destSize);
846 int size = srcSize;
847 int sizeBits = size * 8;
848 assert(srcSize == 4 || srcSize == 8);
849 int items = numItems(size);
850 uint64_t result = FpDestReg.uqw;
851
852 for (int i = 0; i < items; i++) {
853 int hiIndex = (i + 1) * sizeBits - 1;
854 int loIndex = (i + 0) * sizeBits;
855 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
856 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
857 uint64_t resBits;
858
859 if (size == 4) {
860 floatInt arg1, arg2, res;
861 arg1.i = arg1Bits;
862 arg2.i = arg2Bits;
863 res.f = arg1.f / arg2.f;
864 resBits = res.i;
865 } else {
866 doubleInt arg1, arg2, res;
867 arg1.i = arg1Bits;
868 arg2.i = arg2Bits;
869 res.d = arg1.d / arg2.d;
870 resBits = res.i;
871 }
872
873 result = insertBits(result, hiIndex, loIndex, resBits);
874 }
875 FpDestReg.uqw = result;
876 '''
877
# Maddi: element-wise integer addition. When bit 1 of ext (0x2) is set the
# result saturates: for signedOp() an overflow (operands share a sign that
# differs from the result's) is replaced by the most negative or most
# positive value, otherwise a carry out replaces the result by all ones.
878 class Maddi(MediaOp):
879 code = '''
880 assert(srcSize == destSize);
881 int size = srcSize;
882 int sizeBits = size * 8;
883 int items = numItems(size);
884 uint64_t result = FpDestReg.uqw;
885
886 for (int i = 0; i < items; i++) {
887 int hiIndex = (i + 1) * sizeBits - 1;
888 int loIndex = (i + 0) * sizeBits;
889 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
890 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
891 uint64_t resBits = arg1Bits + arg2Bits;
892
893 if (ext & 0x2) {
894 if (signedOp()) {
895 int arg1Sign = bits(arg1Bits, sizeBits - 1);
896 int arg2Sign = bits(arg2Bits, sizeBits - 1);
897 int resSign = bits(resBits, sizeBits - 1);
898 if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
899 if (resSign == 0)
900 resBits = (ULL(1) << (sizeBits - 1));
901 else
902 resBits = mask(sizeBits - 1);
903 }
904 } else {
905 if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
906 resBits = mask(sizeBits);
907 }
908 }
909
910 result = insertBits(result, hiIndex, loIndex, resBits);
911 }
912 FpDestReg.uqw = result;
913 '''
914
# Msubi: element-wise integer subtraction (first source minus second).
# When bit 1 of ext (0x2) is set the result saturates. The signed overflow
# test is the same as for addition but uses the inverted sign of the second
# operand. The unsigned path yields 0 when the second operand is larger
# than the first.
915 class Msubi(MediaOp):
916 code = '''
917 assert(srcSize == destSize);
918 int size = srcSize;
919 int sizeBits = size * 8;
920 int items = numItems(size);
921 uint64_t result = FpDestReg.uqw;
922
923 for (int i = 0; i < items; i++) {
924 int hiIndex = (i + 1) * sizeBits - 1;
925 int loIndex = (i + 0) * sizeBits;
926 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
927 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
928 uint64_t resBits = arg1Bits - arg2Bits;
929
930 if (ext & 0x2) {
931 if (signedOp()) {
932 int arg1Sign = bits(arg1Bits, sizeBits - 1);
933 int arg2Sign = !bits(arg2Bits, sizeBits - 1);
934 int resSign = bits(resBits, sizeBits - 1);
935 if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
936 if (resSign == 0)
937 resBits = (ULL(1) << (sizeBits - 1));
938 else
939 resBits = mask(sizeBits - 1);
940 }
941 } else {
942 if (arg2Bits > arg1Bits) {
943 resBits = 0;
944 } else if (!findCarry(sizeBits, resBits,
945 arg1Bits, ~arg2Bits)) {
946 resBits = mask(sizeBits);
947 }
948 }
949 }
950
951 result = insertBits(result, hiIndex, loIndex, resBits);
952 }
953 FpDestReg.uqw = result;
954 '''
955
# Mmuli: element-wise integer multiplication of srcSize-byte elements into
# destSize-byte results (destSize >= srcSize). ext bits 16 and 32 shift the
# position of the source element within each destination-sized slot,
# ext & 0x4 adds 1 << (destBits - 1) to the product, and multHi() shifts
# the product right by destBits before it is stored. Sources are sign
# extended first when signedOp() is true.
# NOTE(review): "resBits >>= destBits" would shift a 64 bit value by 64 if
# destBits were 64, which the assert allows - confirm that combination is
# never used together with multHi().
956 class Mmuli(MediaOp):
957 code = '''
958 int srcBits = srcSize * 8;
959 int destBits = destSize * 8;
960 assert(destBits <= 64);
961 assert(destSize >= srcSize);
962 int items = numItems(destSize);
963 uint64_t result = FpDestReg.uqw;
964
965 for (int i = 0; i < items; i++) {
966 int offset = 0;
967 if (ext & 16) {
968 if (ext & 32)
969 offset = i * (destBits - srcBits);
970 else
971 offset = i * (destBits - srcBits) + srcBits;
972 }
973 int srcHiIndex = (i + 1) * srcBits - 1 + offset;
974 int srcLoIndex = (i + 0) * srcBits + offset;
975 uint64_t arg1Bits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
976 uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex);
977 uint64_t resBits;
978
979 if (signedOp()) {
980 int64_t arg1 = arg1Bits |
981 (0 - (arg1Bits & (ULL(1) << (srcBits - 1))));
982 int64_t arg2 = arg2Bits |
983 (0 - (arg2Bits & (ULL(1) << (srcBits - 1))));
984 resBits = (uint64_t)(arg1 * arg2);
985 } else {
986 resBits = arg1Bits * arg2Bits;
987 }
988
989 if (ext & 0x4)
990 resBits += (ULL(1) << (destBits - 1));
991
992 if (multHi())
993 resBits >>= destBits;
994
995 int destHiIndex = (i + 1) * destBits - 1;
996 int destLoIndex = (i + 0) * destBits;
997 result = insertBits(result, destHiIndex, destLoIndex, resBits);
998 }
999 FpDestReg.uqw = result;
1000 '''
1001
# Mavg: element-wise rounded average of the two sources, computed on the
# unsigned element values as (a + b + 1) / 2.
1002 class Mavg(MediaOp):
1003 code = '''
1004 assert(srcSize == destSize);
1005 int size = srcSize;
1006 int sizeBits = size * 8;
1007 int items = numItems(size);
1008 uint64_t result = FpDestReg.uqw;
1009
1010 for (int i = 0; i < items; i++) {
1011 int hiIndex = (i + 1) * sizeBits - 1;
1012 int loIndex = (i + 0) * sizeBits;
1013 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1014 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1015 uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;
1016
1017 result = insertBits(result, hiIndex, loIndex, resBits);
1018 }
1019 FpDestReg.uqw = result;
1020 '''
1021
# Msad: sum of absolute differences. Adds up |a - b| over every srcSize-byte
# element pair of the two sources and writes the sum, masked to
# destSize * 8 bits, to the destination.
1022 class Msad(MediaOp):
1023 code = '''
1024 int srcBits = srcSize * 8;
1025 int items = sizeof(FloatRegBits) / srcSize;
1026
1027 uint64_t sum = 0;
1028 for (int i = 0; i < items; i++) {
1029 int hiIndex = (i + 1) * srcBits - 1;
1030 int loIndex = (i + 0) * srcBits;
1031 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1032 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1033 int64_t resBits = arg1Bits - arg2Bits;
1034 if (resBits < 0)
1035 resBits = -resBits;
1036 sum += resBits;
1037 }
1038 FpDestReg.uqw = sum & mask(destSize * 8);
1039 '''
1040
# Msrl: element-wise logical right shift. The shift amount comes from
# op2.uqw, which MediaOpMeta.buildCppClasses rewrites into a register
# (FpSrcReg2.uqw) and an immediate (imm8) variant. A shift amount of at
# least the element width produces 0.
1041 class Msrl(MediaOp):
1042 code = '''
1043
1044 assert(srcSize == destSize);
1045 int size = srcSize;
1046 int sizeBits = size * 8;
1047 int items = numItems(size);
1048 uint64_t shiftAmt = op2.uqw;
1049 uint64_t result = FpDestReg.uqw;
1050
1051 for (int i = 0; i < items; i++) {
1052 int hiIndex = (i + 1) * sizeBits - 1;
1053 int loIndex = (i + 0) * sizeBits;
1054 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1055 uint64_t resBits;
1056 if (shiftAmt >= sizeBits) {
1057 resBits = 0;
1058 } else {
1059 resBits = (arg1Bits >> shiftAmt) &
1060 mask(sizeBits - shiftAmt);
1061 }
1062
1063 result = insertBits(result, hiIndex, loIndex, resBits);
1064 }
1065 FpDestReg.uqw = result;
1066 '''
1067
# Msra: element-wise arithmetic right shift. The shift amount comes from
# op2.uqw, which MediaOpMeta.buildCppClasses rewrites into a register and
# an immediate variant. A shift amount of at least the element width fills
# the element with copies of its sign bit; smaller shifts sign extend the
# shifted value from the bit the old sign bit moved to.
1068 class Msra(MediaOp):
1069 code = '''
1070
1071 assert(srcSize == destSize);
1072 int size = srcSize;
1073 int sizeBits = size * 8;
1074 int items = numItems(size);
1075 uint64_t shiftAmt = op2.uqw;
1076 uint64_t result = FpDestReg.uqw;
1077
1078 for (int i = 0; i < items; i++) {
1079 int hiIndex = (i + 1) * sizeBits - 1;
1080 int loIndex = (i + 0) * sizeBits;
1081 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1082 uint64_t resBits;
1083 if (shiftAmt >= sizeBits) {
1084 if (bits(arg1Bits, sizeBits - 1))
1085 resBits = mask(sizeBits);
1086 else
1087 resBits = 0;
1088 } else {
1089 resBits = (arg1Bits >> shiftAmt);
1090 resBits = resBits |
1091 (0 - (resBits & (ULL(1) << (sizeBits - 1 - shiftAmt))));
1092 }
1093
1094 result = insertBits(result, hiIndex, loIndex, resBits);
1095 }
1096 FpDestReg.uqw = result;
1097 '''
1098
# Msll: element-wise logical left shift. The shift amount comes from
# op2.uqw, which MediaOpMeta.buildCppClasses rewrites into a register and
# an immediate variant. A shift amount of at least the element width
# produces 0; bits shifted past the element are dropped by insertBits.
1099 class Msll(MediaOp):
1100 code = '''
1101
1102 assert(srcSize == destSize);
1103 int size = srcSize;
1104 int sizeBits = size * 8;
1105 int items = numItems(size);
1106 uint64_t shiftAmt = op2.uqw;
1107 uint64_t result = FpDestReg.uqw;
1108
1109 for (int i = 0; i < items; i++) {
1110 int hiIndex = (i + 1) * sizeBits - 1;
1111 int loIndex = (i + 0) * sizeBits;
1112 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1113 uint64_t resBits;
1114 if (shiftAmt >= sizeBits) {
1115 resBits = 0;
1116 } else {
1117 resBits = (arg1Bits << shiftAmt);
1118 }
1119
1120 result = insertBits(result, hiIndex, loIndex, resBits);
1121 }
1122 FpDestReg.uqw = result;
1123 '''
1124
class Cvtf2i(MediaOp):
    # Cvtf2i: converts 4 byte (float) or 8 byte (double) elements of the
    # source into 4 or 8 byte signed integers. When the source and
    # destination sizes differ, bit 1 of ext (0x2) selects the upper half
    # of the wider side. ext & 0x4 rounds to nearest (half away from zero)
    # instead of truncating. Takes a single source; the second operand is
    # fixed to "InstRegIndex(0)".
    def __init__(self, dest, src, \
            size = None, destSize = None, srcSize = None, ext = None):
        super(Cvtf2i, self).__init__(dest, src,\
                "InstRegIndex(0)", size, destSize, srcSize, ext)
    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(destSize == 4 || destSize == 8);
        assert(srcSize == 4 || srcSize == 8);
        int srcSizeBits = srcSize * 8;
        int destSizeBits = destSize * 8;
        int items;
        int srcStart = 0;
        int destStart = 0;
        if (srcSize == 2 * destSize) {
            items = numItems(srcSize);
            if (ext & 0x2)
                destStart = destSizeBits * items;
        } else if (destSize == 2 * srcSize) {
            items = numItems(destSize);
            if (ext & 0x2)
                srcStart = srcSizeBits * items;
        } else {
            items = numItems(destSize);
        }
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
            int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
            uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
            double arg;

            if (srcSize == 4) {
                floatInt fi;
                fi.i = argBits;
                arg = fi.f;
            } else {
                doubleInt di;
                di.i = argBits;
                arg = di.d;
            }

            if (ext & 0x4) {
                if (arg >= 0)
                    arg += 0.5;
                else
                    arg -= 0.5;
            }

            // Convert through the signed type: casting a negative double
            // straight to an unsigned type is undefined behavior, as is
            // converting a value the target type cannot hold. Values out
            // of range (and NaNs, which fail both comparisons) produce
            // the x86 "integer indefinite" pattern, i.e. only the sign
            // bit set.
            if (destSize == 4) {
                if (arg >= -2147483648.0 && arg < 2147483648.0)
                    argBits = (uint32_t)(int32_t)arg;
                else
                    argBits = (ULL(1) << 31);
            } else {
                if (arg >= -9223372036854775808.0 &&
                        arg < 9223372036854775808.0)
                    argBits = (uint64_t)(int64_t)arg;
                else
                    argBits = (ULL(1) << 63);
            }
            int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
            int destLoIndex = destStart + (i + 0) * destSizeBits;
            result = insertBits(result, destHiIndex, destLoIndex, argBits);
        }
        FpDestReg.uqw = result;
    '''
1196
# Cvti2f: convert the packed signed integer elements of FpSrcReg1 into
# floating point values and insert them into FpDestReg.
#
#   dest, src          - destination and source register operands.
#   size/destSize/
#   srcSize            - element sizes in bytes; the generated code asserts
#                        that both source and destination are 4 or 8.
#   ext                - bit 0x2 selects the upper half of the narrower
#                        register when the two sizes differ by a factor of
#                        two.
#
# The literal "InstRegIndex(0)" fills the third MediaOp operand slot, which
# the generated code never reads (presumably the unused second source).
# numItems(), bits() and insertBits() are helpers defined elsewhere.
class Cvti2f(MediaOp):
    def __init__(self, dest, src, \
            size = None, destSize = None, srcSize = None, ext = None):
        super(Cvti2f, self).__init__(dest, src,\
                "InstRegIndex(0)", size, destSize, srcSize, ext)
    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(destSize == 4 || destSize == 8);
        assert(srcSize == 4 || srcSize == 8);
        int srcSizeBits = srcSize * 8;
        int destSizeBits = destSize * 8;
        int items;
        int srcStart = 0;
        int destStart = 0;
        if (srcSize == 2 * destSize) {
            // Narrowing: the results only fill part of the destination.
            items = numItems(srcSize);
            if (ext & 0x2)
                destStart = destSizeBits * items;
        } else if (destSize == 2 * srcSize) {
            // Widening: only part of the source is consumed.
            items = numItems(destSize);
            if (ext & 0x2)
                srcStart = srcSizeBits * items;
        } else {
            items = numItems(destSize);
        }
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
            int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
            uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);

            // Sign extend the element to 64 bits.
            int64_t sArg = argBits |
                (0 - (argBits & (ULL(1) << (srcSizeBits - 1))));

            if (destSize == 4) {
                floatInt fi;
                // Convert straight from the integer. Going through a
                // double first would round twice for 64 bit sources and
                // could produce a different single precision result.
                fi.f = (float)sArg;
                argBits = fi.i;
            } else {
                doubleInt di;
                di.d = (double)sArg;
                argBits = di.i;
            }
            int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
            int destLoIndex = destStart + (i + 0) * destSizeBits;
            result = insertBits(result, destHiIndex, destLoIndex, argBits);
        }
        FpDestReg.uqw = result;
    '''
1258
# Cvtf2f: convert the packed floating point elements of FpSrcReg1 between
# single (4 byte) and double (8 byte) precision and insert the results into
# FpDestReg. The generated code asserts that srcSize and destSize are each
# 4 or 8.
#
# When the source elements are twice as wide as the destination ones, ext
# bit 0x2 moves the results up by `items` destination elements; when the
# destination elements are twice as wide, ext bit 0x2 makes the conversion
# read the source starting `items` elements up instead of from bit 0.
#
# The literal "InstRegIndex(0)" fills the third MediaOp operand slot, which
# the generated code never reads (presumably the unused second source --
# confirm against MediaOp.__init__). numItems(), bits() and insertBits()
# are helpers defined elsewhere.
1259 class Cvtf2f(MediaOp):
1260 def __init__(self, dest, src, \
1261 size = None, destSize = None, srcSize = None, ext = None):
1262 super(Cvtf2f, self).__init__(dest, src,\
1263 "InstRegIndex(0)", size, destSize, srcSize, ext)
# C++ body of the microop; the unions reinterpret raw register bits as
# float/double values and back.
1264 code = '''
1265 union floatInt
1266 {
1267 float f;
1268 uint32_t i;
1269 };
1270 union doubleInt
1271 {
1272 double d;
1273 uint64_t i;
1274 };
1275
1276 assert(destSize == 4 || destSize == 8);
1277 assert(srcSize == 4 || srcSize == 8);
1278 int srcSizeBits = srcSize * 8;
1279 int destSizeBits = destSize * 8;
1280 int items;
1281 int srcStart = 0;
1282 int destStart = 0;
1283 if (srcSize == 2 * destSize) {
1284 items = numItems(srcSize);
1285 if (ext & 0x2)
1286 destStart = destSizeBits * items;
1287 } else if (destSize == 2 * srcSize) {
1288 items = numItems(destSize);
1289 if (ext & 0x2)
1290 srcStart = srcSizeBits * items;
1291 } else {
1292 items = numItems(destSize);
1293 }
1294 uint64_t result = FpDestReg.uqw;
1295
1296 for (int i = 0; i < items; i++) {
1297 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1298 int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1299 uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1300 double arg;
1301
1302 if (srcSize == 4) {
1303 floatInt fi;
1304 fi.i = argBits;
1305 arg = fi.f;
1306 } else {
1307 doubleInt di;
1308 di.i = argBits;
1309 arg = di.d;
1310 }
1311 if (destSize == 4) {
1312 floatInt fi;
1313 fi.f = arg;
1314 argBits = fi.i;
1315 } else {
1316 doubleInt di;
1317 di.d = arg;
1318 argBits = di.i;
1319 }
1320 int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1321 int destLoIndex = destStart + (i + 0) * destSizeBits;
1322 result = insertBits(result, destHiIndex, destLoIndex, argBits);
1323 }
1324 FpDestReg.uqw = result;
1325 '''
1326
# Mcmpi2r: element-wise signed integer compare of FpSrcReg1 against
# FpSrcReg2, writing a per-element mask into FpDestReg.
#
# The generated code asserts that srcSize equals destSize. Each element is
# sign extended to 64 bits before the comparison. With ext bit 0x2 clear
# the test is "equal"; with it set the test is "greater than". An element
# that passes is written as all ones (mask(sizeBits)), otherwise as zero.
#
# numItems(), bits(), insertBits() and mask() are helpers defined elsewhere.
class Mcmpi2r(MediaOp):
    code = '''
        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = numItems(size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;

            // Sign extend both operands so they compare as signed values.
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            int64_t arg1 = arg1Bits |
                (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
            int64_t arg2 = arg2Bits |
                (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));

            uint64_t resBits = 0;
            if (((ext & 0x2) == 0 && arg1 == arg2) ||
                ((ext & 0x2) == 0x2 && arg1 > arg2))
                resBits = mask(sizeBits);

            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''
1365
# Mcmpf2r: element-wise floating point compare of FpSrcReg1 against
# FpSrcReg2, writing a per-element mask into FpDestReg.
#
# The generated code asserts that srcSize equals destSize; elements are
# read as float when the size is 4 and as double otherwise. The value of
# (ext & mask(3)) selects the predicate (the switch handles 0 through 7):
#   0 equal            4 not equal (or unordered)
#   1 less than        5 not less than (or unordered)
#   2 less or equal    6 not less or equal (or unordered)
#   3 unordered        7 ordered
# "Unordered" means at least one operand is a NaN. An element that passes
# is written as all ones (mask(sizeBits)), otherwise as zero.
#
# numItems(), bits(), insertBits() and mask() are helpers defined elsewhere.
1366 class Mcmpf2r(MediaOp):
# C++ body of the microop; the unions reinterpret raw register bits as
# float/double values.
1367 code = '''
1368 union floatInt
1369 {
1370 float f;
1371 uint32_t i;
1372 };
1373 union doubleInt
1374 {
1375 double d;
1376 uint64_t i;
1377 };
1378
1379 assert(srcSize == destSize);
1380 int size = srcSize;
1381 int sizeBits = size * 8;
1382 int items = numItems(size);
1383 uint64_t result = FpDestReg.uqw;
1384
1385 for (int i = 0; i < items; i++) {
1386 int hiIndex = (i + 1) * sizeBits - 1;
1387 int loIndex = (i + 0) * sizeBits;
1388 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1389 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1390 double arg1, arg2;
1391
1392 if (size == 4) {
1393 floatInt fi;
1394 fi.i = arg1Bits;
1395 arg1 = fi.f;
1396 fi.i = arg2Bits;
1397 arg2 = fi.f;
1398 } else {
1399 doubleInt di;
1400 di.i = arg1Bits;
1401 arg1 = di.d;
1402 di.i = arg2Bits;
1403 arg2 = di.d;
1404 }
1405
1406 uint64_t resBits = 0;
1407 bool nanop = isnan(arg1) || isnan(arg2);
1408 switch (ext & mask(3)) {
1409 case 0:
1410 if (arg1 == arg2 && !nanop)
1411 resBits = mask(sizeBits);
1412 break;
1413 case 1:
1414 if (arg1 < arg2 && !nanop)
1415 resBits = mask(sizeBits);
1416 break;
1417 case 2:
1418 if (arg1 <= arg2 && !nanop)
1419 resBits = mask(sizeBits);
1420 break;
1421 case 3:
1422 if (nanop)
1423 resBits = mask(sizeBits);
1424 break;
1425 case 4:
1426 if (arg1 != arg2 || nanop)
1427 resBits = mask(sizeBits);
1428 break;
1429 case 5:
1430 if (!(arg1 < arg2) || nanop)
1431 resBits = mask(sizeBits);
1432 break;
1433 case 6:
1434 if (!(arg1 <= arg2) || nanop)
1435 resBits = mask(sizeBits);
1436 break;
1437 case 7:
1438 if (!nanop)
1439 resBits = mask(sizeBits);
1440 break;
1441 };
1442
1443 result = insertBits(result, hiIndex, loIndex, resBits);
1444 }
1445 FpDestReg.uqw = result;
1446 '''
1447
# Mcmpf2rf: compare the lowest floating point element of FpSrcReg1 with
# that of FpSrcReg2 and report the outcome in ccFlagBits; the generated
# code does not write a destination register.
#
# The generated code asserts that srcSize equals destSize and that the
# size is 4 (float) or 8 (double). OF, SF, AF, ZF, PF and CF are cleared
# first, then:
#   unordered (either operand NaN) -> ZF, PF and CF set
#   arg1 <  arg2                   -> CF set
#   arg1 == arg2                   -> ZF set
#   arg1 >  arg2                   -> all six flags stay clear
#
# The constructor takes only the two sources; the literal "InstRegIndex(0)"
# is passed as the first MediaOp operand (presumably the unused
# destination -- confirm against MediaOp.__init__). bits() is a helper
# defined elsewhere.
1448 class Mcmpf2rf(MediaOp):
1449 def __init__(self, src1, src2,\
1450 size = None, destSize = None, srcSize = None, ext = None):
1451 super(Mcmpf2rf, self).__init__("InstRegIndex(0)", src1,\
1452 src2, size, destSize, srcSize, ext)
# C++ body of the microop; the unions reinterpret raw register bits as
# float/double values.
1453 code = '''
1454 union floatInt
1455 {
1456 float f;
1457 uint32_t i;
1458 };
1459 union doubleInt
1460 {
1461 double d;
1462 uint64_t i;
1463 };
1464
1465 assert(srcSize == destSize);
1466 assert(srcSize == 4 || srcSize == 8);
1467 int size = srcSize;
1468 int sizeBits = size * 8;
1469
1470 double arg1, arg2;
1471 uint64_t arg1Bits = bits(FpSrcReg1.uqw, sizeBits - 1, 0);
1472 uint64_t arg2Bits = bits(FpSrcReg2.uqw, sizeBits - 1, 0);
1473 if (size == 4) {
1474 floatInt fi;
1475 fi.i = arg1Bits;
1476 arg1 = fi.f;
1477 fi.i = arg2Bits;
1478 arg2 = fi.f;
1479 } else {
1480 doubleInt di;
1481 di.i = arg1Bits;
1482 arg1 = di.d;
1483 di.i = arg2Bits;
1484 arg2 = di.d;
1485 }
1486
1487 // ZF PF CF
1488 // Unordered 1 1 1
1489 // Greater than 0 0 0
1490 // Less than 0 0 1
1491 // Equal 1 0 0
1492 // OF = SF = AF = 0
1493 ccFlagBits = ccFlagBits & ~(OFBit | SFBit | AFBit |
1494 ZFBit | PFBit | CFBit);
1495 if (isnan(arg1) || isnan(arg2))
1496 ccFlagBits = ccFlagBits | (ZFBit | PFBit | CFBit);
1497 else if(arg1 < arg2)
1498 ccFlagBits = ccFlagBits | CFBit;
1499 else if(arg1 == arg2)
1500 ccFlagBits = ccFlagBits | ZFBit;
1501 '''
1502}};