mediaop.isa revision 8588:ef28ed90449d
// Copyright (c) 2009 The Regents of The University of Michigan
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: Gabe Black
28
29def template MediaOpExecute {{
30        Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
31                Trace::InstRecord *traceData) const
32        {
33            Fault fault = NoFault;
34
35            %(op_decl)s;
36            %(op_rd)s;
37
38            %(code)s;
39
40            //Write the resulting state to the execution context
41            if(fault == NoFault)
42            {
43                %(op_wb)s;
44            }
45            return fault;
46        }
47}};
48
49def template MediaOpRegDeclare {{
50    class %(class_name)s : public %(base_class)s
51    {
52      public:
53        %(class_name)s(ExtMachInst _machInst,
54                const char * instMnem, uint64_t setFlags,
55                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
56                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
57
58        %(BasicExecDeclare)s
59    };
60}};
61
62def template MediaOpImmDeclare {{
63
64    class %(class_name)s : public %(base_class)s
65    {
66      public:
67        %(class_name)s(ExtMachInst _machInst,
68                const char * instMnem, uint64_t setFlags,
69                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
70                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
71
72        %(BasicExecDeclare)s
73    };
74}};
75
76def template MediaOpRegConstructor {{
77    inline %(class_name)s::%(class_name)s(
78            ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
79            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
80            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
81        %(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
82                _src1, _src2, _dest, _srcSize, _destSize, _ext,
83                %(op_class)s)
84    {
85        %(constructor)s;
86    }
87}};
88
89def template MediaOpImmConstructor {{
90    inline %(class_name)s::%(class_name)s(
91            ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
92            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
93            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
94        %(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
95                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
96                %(op_class)s)
97    {
98        %(constructor)s;
99    }
100}};
101
102let {{
103    # Make these empty strings so that concatenating onto
104    # them will always work.
105    header_output = ""
106    decoder_output = ""
107    exec_output = ""
108
109    immTemplates = (
110            MediaOpImmDeclare,
111            MediaOpImmConstructor,
112            MediaOpExecute)
113
114    regTemplates = (
115            MediaOpRegDeclare,
116            MediaOpRegConstructor,
117            MediaOpExecute)
118
119    class MediaOpMeta(type):
120        def buildCppClasses(self, name, Name, suffix, code):
121
122            # Globals to stick the output in
123            global header_output
124            global decoder_output
125            global exec_output
126
127            # If op2 is used anywhere, make register and immediate versions
128            # of this code.
129            matcher = re.compile(r"(?<!\w)(?P<prefix>s?)op2(?P<typeQual>_[^\W_]+)?")
130            match = matcher.search(code)
131            if match:
132                typeQual = ""
133                if match.group("typeQual"):
134                    typeQual = match.group("typeQual")
135                src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual)
136                self.buildCppClasses(name, Name, suffix,
137                        matcher.sub(src2_name, code))
138                self.buildCppClasses(name + "i", Name, suffix + "Imm",
139                        matcher.sub("imm8", code))
140                return
141
142            base = "X86ISA::MediaOp"
143
144            # If imm8 shows up in the code, use the immediate templates, if
145            # not, hopefully the register ones will be correct.
146            matcher = re.compile("(?<!\w)imm8(?!\w)")
147            if matcher.search(code):
148                base += "Imm"
149                templates = immTemplates
150            else:
151                base += "Reg"
152                templates = regTemplates
153
154            # Get everything ready for the substitution
155            iop = InstObjParams(name, Name + suffix, base, {"code" : code})
156
157            # Generate the actual code (finally!)
158            header_output += templates[0].subst(iop)
159            decoder_output += templates[1].subst(iop)
160            exec_output += templates[2].subst(iop)
161
162
163        def __new__(mcls, Name, bases, dict):
164            abstract = False
165            name = Name.lower()
166            if "abstract" in dict:
167                abstract = dict['abstract']
168                del dict['abstract']
169
170            cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
171            if not abstract:
172                cls.className = Name
173                cls.base_mnemonic = name
174                code = cls.code
175
176                # Set up the C++ classes
177                mcls.buildCppClasses(cls, name, Name, "", code)
178
179                # Hook into the microassembler dict
180                global microopClasses
181                microopClasses[name] = cls
182
183                # If op2 is used anywhere, make register and immediate versions
184                # of this code.
185                matcher = re.compile(r"op2(?P<typeQual>_[^\W_]+)?")
186                if matcher.search(code):
187                    microopClasses[name + 'i'] = cls
188            return cls
189
190
191    class MediaOp(X86Microop):
192        __metaclass__ = MediaOpMeta
193        # This class itself doesn't act as a microop
194        abstract = True
195
196        def __init__(self, dest, src1, op2,
197                size = None, destSize = None, srcSize = None, ext = None):
198            self.dest = dest
199            self.src1 = src1
200            self.op2 = op2
201            if size is not None:
202                self.srcSize = size
203                self.destSize = size
204            if srcSize is not None:
205                self.srcSize = srcSize
206            if destSize is not None:
207                self.destSize = destSize
208            if self.srcSize is None:
209                raise Exception, "Source size not set."
210            if self.destSize is None:
211                raise Exception, "Dest size not set."
212            if ext is None:
213                self.ext = 0
214            else:
215                self.ext = ext 
216
217        def getAllocator(self, microFlags):
218            className = self.className
219            if self.mnemonic == self.base_mnemonic + 'i':
220                className += "Imm"
221            allocator = '''new %(class_name)s(machInst, macrocodeBlock,
222                    %(flags)s, %(src1)s, %(op2)s, %(dest)s,
223                    %(srcSize)s, %(destSize)s, %(ext)s)''' % {
224                "class_name" : className,
225                "flags" : self.microFlagsText(microFlags),
226                "src1" : self.src1, "op2" : self.op2,
227                "dest" : self.dest,
228                "srcSize" : self.srcSize,
229                "destSize" : self.destSize,
230                "ext" : self.ext}
231            return allocator
232
233    class Mov2int(MediaOp):
234        def __init__(self, dest, src1, src2 = 0, \
235                size = None, destSize = None, srcSize = None, ext = None):
236            super(Mov2int, self).__init__(dest, src1,\
237                    src2, size, destSize, srcSize, ext)
238        code = '''
239            int items = sizeof(FloatRegBits) / srcSize;
240            int offset = imm8;
241            if (bits(src1, 0) && (ext & 0x1))
242                offset -= items;
243            if (offset >= 0 && offset < items) {
244                uint64_t fpSrcReg1 =
245                    bits(FpSrcReg1_uqw,
246                            (offset + 1) * srcSize * 8 - 1,
247                            (offset + 0) * srcSize * 8);
248                DestReg = merge(0, fpSrcReg1, destSize);
249            } else {
250                DestReg = DestReg;
251            }
252        '''
253
254    class Mov2fp(MediaOp):
255        def __init__(self, dest, src1, src2 = 0, \
256                size = None, destSize = None, srcSize = None, ext = None):
257            super(Mov2fp, self).__init__(dest, src1,\
258                    src2, size, destSize, srcSize, ext)
259        code = '''
260            int items = sizeof(FloatRegBits) / destSize;
261            int offset = imm8;
262            if (bits(dest, 0) && (ext & 0x1))
263                offset -= items;
264            if (offset >= 0 && offset < items) {
265                uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
266                FpDestReg_uqw =
267                    insertBits(FpDestReg_uqw,
268                            (offset + 1) * destSize * 8 - 1,
269                            (offset + 0) * destSize * 8, srcReg1);
270            } else {
271                FpDestReg_uqw = FpDestReg_uqw;
272            }
273        '''
274
275    class Movsign(MediaOp):
276        def __init__(self, dest, src, \
277                size = None, destSize = None, srcSize = None, ext = None):
278            super(Movsign, self).__init__(dest, src,\
279                    "InstRegIndex(0)", size, destSize, srcSize, ext)
280        code = '''
281            int items = sizeof(FloatRegBits) / srcSize;
282            uint64_t result = 0;
283            int offset = (ext & 0x1) ? items : 0;
284            for (int i = 0; i < items; i++) {
285                uint64_t picked =
286                    bits(FpSrcReg1_uqw, (i + 1) * 8 * srcSize - 1);
287                result = insertBits(result, i + offset, i + offset, picked);
288            }
289            DestReg = DestReg | result;
290        '''
291
    class Maskmov(MediaOp):
        # Masked move. For every item of "size" bytes, the item of
        # FpSrcReg1 replaces the destination item when the top bit of the
        # matching FpSrcReg2 item is set; all other destination items keep
        # their previous value. Source and destination sizes must be equal.
        # NOTE(review): numItems() is defined outside this file section;
        # presumably it returns the item count per register -- confirm.
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                if (bits(FpSrcReg2_uqw, hiIndex))
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
            }
            FpDestReg_uqw = result;
        '''
309
    class shuffle(MediaOp):
        # Shuffle. Each destination item is chosen by a selector field read
        # from ext, lowest field first: 1 bit per item for 8-byte items,
        # 2 bits per item otherwise. A selector whose byte offset
        # (selector * size) reaches past one register picks from FpSrcReg2
        # after subtracting options / 2; any other selector picks from
        # FpSrcReg1. Source and destination sizes must be equal.
        # The class name is lower case, so the registered mnemonic and the
        # generated C++ class name are both "shuffle".
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = sizeof(FloatRegBits) / size;
            int options;
            int optionBits;
            if (size == 8) {
                options = 2;
                optionBits = 1;
            } else {
                options = 4;
                optionBits = 2;
            }

            uint64_t result = 0;
            uint8_t sel = ext;

            for (int i = 0; i < items; i++) {
                uint64_t resBits;
                uint8_t lsel = sel & mask(optionBits);
                if (lsel * size >= sizeof(FloatRegBits)) {
                    lsel -= options / 2;
                    resBits = bits(FpSrcReg2_uqw,
                            (lsel + 1) * sizeBits - 1,
                            (lsel + 0) * sizeBits);
                }  else {
                    resBits = bits(FpSrcReg1_uqw,
                            (lsel + 1) * sizeBits - 1,
                            (lsel + 0) * sizeBits);
                }

                sel >>= optionBits;

                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''
351
    class Unpack(MediaOp):
        # Unpack (interleave). Takes half a register's worth of items from
        # each source and interleaves them: even result items come from
        # FpSrcReg1, odd result items from FpSrcReg2. A non-zero ext reads
        # the upper half of each source instead of the lower half. Source
        # and destination sizes must be equal.
        code = '''
            assert(srcSize == destSize);
            int size = destSize;
            int items = (sizeof(FloatRegBits) / size) / 2;
            int offset = ext ? items : 0;
            uint64_t result = 0;
            for (int i = 0; i < items; i++) {
                uint64_t pickedLow =
                    bits(FpSrcReg1_uqw, (i + offset + 1) * 8 * size - 1,
                                        (i + offset) * 8 * size);
                result = insertBits(result,
                                    (2 * i + 1) * 8 * size - 1,
                                    (2 * i + 0) * 8 * size,
                                    pickedLow);
                uint64_t pickedHigh =
                    bits(FpSrcReg2_uqw, (i + offset + 1) * 8 * size - 1,
                                        (i + offset) * 8 * size);
                result = insertBits(result,
                                    (2 * i + 2) * 8 * size - 1,
                                    (2 * i + 1) * 8 * size,
                                    pickedHigh);
            }
            FpDestReg_uqw = result;
        '''
377
378    class Pack(MediaOp):
379        code = '''
380            assert(srcSize == destSize * 2);
381            int items = (sizeof(FloatRegBits) / destSize);
382            int destBits = destSize * 8;
383            int srcBits = srcSize * 8;
384            uint64_t result = 0;
385            int i;
386            for (i = 0; i < items / 2; i++) {
387                uint64_t picked =
388                    bits(FpSrcReg1_uqw, (i + 1) * srcBits - 1,
389                                        (i + 0) * srcBits);
390                unsigned signBit = bits(picked, srcBits - 1);
391                uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
392
393                // Handle saturation.
394                if (signBit) {
395                    if (overflow != mask(destBits - srcBits + 1)) {
396                        if (signedOp())
397                            picked = (ULL(1) << (destBits - 1));
398                        else
399                            picked = 0;
400                    }
401                } else {
402                    if (overflow != 0) {
403                        if (signedOp())
404                            picked = mask(destBits - 1);
405                        else
406                            picked = mask(destBits);
407                    }
408                }
409                result = insertBits(result,
410                                    (i + 1) * destBits - 1,
411                                    (i + 0) * destBits,
412                                    picked);
413            }
414            for (;i < items; i++) {
415                uint64_t picked =
416                    bits(FpSrcReg2_uqw, (i - items + 1) * srcBits - 1,
417                                        (i - items + 0) * srcBits);
418                unsigned signBit = bits(picked, srcBits - 1);
419                uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
420
421                // Handle saturation.
422                if (signBit) {
423                    if (overflow != mask(destBits - srcBits + 1)) {
424                        if (signedOp())
425                            picked = (ULL(1) << (destBits - 1));
426                        else
427                            picked = 0;
428                    }
429                } else {
430                    if (overflow != 0) {
431                        if (signedOp())
432                            picked = mask(destBits - 1);
433                        else
434                            picked = mask(destBits);
435                    }
436                }
437                result = insertBits(result,
438                                    (i + 1) * destBits - 1,
439                                    (i + 0) * destBits,
440                                    picked);
441            }
442            FpDestReg_uqw = result;
443        '''
444
445    class Mxor(MediaOp):
446        def __init__(self, dest, src1, src2):
447            super(Mxor, self).__init__(dest, src1, src2, 1)
448        code = '''
449            FpDestReg_uqw = FpSrcReg1_uqw ^ FpSrcReg2_uqw;
450        '''
451
452    class Mor(MediaOp):
453        def __init__(self, dest, src1, src2):
454            super(Mor, self).__init__(dest, src1, src2, 1)
455        code = '''
456            FpDestReg_uqw = FpSrcReg1_uqw | FpSrcReg2_uqw;
457        '''
458
459    class Mand(MediaOp):
460        def __init__(self, dest, src1, src2):
461            super(Mand, self).__init__(dest, src1, src2, 1)
462        code = '''
463            FpDestReg_uqw = FpSrcReg1_uqw & FpSrcReg2_uqw;
464        '''
465
466    class Mandn(MediaOp):
467        def __init__(self, dest, src1, src2):
468            super(Mandn, self).__init__(dest, src1, src2, 1)
469        code = '''
470            FpDestReg_uqw = ~FpSrcReg1_uqw & FpSrcReg2_uqw;
471        '''
472
    class Mminf(MediaOp):
        # Item-wise floating point minimum. Items are 4 bytes (float) or
        # 8 bytes (double); source and destination sizes must be equal.
        # The raw bits of the first source are stored when arg1 < arg2 is
        # true; in every other case, including comparisons that are false
        # because an operand is a NaN, the raw bits of the second source
        # are stored.
        # NOTE(review): numItems() is defined outside this file section;
        # presumably it returns the item count per register -- confirm.
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                double arg1, arg2;
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);

                if (size == 4) {
                    floatInt fi;
                    fi.i = arg1Bits;
                    arg1 = fi.f;
                    fi.i = arg2Bits;
                    arg2 = fi.f;
                } else {
                    doubleInt di;
                    di.i = arg1Bits;
                    arg1 = di.d;
                    di.i = arg2Bits;
                    arg2 = di.d;
                }

                if (arg1 < arg2) {
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
                } else {
                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
                }
            }
            FpDestReg_uqw = result;
        '''
522
    class Mmaxf(MediaOp):
        # Item-wise floating point maximum. Items are 4 bytes (float) or
        # 8 bytes (double); source and destination sizes must be equal.
        # The raw bits of the first source are stored when arg1 > arg2 is
        # true; in every other case, including comparisons that are false
        # because an operand is a NaN, the raw bits of the second source
        # are stored.
        # NOTE(review): numItems() is defined outside this file section;
        # presumably it returns the item count per register -- confirm.
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                double arg1, arg2;
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);

                if (size == 4) {
                    floatInt fi;
                    fi.i = arg1Bits;
                    arg1 = fi.f;
                    fi.i = arg2Bits;
                    arg2 = fi.f;
                } else {
                    doubleInt di;
                    di.i = arg1Bits;
                    arg1 = di.d;
                    di.i = arg2Bits;
                    arg2 = di.d;
                }

                if (arg1 > arg2) {
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
                } else {
                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
                }
            }
            FpDestReg_uqw = result;
        '''
572
    class Mmini(MediaOp):
        # Item-wise integer minimum. Each item is also sign-extended to 64
        # bits; signedOp() selects between comparing the sign-extended
        # values and comparing the raw item bits as unsigned numbers. The
        # second source is stored whenever the first is not strictly
        # smaller. Source and destination sizes must be equal.
        # NOTE(review): signedOp() and numItems() are defined outside this
        # file section -- confirm their exact meaning there.
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                int64_t arg1 = arg1Bits |
                    (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
                int64_t arg2 = arg2Bits |
                    (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
                uint64_t resBits;

                if (signedOp()) {
                    if (arg1 < arg2) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                } else {
                    if (arg1Bits < arg2Bits) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                }
                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''
610
    class Mmaxi(MediaOp):
        # Item-wise integer maximum. Each item is also sign-extended to 64
        # bits; signedOp() selects between comparing the sign-extended
        # values and comparing the raw item bits as unsigned numbers. The
        # second source is stored whenever the first is not strictly
        # larger. Source and destination sizes must be equal.
        # NOTE(review): signedOp() and numItems() are defined outside this
        # file section -- confirm their exact meaning there.
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                int64_t arg1 = arg1Bits |
                    (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
                int64_t arg2 = arg2Bits |
                    (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
                uint64_t resBits;

                if (signedOp()) {
                    if (arg1 > arg2) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                } else {
                    if (arg1Bits > arg2Bits) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                }
                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''
648
649    class Msqrt(MediaOp):
650        def __init__(self, dest, src, \
651                size = None, destSize = None, srcSize = None, ext = None):
652            super(Msqrt, self).__init__(dest, src,\
653                    "InstRegIndex(0)", size, destSize, srcSize, ext)
654        code = '''
655            union floatInt
656            {
657                float f;
658                uint32_t i;
659            };
660            union doubleInt
661            {
662                double d;
663                uint64_t i;
664            };
665
666            assert(srcSize == destSize);
667            int size = srcSize;
668            int sizeBits = size * 8;
669            assert(srcSize == 4 || srcSize == 8);
670            int items = numItems(size);
671            uint64_t result = FpDestReg_uqw;
672
673            for (int i = 0; i < items; i++) {
674                int hiIndex = (i + 1) * sizeBits - 1;
675                int loIndex = (i + 0) * sizeBits;
676                uint64_t argBits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
677
678                if (size == 4) {
679                    floatInt fi;
680                    fi.i = argBits;
681                    fi.f = sqrt(fi.f);
682                    argBits = fi.i;
683                } else {
684                    doubleInt di;
685                    di.i = argBits;
686                    di.d = sqrt(di.d);
687                    argBits = di.i;
688                }
689                result = insertBits(result, hiIndex, loIndex, argBits);
690            }
691            FpDestReg_uqw = result;
692        '''
693
    class Maddf(MediaOp):
        # Item-wise floating point addition, first source plus second
        # source. Items are 4 bytes (float arithmetic) or 8 bytes (double
        # arithmetic); source and destination sizes must be equal. The
        # unions reinterpret the raw item bits as floating point values and
        # back.
        # NOTE(review): numItems() is defined outside this file section;
        # presumably it returns the item count per register -- confirm.
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f + arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d + arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''
739
    class Msubf(MediaOp):
        # Item-wise floating point subtraction, first source minus second
        # source. Items are 4 bytes (float arithmetic) or 8 bytes (double
        # arithmetic); source and destination sizes must be equal. The
        # unions reinterpret the raw item bits as floating point values and
        # back.
        # NOTE(review): numItems() is defined outside this file section;
        # presumably it returns the item count per register -- confirm.
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f - arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d - arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''
785
    class Mmulf(MediaOp):
        # Item-wise floating point multiplication of the two sources. Items
        # are 4 bytes (float arithmetic) or 8 bytes (double arithmetic);
        # source and destination sizes must be equal. The unions
        # reinterpret the raw item bits as floating point values and back.
        # NOTE(review): numItems() is defined outside this file section;
        # presumably it returns the item count per register -- confirm.
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f * arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d * arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''
831
    class Mdivf(MediaOp):
        # Item-wise floating point division, first source divided by second
        # source. Items are 4 bytes (float arithmetic) or 8 bytes (double
        # arithmetic); source and destination sizes must be equal. The
        # unions reinterpret the raw item bits as floating point values and
        # back.
        # NOTE(review): numItems() is defined outside this file section;
        # presumably it returns the item count per register -- confirm.
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg_uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f / arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d / arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg_uqw = result;
        '''
877
878    class Maddi(MediaOp):
        # Lane-wise integer add with optional saturation.
        #
        # srcSize must equal destSize. Each lane of FpSrcReg1 and FpSrcReg2
        # is added as an unsigned 64 bit quantity and the sum is inserted
        # into the same lane of the result.
        #
        # When ext bit 0x2 is set the sum is adjusted on overflow:
        #   - signedOp() true: overflow is detected when both operands have
        #     the same sign bit and the sum's sign bit differs. If the sum's
        #     sign bit is 0 the lane becomes 1 << (sizeBits - 1) (only the
        #     top bit set), otherwise mask(sizeBits - 1).
        #   - signedOp() false: if findCarry() reports a carry the lane
        #     becomes mask(sizeBits).
        # With ext bit 0x2 clear the sum is inserted as is.
        #
        # signedOp(), findCarry() and mask() are defined outside this
        # section; presumably mask(n) is a value with the low n bits set
        # and findCarry() reports a carry out of bit sizeBits - 1.
879        code = '''
880            assert(srcSize == destSize);
881            int size = srcSize;
882            int sizeBits = size * 8;
883            int items = numItems(size);
884            uint64_t result = FpDestReg_uqw;
885
886            for (int i = 0; i < items; i++) {
887                int hiIndex = (i + 1) * sizeBits - 1;
888                int loIndex = (i + 0) * sizeBits;
889                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
890                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
891                uint64_t resBits = arg1Bits + arg2Bits;
892                
893                if (ext & 0x2) {
894                    if (signedOp()) {
895                        int arg1Sign = bits(arg1Bits, sizeBits - 1);
896                        int arg2Sign = bits(arg2Bits, sizeBits - 1);
897                        int resSign = bits(resBits, sizeBits - 1);
898                        if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
899                            if (resSign == 0)
900                                resBits = (ULL(1) << (sizeBits - 1));
901                            else
902                                resBits = mask(sizeBits - 1);
903                        }
904                    } else {
905                        if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
906                            resBits = mask(sizeBits);
907                    }
908                }
909
910                result = insertBits(result, hiIndex, loIndex, resBits);
911            }
912            FpDestReg_uqw = result;
913        '''
914
915    class Msubi(MediaOp):
        # Lane-wise integer subtract (src1 - src2) with optional saturation.
        #
        # srcSize must equal destSize. Each lane of FpSrcReg2 is subtracted
        # from the matching lane of FpSrcReg1 as unsigned 64 bit quantities
        # and the difference is inserted into the same lane of the result.
        #
        # When ext bit 0x2 is set the difference is adjusted:
        #   - signedOp() true: the same test as the add op is used, but
        #     with the second operand's sign bit inverted. Overflow is
        #     flagged when arg1's sign equals the inverted arg2 sign and
        #     differs from the difference's sign. The lane then becomes
        #     1 << (sizeBits - 1) if the difference's sign bit is 0,
        #     otherwise mask(sizeBits - 1).
        #   - signedOp() false: if arg2 is larger than arg1 the lane
        #     becomes 0. Otherwise, if findCarry() on (arg1, ~arg2) is
        #     false, the lane becomes mask(sizeBits).
        # NOTE(review): the intent of the second unsigned branch is not
        # evident from this section; confirm against findCarry()'s
        # definition before touching it.
916        code = '''
917            assert(srcSize == destSize);
918            int size = srcSize;
919            int sizeBits = size * 8;
920            int items = numItems(size);
921            uint64_t result = FpDestReg_uqw;
922
923            for (int i = 0; i < items; i++) {
924                int hiIndex = (i + 1) * sizeBits - 1;
925                int loIndex = (i + 0) * sizeBits;
926                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
927                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
928                uint64_t resBits = arg1Bits - arg2Bits;
929                
930                if (ext & 0x2) {
931                    if (signedOp()) {
932                        int arg1Sign = bits(arg1Bits, sizeBits - 1);
933                        int arg2Sign = !bits(arg2Bits, sizeBits - 1);
934                        int resSign = bits(resBits, sizeBits - 1);
935                        if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
936                            if (resSign == 0)
937                                resBits = (ULL(1) << (sizeBits - 1));
938                            else
939                                resBits = mask(sizeBits - 1);
940                        }
941                    } else {
942                        if (arg2Bits > arg1Bits) {
943                            resBits = 0;
944                        } else if (!findCarry(sizeBits, resBits,
945                                             arg1Bits, ~arg2Bits)) {
946                            resBits = mask(sizeBits);
947                        }
948                    }
949                }
950
951                result = insertBits(result, hiIndex, loIndex, resBits);
952            }
953            FpDestReg_uqw = result;
954        '''
955
956    class Mmuli(MediaOp):
        # Lane-wise integer multiply where the destination lane may be
        # wider than the source lane.
        #
        # Asserts destBits <= 64 and destSize >= srcSize, and iterates over
        # numItems(destSize) destination lanes.
        #
        # Source lane selection for destination lane i:
        #   - ext bit 16 clear: source lanes are packed, starting at bit
        #     i * srcBits.
        #   - ext bits 16 and 32 set: offset is i * (destBits - srcBits),
        #     so the source lane starts at bit i * destBits.
        #   - ext bit 16 set, bit 32 clear: a further srcBits is added, so
        #     the source lane starts at bit i * destBits + srcBits.
        #
        # signedOp() selects a signed multiply (each operand is first sign
        # extended from bit srcBits - 1 into an int64_t) or a plain
        # unsigned multiply. If ext bit 0x4 is set, 1 << (destBits - 1) is
        # added to the product (presumably a rounding term). If multHi()
        # is true the product is then shifted right by destBits. Finally
        # the value is inserted into destination lane i.
        #
        # NOTE(review): with destBits == 64 the multHi() shift would be a
        # shift by the full width of uint64_t, which C++ leaves undefined;
        # confirm callers never combine the two.
957        code = '''
958            int srcBits = srcSize * 8;
959            int destBits = destSize * 8;
960            assert(destBits <= 64);
961            assert(destSize >= srcSize);
962            int items = numItems(destSize);
963            uint64_t result = FpDestReg_uqw;
964
965            for (int i = 0; i < items; i++) {
966                int offset = 0;
967                if (ext & 16) {
968                    if (ext & 32)
969                        offset = i * (destBits - srcBits);
970                    else
971                        offset = i * (destBits - srcBits) + srcBits;
972                }
973                int srcHiIndex = (i + 1) * srcBits - 1 + offset;
974                int srcLoIndex = (i + 0) * srcBits + offset;
975                uint64_t arg1Bits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
976                uint64_t arg2Bits = bits(FpSrcReg2_uqw, srcHiIndex, srcLoIndex);
977                uint64_t resBits;
978
979                if (signedOp()) {
980                    int64_t arg1 = arg1Bits |
981                        (0 - (arg1Bits & (ULL(1) << (srcBits - 1))));
982                    int64_t arg2 = arg2Bits |
983                        (0 - (arg2Bits & (ULL(1) << (srcBits - 1))));
984                    resBits = (uint64_t)(arg1 * arg2);
985                } else {
986                    resBits = arg1Bits * arg2Bits;
987                }
988
989                if (ext & 0x4)
990                    resBits += (ULL(1) << (destBits - 1));
991                
992                if (multHi())
993                    resBits >>= destBits;
994
995                int destHiIndex = (i + 1) * destBits - 1;
996                int destLoIndex = (i + 0) * destBits;
997                result = insertBits(result, destHiIndex, destLoIndex, resBits);
998            }
999            FpDestReg_uqw = result;
1000        '''
1001
1002    class Mavg(MediaOp):
        # Lane-wise unsigned average, rounded up.
        #
        # srcSize must equal destSize. For each of numItems(size) lanes the
        # two source lanes are added together with an extra 1 and the sum
        # is divided by two, i.e. (a + b + 1) / 2 in unsigned 64 bit
        # arithmetic. The value is inserted into the same lane of the
        # result, which is seeded with the current FpDestReg value.
1003        code = '''
1004            assert(srcSize == destSize);
1005            int size = srcSize;
1006            int sizeBits = size * 8;
1007            int items = numItems(size);
1008            uint64_t result = FpDestReg_uqw;
1009
1010            for (int i = 0; i < items; i++) {
1011                int hiIndex = (i + 1) * sizeBits - 1;
1012                int loIndex = (i + 0) * sizeBits;
1013                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
1014                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
1015                uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;
1016                
1017                result = insertBits(result, hiIndex, loIndex, resBits);
1018            }
1019            FpDestReg_uqw = result;
1020        '''
1021
1022    class Msad(MediaOp):
        # Sum of absolute differences across all source lanes.
        #
        # The lane count is sizeof(FloatRegBits) / srcSize rather than
        # numItems(). For every lane the second source lane is subtracted
        # from the first, the difference is held in an int64_t and negated
        # if it is negative, and the magnitude is added to a running sum.
        #
        # Unlike the lane-wise ops, the destination is not merged with its
        # old value: the whole register is overwritten with the sum masked
        # by mask(destSize * 8).
1023        code = '''
1024            int srcBits = srcSize * 8;
1025            int items = sizeof(FloatRegBits) / srcSize;
1026
1027            uint64_t sum = 0;
1028            for (int i = 0; i < items; i++) {
1029                int hiIndex = (i + 1) * srcBits - 1;
1030                int loIndex = (i + 0) * srcBits;
1031                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
1032                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
1033                int64_t resBits = arg1Bits - arg2Bits;
1034                if (resBits < 0)
1035                    resBits = -resBits;
1036                sum += resBits;
1037            }
1038            FpDestReg_uqw = sum & mask(destSize * 8);
1039        '''
1040
1041    class Msrl(MediaOp):
        # Lane-wise logical (zero filling) right shift.
        #
        # srcSize must equal destSize. Every lane of FpSrcReg1 is shifted
        # right by the same amount, taken from op2_uqw (defined outside
        # this section). A shift amount of sizeBits or more yields 0 for
        # the lane; otherwise the shifted value is masked with
        # mask(sizeBits - shiftAmt) before being inserted into the same
        # lane of the result.
1042        code = '''
1043
1044            assert(srcSize == destSize);
1045            int size = srcSize;
1046            int sizeBits = size * 8;
1047            int items = numItems(size);
1048            uint64_t shiftAmt = op2_uqw;
1049            uint64_t result = FpDestReg_uqw;
1050
1051            for (int i = 0; i < items; i++) {
1052                int hiIndex = (i + 1) * sizeBits - 1;
1053                int loIndex = (i + 0) * sizeBits;
1054                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
1055                uint64_t resBits;
1056                if (shiftAmt >= sizeBits) {
1057                    resBits = 0;
1058                } else {
1059                    resBits = (arg1Bits >> shiftAmt) &
1060                        mask(sizeBits - shiftAmt);
1061                }
1062
1063                result = insertBits(result, hiIndex, loIndex, resBits);
1064            }
1065            FpDestReg_uqw = result;
1066        '''
1067
1068    class Msra(MediaOp):
        # Lane-wise arithmetic (sign filling) right shift.
        #
        # srcSize must equal destSize. Every lane of FpSrcReg1 is shifted
        # right by the same amount, taken from op2_uqw (defined outside
        # this section).
        #   - Shift amount >= sizeBits: the lane becomes mask(sizeBits) if
        #     its top bit (sizeBits - 1) was set, otherwise 0.
        #   - Otherwise: the lane is shifted right, then the bit at
        #     position sizeBits - 1 - shiftAmt (where the original top bit
        #     landed) is isolated and subtracted from zero. OR-ing that in
        #     sets every bit from that position upwards when the original
        #     top bit was 1.
        # The value is then inserted into the same lane of the result.
1069        code = '''
1070
1071            assert(srcSize == destSize);
1072            int size = srcSize;
1073            int sizeBits = size * 8;
1074            int items = numItems(size);
1075            uint64_t shiftAmt = op2_uqw;
1076            uint64_t result = FpDestReg_uqw;
1077
1078            for (int i = 0; i < items; i++) {
1079                int hiIndex = (i + 1) * sizeBits - 1;
1080                int loIndex = (i + 0) * sizeBits;
1081                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
1082                uint64_t resBits;
1083                if (shiftAmt >= sizeBits) {
1084                    if (bits(arg1Bits, sizeBits - 1))
1085                        resBits = mask(sizeBits);
1086                    else
1087                        resBits = 0;
1088                } else {
1089                    resBits = (arg1Bits >> shiftAmt);
1090                    resBits = resBits |
1091                        (0 - (resBits & (ULL(1) << (sizeBits - 1 - shiftAmt))));
1092                }
1093
1094                result = insertBits(result, hiIndex, loIndex, resBits);
1095            }
1096            FpDestReg_uqw = result;
1097        '''
1098
1099    class Msll(MediaOp):
        # Lane-wise left shift.
        #
        # srcSize must equal destSize. Every lane of FpSrcReg1 is shifted
        # left by the same amount, taken from op2_uqw (defined outside this
        # section). A shift amount of sizeBits or more yields 0 for the
        # lane. Otherwise the shifted 64 bit value is handed to
        # insertBits() for bits [hiIndex:loIndex]; presumably insertBits()
        # discards whatever was shifted past the top of the lane.
1100        code = '''
1101
1102            assert(srcSize == destSize);
1103            int size = srcSize;
1104            int sizeBits = size * 8;
1105            int items = numItems(size);
1106            uint64_t shiftAmt = op2_uqw;
1107            uint64_t result = FpDestReg_uqw;
1108
1109            for (int i = 0; i < items; i++) {
1110                int hiIndex = (i + 1) * sizeBits - 1;
1111                int loIndex = (i + 0) * sizeBits;
1112                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
1113                uint64_t resBits;
1114                if (shiftAmt >= sizeBits) {
1115                    resBits = 0;
1116                } else {
1117                    resBits = (arg1Bits << shiftAmt);
1118                }
1119
1120                result = insertBits(result, hiIndex, loIndex, resBits);
1121            }
1122            FpDestReg_uqw = result;
1123        '''
1124
1125    class Cvtf2i(MediaOp):
1126        def __init__(self, dest, src, \
1127                size = None, destSize = None, srcSize = None, ext = None):
1128            super(Cvtf2i, self).__init__(dest, src,\
1129                    "InstRegIndex(0)", size, destSize, srcSize, ext)
1130        code = '''
1131            union floatInt
1132            {
1133                float f;
1134                uint32_t i;
1135            };
1136            union doubleInt
1137            {
1138                double d;
1139                uint64_t i;
1140            };
1141
1142            assert(destSize == 4 || destSize == 8);
1143            assert(srcSize == 4 || srcSize == 8);
1144            int srcSizeBits = srcSize * 8;
1145            int destSizeBits = destSize * 8;
1146            int items;
1147            int srcStart = 0;
1148            int destStart = 0;
1149            if (srcSize == 2 * destSize) {
1150                items = numItems(srcSize);
1151                if (ext & 0x2)
1152                    destStart = destSizeBits * items;
1153            } else if (destSize == 2 * srcSize) {
1154                items = numItems(destSize);
1155                if (ext & 0x2)
1156                    srcStart = srcSizeBits * items;
1157            } else {
1158                items = numItems(destSize);
1159            }
1160            uint64_t result = FpDestReg_uqw;
1161
1162            for (int i = 0; i < items; i++) {
1163                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1164                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1165                uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
1166                double arg;
1167
1168                if (srcSize == 4) {
1169                    floatInt fi;
1170                    fi.i = argBits;
1171                    arg = fi.f;
1172                } else {
1173                    doubleInt di;
1174                    di.i = argBits;
1175                    arg = di.d;
1176                }
1177
1178                if (ext & 0x4) {
1179                    if (arg >= 0)
1180                        arg += 0.5;
1181                    else
1182                        arg -= 0.5;
1183                }
1184
1185                if (destSize == 4) {
1186                    argBits = (uint32_t)arg;
1187                } else {
1188                    argBits = (uint64_t)arg;
1189                }
1190                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1191                int destLoIndex = destStart + (i + 0) * destSizeBits;
1192                result = insertBits(result, destHiIndex, destLoIndex, argBits);
1193            }
1194            FpDestReg_uqw = result;
1195        '''
1196
1197    class Cvti2f(MediaOp):
        # Lane-wise signed integer to floating point conversion.
        #
        # The constructor takes a single source; "InstRegIndex(0)" is
        # passed to MediaOp in the position the two-source ops use for
        # their second source.
        #
        # Source and destination lanes must each be 4 or 8 bytes:
        #   - srcSize == 2 * destSize (narrowing): numItems(srcSize) lanes
        #     are converted; with ext bit 0x2 set the results are written
        #     starting at bit destSizeBits * items instead of bit 0.
        #   - destSize == 2 * srcSize (widening): numItems(destSize) lanes
        #     are converted; with ext bit 0x2 set the inputs are read
        #     starting at bit srcSizeBits * items instead of bit 0.
        #   - otherwise: numItems(destSize) lanes, both starting at bit 0.
        #
        # Each source lane is sign extended from bit srcSizeBits - 1 into
        # an int64_t, converted to double, and then stored through a union
        # as float (destSize 4) or double (destSize 8) so its bit pattern
        # can be inserted into the destination lane.
1198        def __init__(self, dest, src, \
1199                size = None, destSize = None, srcSize = None, ext = None):
1200            super(Cvti2f, self).__init__(dest, src,\
1201                    "InstRegIndex(0)", size, destSize, srcSize, ext)
1202        code = '''
1203            union floatInt
1204            {
1205                float f;
1206                uint32_t i;
1207            };
1208            union doubleInt
1209            {
1210                double d;
1211                uint64_t i;
1212            };
1213
1214            assert(destSize == 4 || destSize == 8);
1215            assert(srcSize == 4 || srcSize == 8);
1216            int srcSizeBits = srcSize * 8;
1217            int destSizeBits = destSize * 8;
1218            int items;
1219            int srcStart = 0;
1220            int destStart = 0;
1221            if (srcSize == 2 * destSize) {
1222                items = numItems(srcSize);
1223                if (ext & 0x2)
1224                    destStart = destSizeBits * items;
1225            } else if (destSize == 2 * srcSize) {
1226                items = numItems(destSize);
1227                if (ext & 0x2)
1228                    srcStart = srcSizeBits * items;
1229            } else {
1230                items = numItems(destSize);
1231            }
1232            uint64_t result = FpDestReg_uqw;
1233
1234            for (int i = 0; i < items; i++) {
1235                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1236                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1237                uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
1238
1239                int64_t sArg = argBits |
1240                    (0 - (argBits & (ULL(1) << (srcSizeBits - 1))));
1241                double arg = sArg;
1242
1243                if (destSize == 4) {
1244                    floatInt fi;
1245                    fi.f = arg;
1246                    argBits = fi.i;
1247                } else {
1248                    doubleInt di;
1249                    di.d = arg;
1250                    argBits = di.i;
1251                }
1252                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1253                int destLoIndex = destStart + (i + 0) * destSizeBits;
1254                result = insertBits(result, destHiIndex, destLoIndex, argBits);
1255            }
1256            FpDestReg_uqw = result;
1257        '''
1258
1259    class Cvtf2f(MediaOp):
        # Lane-wise floating point precision conversion (float <-> double).
        #
        # The constructor takes a single source; "InstRegIndex(0)" is
        # passed to MediaOp in the position the two-source ops use for
        # their second source.
        #
        # Source and destination lanes must each be 4 or 8 bytes:
        #   - srcSize == 2 * destSize (narrowing): numItems(srcSize) lanes
        #     are converted; with ext bit 0x2 set the results are written
        #     starting at bit destSizeBits * items instead of bit 0.
        #   - destSize == 2 * srcSize (widening): numItems(destSize) lanes
        #     are converted; with ext bit 0x2 set the inputs are read
        #     starting at bit srcSizeBits * items instead of bit 0.
        #   - otherwise: numItems(destSize) lanes, both starting at bit 0.
        #
        # Each source lane is reinterpreted as float or double, held in a
        # double, and then stored through a union as float (destSize 4) or
        # double (destSize 8) so its bit pattern can be inserted into the
        # destination lane.
1260        def __init__(self, dest, src, \
1261                size = None, destSize = None, srcSize = None, ext = None):
1262            super(Cvtf2f, self).__init__(dest, src,\
1263                    "InstRegIndex(0)", size, destSize, srcSize, ext)
1264        code = '''
1265            union floatInt
1266            {
1267                float f;
1268                uint32_t i;
1269            };
1270            union doubleInt
1271            {
1272                double d;
1273                uint64_t i;
1274            };
1275
1276            assert(destSize == 4 || destSize == 8);
1277            assert(srcSize == 4 || srcSize == 8);
1278            int srcSizeBits = srcSize * 8;
1279            int destSizeBits = destSize * 8;
1280            int items;
1281            int srcStart = 0;
1282            int destStart = 0;
1283            if (srcSize == 2 * destSize) {
1284                items = numItems(srcSize);
1285                if (ext & 0x2)
1286                    destStart = destSizeBits * items;
1287            } else if (destSize == 2 * srcSize) {
1288                items = numItems(destSize);
1289                if (ext & 0x2)
1290                    srcStart = srcSizeBits * items;
1291            } else {
1292                items = numItems(destSize);
1293            }
1294            uint64_t result = FpDestReg_uqw;
1295
1296            for (int i = 0; i < items; i++) {
1297                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1298                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1299                uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
1300                double arg;
1301
1302                if (srcSize == 4) {
1303                    floatInt fi;
1304                    fi.i = argBits;
1305                    arg = fi.f;
1306                } else {
1307                    doubleInt di;
1308                    di.i = argBits;
1309                    arg = di.d;
1310                }
1311                if (destSize == 4) {
1312                    floatInt fi;
1313                    fi.f = arg;
1314                    argBits = fi.i;
1315                } else {
1316                    doubleInt di;
1317                    di.d = arg;
1318                    argBits = di.i;
1319                }
1320                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1321                int destLoIndex = destStart + (i + 0) * destSizeBits;
1322                result = insertBits(result, destHiIndex, destLoIndex, argBits);
1323            }
1324            FpDestReg_uqw = result;
1325        '''
1326
1327    class Mcmpi2r(MediaOp):
1328        code = '''
1329            union floatInt
1330            {
1331                float f;
1332                uint32_t i;
1333            };
1334            union doubleInt
1335            {
1336                double d;
1337                uint64_t i;
1338            };
1339
1340            assert(srcSize == destSize);
1341            int size = srcSize;
1342            int sizeBits = size * 8;
1343            int items = numItems(size);
1344            uint64_t result = FpDestReg_uqw;
1345
1346            for (int i = 0; i < items; i++) {
1347                int hiIndex = (i + 1) * sizeBits - 1;
1348                int loIndex = (i + 0) * sizeBits;
1349                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
1350                int64_t arg1 = arg1Bits |
1351                    (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
1352                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
1353                int64_t arg2 = arg2Bits |
1354                    (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
1355
1356                uint64_t resBits = 0;
1357                if (((ext & 0x2) == 0 && arg1 == arg2) ||
1358                    ((ext & 0x2) == 0x2 && arg1 > arg2))
1359                    resBits = mask(sizeBits);
1360
1361                result = insertBits(result, hiIndex, loIndex, resBits);
1362            }
1363            FpDestReg_uqw = result;
1364        '''
1365
1366    class Mcmpf2r(MediaOp):
        # Lane-wise floating point compare that produces a per-lane mask.
        #
        # srcSize must equal destSize. Each lane of both sources is
        # reinterpreted as float (size 4) or, for any other size, as
        # double, and held in a double. A lane whose predicate holds
        # becomes mask(sizeBits); any other lane becomes 0.
        #
        # The predicate is selected by ext & mask(3) (presumably the low
        # three bits of ext), where "nanop" means either operand is NaN:
        #   0: arg1 == arg2 and not nanop
        #   1: arg1 <  arg2 and not nanop
        #   2: arg1 <= arg2 and not nanop
        #   3: nanop
        #   4: arg1 != arg2 or nanop
        #   5: not (arg1 <  arg2) or nanop
        #   6: not (arg1 <= arg2) or nanop
        #   7: not nanop
        #
        # NOTE(review): unlike the other floating point ops in this file
        # section, there is no assert that the lane size is 4 or 8; a
        # different size would fall into the double branch. Confirm that
        # callers only use sizes 4 and 8.
1367        code = '''
1368            union floatInt
1369            {
1370                float f;
1371                uint32_t i;
1372            };
1373            union doubleInt
1374            {
1375                double d;
1376                uint64_t i;
1377            };
1378
1379            assert(srcSize == destSize);
1380            int size = srcSize;
1381            int sizeBits = size * 8;
1382            int items = numItems(size);
1383            uint64_t result = FpDestReg_uqw;
1384
1385            for (int i = 0; i < items; i++) {
1386                int hiIndex = (i + 1) * sizeBits - 1;
1387                int loIndex = (i + 0) * sizeBits;
1388                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
1389                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
1390                double arg1, arg2;
1391
1392                if (size == 4) {
1393                    floatInt fi;
1394                    fi.i = arg1Bits;
1395                    arg1 = fi.f;
1396                    fi.i = arg2Bits;
1397                    arg2 = fi.f;
1398                } else {
1399                    doubleInt di;
1400                    di.i = arg1Bits;
1401                    arg1 = di.d;
1402                    di.i = arg2Bits;
1403                    arg2 = di.d;
1404                }
1405
1406                uint64_t resBits = 0;
1407                bool nanop = isnan(arg1) || isnan(arg2);
1408                switch (ext & mask(3)) {
1409                  case 0:
1410                    if (arg1 == arg2 && !nanop)
1411                        resBits = mask(sizeBits);
1412                    break;
1413                  case 1:
1414                    if (arg1 < arg2 && !nanop)
1415                        resBits = mask(sizeBits);
1416                    break;
1417                  case 2:
1418                    if (arg1 <= arg2 && !nanop)
1419                        resBits = mask(sizeBits);
1420                    break;
1421                  case 3:
1422                    if (nanop)
1423                        resBits = mask(sizeBits);
1424                    break;
1425                  case 4:
1426                    if (arg1 != arg2 || nanop)
1427                        resBits = mask(sizeBits);
1428                    break;
1429                  case 5:
1430                    if (!(arg1 < arg2) || nanop)
1431                        resBits = mask(sizeBits);
1432                    break;
1433                  case 6:
1434                    if (!(arg1 <= arg2) || nanop)
1435                        resBits = mask(sizeBits);
1436                    break;
1437                  case 7:
1438                    if (!nanop)
1439                        resBits = mask(sizeBits);
1440                    break;
1441                };
1442
1443                result = insertBits(result, hiIndex, loIndex, resBits);
1444            }
1445            FpDestReg_uqw = result;
1446        '''
1447
1448    class Mcmpf2rf(MediaOp):
        # Scalar floating point compare that reports its outcome in the
        # condition flags instead of a register.
        #
        # The constructor takes the two sources only; "InstRegIndex(0)" is
        # passed to MediaOp in the destination position.
        #
        # srcSize must equal destSize and be 4 or 8. Only the lowest lane
        # (bits sizeBits - 1 down to 0) of each source is examined,
        # reinterpreted as float or double. The OF, SF, AF, ZF, PF and CF
        # bits of ccFlagBits are cleared first, and then:
        #   - either operand NaN: ZF, PF and CF are set
        #   - arg1 <  arg2:       CF is set
        #   - arg1 == arg2:       ZF is set
        #   - otherwise:          all six bits stay clear
1449        def __init__(self, src1, src2,\
1450                size = None, destSize = None, srcSize = None, ext = None):
1451            super(Mcmpf2rf, self).__init__("InstRegIndex(0)", src1,\
1452                    src2, size, destSize, srcSize, ext)
1453        code = '''
1454            union floatInt
1455            {
1456                float f;
1457                uint32_t i;
1458            };
1459            union doubleInt
1460            {
1461                double d;
1462                uint64_t i;
1463            };
1464
1465            assert(srcSize == destSize);
1466            assert(srcSize == 4 || srcSize == 8);
1467            int size = srcSize;
1468            int sizeBits = size * 8;
1469
1470            double arg1, arg2;
1471            uint64_t arg1Bits = bits(FpSrcReg1_uqw, sizeBits - 1, 0);
1472            uint64_t arg2Bits = bits(FpSrcReg2_uqw, sizeBits - 1, 0);
1473            if (size == 4) {
1474                floatInt fi;
1475                fi.i = arg1Bits;
1476                arg1 = fi.f;
1477                fi.i = arg2Bits;
1478                arg2 = fi.f;
1479            } else {
1480                doubleInt di;
1481                di.i = arg1Bits;
1482                arg1 = di.d;
1483                di.i = arg2Bits;
1484                arg2 = di.d;
1485            }
1486
1487            //               ZF PF CF
1488            // Unordered      1  1  1
1489            // Greater than   0  0  0
1490            // Less than      0  0  1
1491            // Equal          1  0  0
1492            //           OF = SF = AF = 0
1493            ccFlagBits = ccFlagBits & ~(OFBit | SFBit | AFBit |
1494                                        ZFBit | PFBit | CFBit);
1495            if (isnan(arg1) || isnan(arg2))
1496                ccFlagBits = ccFlagBits | (ZFBit | PFBit | CFBit);
1497            else if(arg1 < arg2)
1498                ccFlagBits = ccFlagBits | CFBit;
1499            else if(arg1 == arg2)
1500                ccFlagBits = ccFlagBits | ZFBit;
1501        '''
1502}};
1503