mediaop.isa revision 6799:36131e4dfb6e
1/// Copyright (c) 2009 The Regents of The University of Michigan
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met: redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer;
8// redistributions in binary form must reproduce the above copyright
9// notice, this list of conditions and the following disclaimer in the
10// documentation and/or other materials provided with the distribution;
11// neither the name of the copyright holders nor the names of its
12// contributors may be used to endorse or promote products derived from
13// this software without specific prior written permission.
14//
15// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26//
27// Authors: Gabe Black
28
// Template for the execute() method of a media microop. The generated
// body expands the op_decl and op_rd substitutions, then the microop's
// code snippet, and expands the op_wb substitution only if fault is still
// NoFault at that point. The (possibly updated) fault is returned.
def template MediaOpExecute {{
        Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
                Trace::InstRecord *traceData) const
        {
            Fault fault = NoFault;

            %(op_decl)s;
            %(op_rd)s;

            %(code)s;

            //Write the resulting state to the execution context
            if(fault == NoFault)
            {
                %(op_wb)s;
            }
            return fault;
        }
}};
48
// Class declaration template for the register form of a media microop,
// i.e. the form whose second source is an InstRegIndex (_src2). It declares
// buildMe(), one constructor that takes the isMicro/isDelayed/isFirst/isLast
// flags and one that does not, and expands BasicExecDeclare.
def template MediaOpRegDeclare {{
    class %(class_name)s : public %(base_class)s
    {
      protected:
        void buildMe();

      public:
        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem,
                bool isMicro, bool isDelayed, bool isFirst, bool isLast,
                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem,
                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        %(BasicExecDeclare)s
    };
}};
70
// Class declaration template for the immediate form of a media microop,
// i.e. the form whose second operand is a uint16_t immediate (_imm8) instead
// of a register index. It declares buildMe(), one constructor that takes the
// isMicro/isDelayed/isFirst/isLast flags and one that does not, and expands
// BasicExecDeclare.
def template MediaOpImmDeclare {{

    class %(class_name)s : public %(base_class)s
    {
      protected:
        void buildMe();

      public:
        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem,
                bool isMicro, bool isDelayed, bool isFirst, bool isLast,
                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem,
                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        %(BasicExecDeclare)s
    };
}};
93
// Definitions for the register form of a media microop: buildMe(), which
// expands the constructor substitution, and the two constructors. Both
// constructors forward their arguments to the base class and then call
// buildMe(); the one without flag parameters passes false for all four
// flag arguments of the base class.
def template MediaOpRegConstructor {{

    inline void %(class_name)s::buildMe()
    {
        %(constructor)s;
    }

    inline %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem,
            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
                false, false, false, false,
                _src1, _src2, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        buildMe();
    }

    inline %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem,
            bool isMicro, bool isDelayed, bool isFirst, bool isLast,
            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
                isMicro, isDelayed, isFirst, isLast,
                _src1, _src2, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        buildMe();
    }
}};
126
// Definitions for the immediate form of a media microop: buildMe(), which
// expands the constructor substitution, and the two constructors. Both
// constructors forward their arguments (with _imm8 in the second-operand
// position) to the base class and then call buildMe(); the one without flag
// parameters passes false for all four flag arguments of the base class.
def template MediaOpImmConstructor {{

    inline void %(class_name)s::buildMe()
    {
        %(constructor)s;
    }

    inline %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem,
            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
                false, false, false, false,
                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        buildMe();
    }

    inline %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem,
            bool isMicro, bool isDelayed, bool isFirst, bool isLast,
            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
                isMicro, isDelayed, isFirst, isLast,
                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        buildMe();
    }
}};
159
160let {{
161    # Make these empty strings so that concatenating onto
162    # them will always work.
163    header_output = ""
164    decoder_output = ""
165    exec_output = ""
166
167    immTemplates = (
168            MediaOpImmDeclare,
169            MediaOpImmConstructor,
170            MediaOpExecute)
171
172    regTemplates = (
173            MediaOpRegDeclare,
174            MediaOpRegConstructor,
175            MediaOpExecute)
176
177    class MediaOpMeta(type):
178        def buildCppClasses(self, name, Name, suffix, code):
179
180            # Globals to stick the output in
181            global header_output
182            global decoder_output
183            global exec_output
184
185            # If op2 is used anywhere, make register and immediate versions
186            # of this code.
187            matcher = re.compile("(?<!\\w)(?P<prefix>s?)op2(?P<typeQual>\\.\\w+)?")
188            match = matcher.search(code)
189            if match:
190                typeQual = ""
191                if match.group("typeQual"):
192                    typeQual = match.group("typeQual")
193                src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual)
194                self.buildCppClasses(name, Name, suffix,
195                        matcher.sub(src2_name, code))
196                self.buildCppClasses(name + "i", Name, suffix + "Imm",
197                        matcher.sub("imm8", code))
198                return
199
200            base = "X86ISA::MediaOp"
201
202            # If imm8 shows up in the code, use the immediate templates, if
203            # not, hopefully the register ones will be correct.
204            matcher = re.compile("(?<!\w)imm8(?!\w)")
205            if matcher.search(code):
206                base += "Imm"
207                templates = immTemplates
208            else:
209                base += "Reg"
210                templates = regTemplates
211
212            # Get everything ready for the substitution
213            iop = InstObjParams(name, Name + suffix, base, {"code" : code})
214
215            # Generate the actual code (finally!)
216            header_output += templates[0].subst(iop)
217            decoder_output += templates[1].subst(iop)
218            exec_output += templates[2].subst(iop)
219
220
221        def __new__(mcls, Name, bases, dict):
222            abstract = False
223            name = Name.lower()
224            if "abstract" in dict:
225                abstract = dict['abstract']
226                del dict['abstract']
227
228            cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
229            if not abstract:
230                cls.className = Name
231                cls.base_mnemonic = name
232                code = cls.code
233
234                # Set up the C++ classes
235                mcls.buildCppClasses(cls, name, Name, "", code)
236
237                # Hook into the microassembler dict
238                global microopClasses
239                microopClasses[name] = cls
240
241                # If op2 is used anywhere, make register and immediate versions
242                # of this code.
243                matcher = re.compile("op2(?P<typeQual>\\.\\w+)?")
244                if matcher.search(code):
245                    microopClasses[name + 'i'] = cls
246            return cls
247
248
249    class MediaOp(X86Microop):
250        __metaclass__ = MediaOpMeta
251        # This class itself doesn't act as a microop
252        abstract = True
253
254        def __init__(self, dest, src1, op2,
255                size = None, destSize = None, srcSize = None, ext = None):
256            self.dest = dest
257            self.src1 = src1
258            self.op2 = op2
259            if size is not None:
260                self.srcSize = size
261                self.destSize = size
262            if srcSize is not None:
263                self.srcSize = srcSize
264            if destSize is not None:
265                self.destSize = destSize
266            if self.srcSize is None:
267                raise Exception, "Source size not set."
268            if self.destSize is None:
269                raise Exception, "Dest size not set."
270            if ext is None:
271                self.ext = 0
272            else:
273                self.ext = ext 
274
275        def getAllocator(self, *microFlags):
276            className = self.className
277            if self.mnemonic == self.base_mnemonic + 'i':
278                className += "Imm"
279            allocator = '''new %(class_name)s(machInst, macrocodeBlock
280                    %(flags)s, %(src1)s, %(op2)s, %(dest)s,
281                    %(srcSize)s, %(destSize)s, %(ext)s)''' % {
282                "class_name" : className,
283                "flags" : self.microFlagsText(microFlags),
284                "src1" : self.src1, "op2" : self.op2,
285                "dest" : self.dest,
286                "srcSize" : self.srcSize,
287                "destSize" : self.destSize,
288                "ext" : self.ext}
289            return allocator
290
    # Copies one srcSize-byte item of FpSrcReg1 into DestReg.
    # The second operand defaults to 0 and is used as imm8 by the code,
    # which selects the item to copy.
    class Mov2int(MediaOp):
        def __init__(self, dest, src1, src2 = 0, \
                size = None, destSize = None, srcSize = None, ext = None):
            super(Mov2int, self).__init__(dest, src1,\
                    src2, size, destSize, srcSize, ext)
        # items is the number of srcSize-byte items in one FloatRegBits.
        # When bit 0 of src1 and bit 0 of ext are both set, the selector is
        # reduced by items. If the resulting offset lies inside this
        # register, the selected item is merged into DestReg using destSize;
        # otherwise DestReg is assigned its own value.
        code = '''
            int items = sizeof(FloatRegBits) / srcSize;
            int offset = imm8;
            if (bits(src1, 0) && (ext & 0x1))
                offset -= items;
            if (offset >= 0 && offset < items) {
                uint64_t fpSrcReg1 =
                    bits(FpSrcReg1.uqw,
                            (offset + 1) * srcSize * 8 - 1,
                            (offset + 0) * srcSize * 8);
                DestReg = merge(0, fpSrcReg1, destSize);
            } else {
                DestReg = DestReg;
            }
        '''
311
    # Inserts the low srcSize bytes of SrcReg1 into one destSize-byte item
    # of FpDestReg. The second operand defaults to 0 and is used as imm8 by
    # the code, which selects the destination item.
    class Mov2fp(MediaOp):
        def __init__(self, dest, src1, src2 = 0, \
                size = None, destSize = None, srcSize = None, ext = None):
            super(Mov2fp, self).__init__(dest, src1,\
                    src2, size, destSize, srcSize, ext)
        # items is the number of destSize-byte items in one FloatRegBits.
        # When bit 0 of dest and bit 0 of ext are both set, the selector is
        # reduced by items. If the resulting offset lies inside this
        # register, the picked source value is inserted at that item;
        # otherwise FpDestReg is assigned its own value.
        code = '''
            int items = sizeof(FloatRegBits) / destSize;
            int offset = imm8;
            if (bits(dest, 0) && (ext & 0x1))
                offset -= items;
            if (offset >= 0 && offset < items) {
                uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
                FpDestReg.uqw =
                    insertBits(FpDestReg.uqw,
                            (offset + 1) * destSize * 8 - 1,
                            (offset + 0) * destSize * 8, srcReg1);
            } else {
                FpDestReg.uqw = FpDestReg.uqw;
            }
        '''
332
    # Gathers the top bit of every srcSize-byte item of FpSrcReg1 into
    # DestReg. Takes a single source; the second operand passed to MediaOp
    # is fixed to "InstRegIndex(0)".
    class Movsign(MediaOp):
        def __init__(self, dest, src, \
                size = None, destSize = None, srcSize = None, ext = None):
            super(Movsign, self).__init__(dest, src,\
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        # The top bit of item i is placed at bit (i + offset) of result,
        # where offset is items when bit 0 of ext is set and 0 otherwise.
        # The result is ORed into the existing DestReg value.
        code = '''
            int items = sizeof(FloatRegBits) / srcSize;
            uint64_t result = 0;
            int offset = (ext & 0x1) ? items : 0;
            for (int i = 0; i < items; i++) {
                uint64_t picked =
                    bits(FpSrcReg1.uqw, (i + 1) * 8 * srcSize - 1);
                result = insertBits(result, i + offset, i + offset, picked);
            }
            DestReg = DestReg | result;
        '''
349
    # Masked move: for every item whose top bit is set in FpSrcReg2, the
    # corresponding item of FpSrcReg1 is copied into FpDestReg. Items whose
    # mask bit is clear keep the value FpDestReg already had.
    # Requires srcSize == destSize (asserted in the code).
    # NOTE(review): numItems() is not defined in this block; presumably it
    # is provided by the C++ base class -- confirm.
    class Maskmov(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                if (bits(FpSrcReg2.uqw, hiIndex))
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
            }
            FpDestReg.uqw = result;
        '''
367
    # Shuffle: builds FpDestReg item by item, with each destination item
    # chosen by a selector field taken from ext.
    # Requires srcSize == destSize (asserted in the code).
    #
    # Selector fields are 1 bit wide for 8-byte items and 2 bits wide
    # otherwise, and are consumed from the low end of ext (truncated to
    # 8 bits), one per destination item. A selector whose item would start
    # at or beyond the end of one FloatRegBits is reduced by options / 2 and
    # read from FpSrcReg2; any other selector is read from FpSrcReg1.
    class shuffle(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = sizeof(FloatRegBits) / size;
            int options;
            int optionBits;
            if (size == 8) {
                options = 2;
                optionBits = 1;
            } else {
                options = 4;
                optionBits = 2;
            }

            uint64_t result = 0;
            uint8_t sel = ext;

            for (int i = 0; i < items; i++) {
                uint64_t resBits;
                uint8_t lsel = sel & mask(optionBits);
                if (lsel * size >= sizeof(FloatRegBits)) {
                    lsel -= options / 2;
                    resBits = bits(FpSrcReg2.uqw,
                            (lsel + 1) * sizeBits - 1,
                            (lsel + 0) * sizeBits);
                }  else {
                    resBits = bits(FpSrcReg1.uqw,
                            (lsel + 1) * sizeBits - 1,
                            (lsel + 0) * sizeBits);
                }

                sel >>= optionBits;

                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''
409
    # Unpack: interleaves items from FpSrcReg1 and FpSrcReg2 into FpDestReg.
    # Requires srcSize == destSize (asserted in the code).
    #
    # items is half the number of items in one FloatRegBits. Source items
    # are taken starting at index 0 when ext is zero and at index items
    # when ext is non-zero. Source item i of FpSrcReg1 goes to destination
    # item 2*i and source item i of FpSrcReg2 goes to destination item
    # 2*i + 1.
    class Unpack(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = destSize;
            int items = (sizeof(FloatRegBits) / size) / 2;
            int offset = ext ? items : 0;
            uint64_t result = 0;
            for (int i = 0; i < items; i++) {
                uint64_t pickedLow =
                    bits(FpSrcReg1.uqw, (i + offset + 1) * 8 * size - 1,
                                        (i + offset) * 8 * size);
                result = insertBits(result,
                                    (2 * i + 1) * 8 * size - 1,
                                    (2 * i + 0) * 8 * size,
                                    pickedLow);
                uint64_t pickedHigh =
                    bits(FpSrcReg2.uqw, (i + offset + 1) * 8 * size - 1,
                                        (i + offset) * 8 * size);
                result = insertBits(result,
                                    (2 * i + 2) * 8 * size - 1,
                                    (2 * i + 1) * 8 * size,
                                    pickedHigh);
            }
            FpDestReg.uqw = result;
        '''
435
436    class Pack(MediaOp):
437        code = '''
438            assert(srcSize == destSize * 2);
439            int items = (sizeof(FloatRegBits) / destSize);
440            int destBits = destSize * 8;
441            int srcBits = srcSize * 8;
442            uint64_t result = 0;
443            int i;
444            for (i = 0; i < items / 2; i++) {
445                uint64_t picked =
446                    bits(FpSrcReg1.uqw, (i + 1) * srcBits - 1,
447                                        (i + 0) * srcBits);
448                unsigned signBit = bits(picked, srcBits - 1);
449                uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
450
451                // Handle saturation.
452                if (signBit) {
453                    if (overflow != mask(destBits - srcBits + 1)) {
454                        if (ext & 0x1)
455                            picked = (ULL(1) << (destBits - 1));
456                        else
457                            picked = 0;
458                    }
459                } else {
460                    if (overflow != 0) {
461                        if (ext & 0x1)
462                            picked = mask(destBits - 1);
463                        else
464                            picked = mask(destBits);
465                    }
466                }
467                result = insertBits(result,
468                                    (i + 1) * destBits - 1,
469                                    (i + 0) * destBits,
470                                    picked);
471            }
472            for (;i < items; i++) {
473                uint64_t picked =
474                    bits(FpSrcReg2.uqw, (i - items + 1) * srcBits - 1,
475                                        (i - items + 0) * srcBits);
476                unsigned signBit = bits(picked, srcBits - 1);
477                uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
478
479                // Handle saturation.
480                if (signBit) {
481                    if (overflow != mask(destBits - srcBits + 1)) {
482                        if (ext & 0x1)
483                            picked = (ULL(1) << (destBits - 1));
484                        else
485                            picked = 0;
486                    }
487                } else {
488                    if (overflow != 0) {
489                        if (ext & 0x1)
490                            picked = mask(destBits - 1);
491                        else
492                            picked = mask(destBits);
493                    }
494                }
495                result = insertBits(result,
496                                    (i + 1) * destBits - 1,
497                                    (i + 0) * destBits,
498                                    picked);
499            }
500            FpDestReg.uqw = result;
501        '''
502
    # Bitwise XOR of FpSrcReg1 and FpSrcReg2 into FpDestReg.
    # The operation is size independent; 1 is passed as the size argument
    # to MediaOp.__init__.
    class Mxor(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mxor, self).__init__(dest, src1, src2, 1)
        code = '''
            FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw;
        '''
509
    # Bitwise OR of FpSrcReg1 and FpSrcReg2 into FpDestReg.
    # The operation is size independent; 1 is passed as the size argument
    # to MediaOp.__init__.
    class Mor(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mor, self).__init__(dest, src1, src2, 1)
        code = '''
            FpDestReg.uqw = FpSrcReg1.uqw | FpSrcReg2.uqw;
        '''
516
    # Bitwise AND of FpSrcReg1 and FpSrcReg2 into FpDestReg.
    # The operation is size independent; 1 is passed as the size argument
    # to MediaOp.__init__.
    class Mand(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mand, self).__init__(dest, src1, src2, 1)
        code = '''
            FpDestReg.uqw = FpSrcReg1.uqw & FpSrcReg2.uqw;
        '''
523
    # AND-NOT: the complement of FpSrcReg1 ANDed with FpSrcReg2, written to
    # FpDestReg. Note that it is the first source that is inverted.
    # The operation is size independent; 1 is passed as the size argument
    # to MediaOp.__init__.
    class Mandn(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mandn, self).__init__(dest, src1, src2, 1)
        code = '''
            FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw;
        '''
530
    # Packed floating point minimum.
    # Requires srcSize == destSize and a size of 4 or 8 bytes (asserted in
    # the code); 4-byte items are interpreted as float, 8-byte as double.
    #
    # For every item the bit pattern of the FpSrcReg1 item is written when
    # it compares less than the FpSrcReg2 item; in every other case (that
    # is, whenever the comparison is false) the FpSrcReg2 bit pattern is
    # written. The result starts from the old FpDestReg value, so bits not
    # covered by the loop are preserved.
    # NOTE(review): numItems() is not defined in this block; presumably it
    # is provided by the C++ base class -- confirm.
    class Mminf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                double arg1, arg2;
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);

                if (size == 4) {
                    floatInt fi;
                    fi.i = arg1Bits;
                    arg1 = fi.f;
                    fi.i = arg2Bits;
                    arg2 = fi.f;
                } else {
                    doubleInt di;
                    di.i = arg1Bits;
                    arg1 = di.d;
                    di.i = arg2Bits;
                    arg2 = di.d;
                }

                if (arg1 < arg2) {
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
                } else {
                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
                }
            }
            FpDestReg.uqw = result;
        '''
580
    # Packed floating point maximum.
    # Requires srcSize == destSize and a size of 4 or 8 bytes (asserted in
    # the code); 4-byte items are interpreted as float, 8-byte as double.
    #
    # For every item the bit pattern of the FpSrcReg1 item is written when
    # it compares greater than the FpSrcReg2 item; in every other case
    # (that is, whenever the comparison is false) the FpSrcReg2 bit pattern
    # is written. The result starts from the old FpDestReg value, so bits
    # not covered by the loop are preserved.
    # NOTE(review): numItems() is not defined in this block; presumably it
    # is provided by the C++ base class -- confirm.
    class Mmaxf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                double arg1, arg2;
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);

                if (size == 4) {
                    floatInt fi;
                    fi.i = arg1Bits;
                    arg1 = fi.f;
                    fi.i = arg2Bits;
                    arg2 = fi.f;
                } else {
                    doubleInt di;
                    di.i = arg1Bits;
                    arg1 = di.d;
                    di.i = arg2Bits;
                    arg2 = di.d;
                }

                if (arg1 > arg2) {
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
                } else {
                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
                }
            }
            FpDestReg.uqw = result;
        '''
630
    # Packed integer minimum.
    # Requires srcSize == destSize (asserted in the code).
    #
    # Each item is extracted both as raw bits and as a sign-extended
    # int64_t. When bit 1 of ext (ext & 0x2) is set the sign-extended
    # values are compared (signed minimum); otherwise the raw bit patterns
    # are compared (unsigned minimum). The FpSrcReg2 item is chosen when
    # the FpSrcReg1 item is not strictly smaller. The result starts from
    # the old FpDestReg value, so bits not covered by the loop are
    # preserved.
    # NOTE(review): numItems() is not defined in this block; presumably it
    # is provided by the C++ base class -- confirm.
    class Mmini(MediaOp):
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                int64_t arg1 = arg1Bits |
                    (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                int64_t arg2 = arg2Bits |
                    (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
                uint64_t resBits;

                if (ext & 0x2) {
                    if (arg1 < arg2) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                } else {
                    if (arg1Bits < arg2Bits) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                }
                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''
668
    # Packed integer maximum.
    # Requires srcSize == destSize (asserted in the code).
    #
    # Each item is extracted both as raw bits and as a sign-extended
    # int64_t. When bit 1 of ext (ext & 0x2) is set the sign-extended
    # values are compared (signed maximum); otherwise the raw bit patterns
    # are compared (unsigned maximum). The FpSrcReg2 item is chosen when
    # the FpSrcReg1 item is not strictly greater. The result starts from
    # the old FpDestReg value, so bits not covered by the loop are
    # preserved.
    # NOTE(review): numItems() is not defined in this block; presumably it
    # is provided by the C++ base class -- confirm.
    class Mmaxi(MediaOp):
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                int64_t arg1 = arg1Bits |
                    (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                int64_t arg2 = arg2Bits |
                    (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
                uint64_t resBits;

                if (ext & 0x2) {
                    if (arg1 > arg2) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                } else {
                    if (arg1Bits > arg2Bits) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                }
                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''
706
    # Packed floating point square root of FpSrcReg1.
    # Takes a single source; the second operand passed to MediaOp is fixed
    # to "InstRegIndex(0)".
    # Requires srcSize == destSize and a size of 4 or 8 bytes (asserted in
    # the code); 4-byte items are interpreted as float, 8-byte as double.
    # The result starts from the old FpDestReg value, so bits not covered
    # by the loop are preserved.
    # NOTE(review): numItems() is not defined in this block; presumably it
    # is provided by the C++ base class -- confirm.
    class Msqrt(MediaOp):
        def __init__(self, dest, src, \
                size = None, destSize = None, srcSize = None, ext = None):
            super(Msqrt, self).__init__(dest, src,\
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex);

                if (size == 4) {
                    floatInt fi;
                    fi.i = argBits;
                    fi.f = sqrt(fi.f);
                    argBits = fi.i;
                } else {
                    doubleInt di;
                    di.i = argBits;
                    di.d = sqrt(di.d);
                    argBits = di.i;
                }
                result = insertBits(result, hiIndex, loIndex, argBits);
            }
            FpDestReg.uqw = result;
        '''
751
    # Packed floating point addition: item-wise FpSrcReg1 + FpSrcReg2.
    # Requires srcSize == destSize and a size of 4 or 8 bytes (asserted in
    # the code); 4-byte items are added as float, 8-byte items as double.
    # The result starts from the old FpDestReg value, so bits not covered
    # by the loop are preserved.
    # NOTE(review): numItems() is not defined in this block; presumably it
    # is provided by the C++ base class -- confirm.
    class Maddf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f + arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d + arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''
797
    # Packed floating point subtraction: item-wise FpSrcReg1 - FpSrcReg2.
    # Requires srcSize == destSize and a size of 4 or 8 bytes (asserted in
    # the code); 4-byte items are subtracted as float, 8-byte items as
    # double. The result starts from the old FpDestReg value, so bits not
    # covered by the loop are preserved.
    # NOTE(review): numItems() is not defined in this block; presumably it
    # is provided by the C++ base class -- confirm.
    class Msubf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f - arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d - arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''
843
844    class Mmulf(MediaOp):
845        code = '''
846            union floatInt
847            {
848                float f;
849                uint32_t i;
850            };
851            union doubleInt
852            {
853                double d;
854                uint64_t i;
855            };
856
857            assert(srcSize == destSize);
858            int size = srcSize;
859            int sizeBits = size * 8;
860            assert(srcSize == 4 || srcSize == 8);
861            int items = numItems(size);
862            uint64_t result = FpDestReg.uqw;
863
864            for (int i = 0; i < items; i++) {
865                int hiIndex = (i + 1) * sizeBits - 1;
866                int loIndex = (i + 0) * sizeBits;
867                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
868                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
869                uint64_t resBits;
870
871                if (size == 4) {
872                    floatInt arg1, arg2, res;
873                    arg1.i = arg1Bits;
874                    arg2.i = arg2Bits;
875                    res.f = arg1.f * arg2.f;
876                    resBits = res.i;
877                } else {
878                    doubleInt arg1, arg2, res;
879                    arg1.i = arg1Bits;
880                    arg2.i = arg2Bits;
881                    res.d = arg1.d * arg2.d;
882                    resBits = res.i;
883                }
884
885                result = insertBits(result, hiIndex, loIndex, resBits);
886            }
887            FpDestReg.uqw = result;
888        '''
889
890    class Mdivf(MediaOp):
891        code = '''
892            union floatInt
893            {
894                float f;
895                uint32_t i;
896            };
897            union doubleInt
898            {
899                double d;
900                uint64_t i;
901            };
902
903            assert(srcSize == destSize);
904            int size = srcSize;
905            int sizeBits = size * 8;
906            assert(srcSize == 4 || srcSize == 8);
907            int items = numItems(size);
908            uint64_t result = FpDestReg.uqw;
909
910            for (int i = 0; i < items; i++) {
911                int hiIndex = (i + 1) * sizeBits - 1;
912                int loIndex = (i + 0) * sizeBits;
913                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
914                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
915                uint64_t resBits;
916
917                if (size == 4) {
918                    floatInt arg1, arg2, res;
919                    arg1.i = arg1Bits;
920                    arg2.i = arg2Bits;
921                    res.f = arg1.f / arg2.f;
922                    resBits = res.i;
923                } else {
924                    doubleInt arg1, arg2, res;
925                    arg1.i = arg1Bits;
926                    arg2.i = arg2Bits;
927                    res.d = arg1.d / arg2.d;
928                    resBits = res.i;
929                }
930
931                result = insertBits(result, hiIndex, loIndex, resBits);
932            }
933            FpDestReg.uqw = result;
934        '''
935
936    class Maddi(MediaOp):
937        code = '''
938            assert(srcSize == destSize);
939            int size = srcSize;
940            int sizeBits = size * 8;
941            int items = numItems(size);
942            uint64_t result = FpDestReg.uqw;
943
944            for (int i = 0; i < items; i++) {
945                int hiIndex = (i + 1) * sizeBits - 1;
946                int loIndex = (i + 0) * sizeBits;
947                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
948                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
949                uint64_t resBits = arg1Bits + arg2Bits;
950                
951                if (ext & 0x2) {
952                    if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
953                        resBits = mask(sizeBits);
954                } else if (ext & 0x4) {
955                    int arg1Sign = bits(arg1Bits, sizeBits - 1);
956                    int arg2Sign = bits(arg2Bits, sizeBits - 1);
957                    int resSign = bits(resBits, sizeBits - 1);
958                    if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
959                        if (resSign == 0)
960                            resBits = (ULL(1) << (sizeBits - 1));
961                        else
962                            resBits = mask(sizeBits - 1);
963                    }
964                }
965
966                result = insertBits(result, hiIndex, loIndex, resBits);
967            }
968            FpDestReg.uqw = result;
969        '''
970
971    class Msubi(MediaOp):
972        code = '''
973            assert(srcSize == destSize);
974            int size = srcSize;
975            int sizeBits = size * 8;
976            int items = numItems(size);
977            uint64_t result = FpDestReg.uqw;
978
979            for (int i = 0; i < items; i++) {
980                int hiIndex = (i + 1) * sizeBits - 1;
981                int loIndex = (i + 0) * sizeBits;
982                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
983                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
984                uint64_t resBits = arg1Bits - arg2Bits;
985                
986                if (ext & 0x2) {
987                    if (arg2Bits > arg1Bits) {
988                        resBits = 0;
989                    } else if (!findCarry(sizeBits, resBits,
990                                         arg1Bits, ~arg2Bits)) {
991                        resBits = mask(sizeBits);
992                    }
993                } else if (ext & 0x4) {
994                    int arg1Sign = bits(arg1Bits, sizeBits - 1);
995                    int arg2Sign = !bits(arg2Bits, sizeBits - 1);
996                    int resSign = bits(resBits, sizeBits - 1);
997                    if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
998                        if (resSign == 0)
999                            resBits = (ULL(1) << (sizeBits - 1));
1000                        else
1001                            resBits = mask(sizeBits - 1);
1002                    }
1003                }
1004
1005                result = insertBits(result, hiIndex, loIndex, resBits);
1006            }
1007            FpDestReg.uqw = result;
1008        '''
1009
1010    class Mmuli(MediaOp):
1011        code = '''
1012            int srcBits = srcSize * 8;
1013            int destBits = destSize * 8;
1014            assert(destBits <= 64);
1015            assert(destSize >= srcSize);
1016            int items = numItems(destSize);
1017            uint64_t result = FpDestReg.uqw;
1018
1019            for (int i = 0; i < items; i++) {
1020                int offset = 0;
1021                if (ext & 16) {
1022                    if (ext & 32)
1023                        offset = i * (destBits - srcBits);
1024                    else
1025                        offset = i * (destBits - srcBits) + srcBits;
1026                }
1027                int srcHiIndex = (i + 1) * srcBits - 1 + offset;
1028                int srcLoIndex = (i + 0) * srcBits + offset;
1029                uint64_t arg1Bits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1030                uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex);
1031                uint64_t resBits;
1032
1033                if (ext & 0x2) {
1034                    int64_t arg1 = arg1Bits |
1035                        (0 - (arg1Bits & (ULL(1) << (srcBits - 1))));
1036                    int64_t arg2 = arg2Bits |
1037                        (0 - (arg2Bits & (ULL(1) << (srcBits - 1))));
1038                    resBits = (uint64_t)(arg1 * arg2);
1039                } else {
1040                    resBits = arg1Bits * arg2Bits;
1041                }
1042
1043                if (ext & 0x4)
1044                    resBits += (ULL(1) << (destBits - 1));
1045                
1046                if (ext & 0x8)
1047                    resBits >>= destBits;
1048
1049                int destHiIndex = (i + 1) * destBits - 1;
1050                int destLoIndex = (i + 0) * destBits;
1051                result = insertBits(result, destHiIndex, destLoIndex, resBits);
1052            }
1053            FpDestReg.uqw = result;
1054        '''
1055
1056    class Mavg(MediaOp):
1057        code = '''
1058            assert(srcSize == destSize);
1059            int size = srcSize;
1060            int sizeBits = size * 8;
1061            int items = numItems(size);
1062            uint64_t result = FpDestReg.uqw;
1063
1064            for (int i = 0; i < items; i++) {
1065                int hiIndex = (i + 1) * sizeBits - 1;
1066                int loIndex = (i + 0) * sizeBits;
1067                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1068                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1069                uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;
1070                
1071                result = insertBits(result, hiIndex, loIndex, resBits);
1072            }
1073            FpDestReg.uqw = result;
1074        '''
1075
1076    class Msad(MediaOp):
1077        code = '''
1078            int srcBits = srcSize * 8;
1079            int items = sizeof(FloatRegBits) / srcSize;
1080
1081            uint64_t sum = 0;
1082            for (int i = 0; i < items; i++) {
1083                int hiIndex = (i + 1) * srcBits - 1;
1084                int loIndex = (i + 0) * srcBits;
1085                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1086                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1087                int64_t resBits = arg1Bits - arg2Bits;
1088                if (resBits < 0)
1089                    resBits = -resBits;
1090                sum += resBits;
1091            }
1092            FpDestReg.uqw = sum & mask(destSize * 8);
1093        '''
1094
1095    class Msrl(MediaOp):
1096        code = '''
1097
1098            assert(srcSize == destSize);
1099            int size = srcSize;
1100            int sizeBits = size * 8;
1101            int items = numItems(size);
1102            uint64_t shiftAmt = op2.uqw;
1103            uint64_t result = FpDestReg.uqw;
1104
1105            for (int i = 0; i < items; i++) {
1106                int hiIndex = (i + 1) * sizeBits - 1;
1107                int loIndex = (i + 0) * sizeBits;
1108                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1109                uint64_t resBits;
1110                if (shiftAmt >= sizeBits) {
1111                    resBits = 0;
1112                } else {
1113                    resBits = (arg1Bits >> shiftAmt) &
1114                        mask(sizeBits - shiftAmt);
1115                }
1116
1117                result = insertBits(result, hiIndex, loIndex, resBits);
1118            }
1119            FpDestReg.uqw = result;
1120        '''
1121
1122    class Msra(MediaOp):
1123        code = '''
1124
1125            assert(srcSize == destSize);
1126            int size = srcSize;
1127            int sizeBits = size * 8;
1128            int items = numItems(size);
1129            uint64_t shiftAmt = op2.uqw;
1130            uint64_t result = FpDestReg.uqw;
1131
1132            for (int i = 0; i < items; i++) {
1133                int hiIndex = (i + 1) * sizeBits - 1;
1134                int loIndex = (i + 0) * sizeBits;
1135                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1136                uint64_t resBits;
1137                if (shiftAmt >= sizeBits) {
1138                    if (bits(arg1Bits, sizeBits - 1))
1139                        resBits = mask(sizeBits);
1140                    else
1141                        resBits = 0;
1142                } else {
1143                    resBits = (arg1Bits >> shiftAmt);
1144                    resBits = resBits |
1145                        (0 - (resBits & (ULL(1) << (sizeBits - 1 - shiftAmt))));
1146                }
1147
1148                result = insertBits(result, hiIndex, loIndex, resBits);
1149            }
1150            FpDestReg.uqw = result;
1151        '''
1152
1153    class Msll(MediaOp):
1154        code = '''
1155
1156            assert(srcSize == destSize);
1157            int size = srcSize;
1158            int sizeBits = size * 8;
1159            int items = numItems(size);
1160            uint64_t shiftAmt = op2.uqw;
1161            uint64_t result = FpDestReg.uqw;
1162
1163            for (int i = 0; i < items; i++) {
1164                int hiIndex = (i + 1) * sizeBits - 1;
1165                int loIndex = (i + 0) * sizeBits;
1166                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1167                uint64_t resBits;
1168                if (shiftAmt >= sizeBits) {
1169                    resBits = 0;
1170                } else {
1171                    resBits = (arg1Bits << shiftAmt);
1172                }
1173
1174                result = insertBits(result, hiIndex, loIndex, resBits);
1175            }
1176            FpDestReg.uqw = result;
1177        '''
1178
1179    class Cvtf2i(MediaOp):
1180        def __init__(self, dest, src, \
1181                size = None, destSize = None, srcSize = None, ext = None):
1182            super(Cvtf2i, self).__init__(dest, src,\
1183                    "InstRegIndex(0)", size, destSize, srcSize, ext)
1184        code = '''
1185            union floatInt
1186            {
1187                float f;
1188                uint32_t i;
1189            };
1190            union doubleInt
1191            {
1192                double d;
1193                uint64_t i;
1194            };
1195
1196            assert(destSize == 4 || destSize == 8);
1197            assert(srcSize == 4 || srcSize == 8);
1198            int srcSizeBits = srcSize * 8;
1199            int destSizeBits = destSize * 8;
1200            int items;
1201            int srcStart = 0;
1202            int destStart = 0;
1203            if (srcSize == 2 * destSize) {
1204                items = numItems(srcSize);
1205                if (ext & 0x2)
1206                    destStart = destSizeBits * items;
1207            } else if (destSize == 2 * srcSize) {
1208                items = numItems(destSize);
1209                if (ext & 0x2)
1210                    srcStart = srcSizeBits * items;
1211            } else {
1212                items = numItems(destSize);
1213            }
1214            uint64_t result = FpDestReg.uqw;
1215
1216            for (int i = 0; i < items; i++) {
1217                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1218                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1219                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1220                double arg;
1221
1222                if (srcSize == 4) {
1223                    floatInt fi;
1224                    fi.i = argBits;
1225                    arg = fi.f;
1226                } else {
1227                    doubleInt di;
1228                    di.i = argBits;
1229                    arg = di.d;
1230                }
1231
1232                if (ext & 0x4) {
1233                    if (arg >= 0)
1234                        arg += 0.5;
1235                    else
1236                        arg -= 0.5;
1237                }
1238
1239                if (destSize == 4) {
1240                    argBits = (uint32_t)arg;
1241                } else {
1242                    argBits = (uint64_t)arg;
1243                }
1244                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1245                int destLoIndex = destStart + (i + 0) * destSizeBits;
1246                result = insertBits(result, destHiIndex, destLoIndex, argBits);
1247            }
1248            FpDestReg.uqw = result;
1249        '''
1250
1251    class Cvti2f(MediaOp):
1252        def __init__(self, dest, src, \
1253                size = None, destSize = None, srcSize = None, ext = None):
1254            super(Cvti2f, self).__init__(dest, src,\
1255                    "InstRegIndex(0)", size, destSize, srcSize, ext)
1256        code = '''
1257            union floatInt
1258            {
1259                float f;
1260                uint32_t i;
1261            };
1262            union doubleInt
1263            {
1264                double d;
1265                uint64_t i;
1266            };
1267
1268            assert(destSize == 4 || destSize == 8);
1269            assert(srcSize == 4 || srcSize == 8);
1270            int srcSizeBits = srcSize * 8;
1271            int destSizeBits = destSize * 8;
1272            int items;
1273            int srcStart = 0;
1274            int destStart = 0;
1275            if (srcSize == 2 * destSize) {
1276                items = numItems(srcSize);
1277                if (ext & 0x2)
1278                    destStart = destSizeBits * items;
1279            } else if (destSize == 2 * srcSize) {
1280                items = numItems(destSize);
1281                if (ext & 0x2)
1282                    srcStart = srcSizeBits * items;
1283            } else {
1284                items = numItems(destSize);
1285            }
1286            uint64_t result = FpDestReg.uqw;
1287
1288            for (int i = 0; i < items; i++) {
1289                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1290                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1291                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1292
1293                int64_t sArg = argBits | (0 - (argBits & (ULL(1) << srcHiIndex)));
1294                double arg = sArg;
1295
1296                if (destSize == 4) {
1297                    floatInt fi;
1298                    fi.f = arg;
1299                    argBits = fi.i;
1300                } else {
1301                    doubleInt di;
1302                    di.d = arg;
1303                    argBits = di.i;
1304                }
1305                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1306                int destLoIndex = destStart + (i + 0) * destSizeBits;
1307                result = insertBits(result, destHiIndex, destLoIndex, argBits);
1308            }
1309            FpDestReg.uqw = result;
1310        '''
1311
1312    class Cvtf2f(MediaOp):
1313        def __init__(self, dest, src, \
1314                size = None, destSize = None, srcSize = None, ext = None):
1315            super(Cvtf2f, self).__init__(dest, src,\
1316                    "InstRegIndex(0)", size, destSize, srcSize, ext)
1317        code = '''
1318            union floatInt
1319            {
1320                float f;
1321                uint32_t i;
1322            };
1323            union doubleInt
1324            {
1325                double d;
1326                uint64_t i;
1327            };
1328
1329            assert(destSize == 4 || destSize == 8);
1330            assert(srcSize == 4 || srcSize == 8);
1331            int srcSizeBits = srcSize * 8;
1332            int destSizeBits = destSize * 8;
1333            int items;
1334            int srcStart = 0;
1335            int destStart = 0;
1336            if (srcSize == 2 * destSize) {
1337                items = numItems(srcSize);
1338                if (ext & 0x2)
1339                    destStart = destSizeBits * items;
1340            } else if (destSize == 2 * srcSize) {
1341                items = numItems(destSize);
1342                if (ext & 0x2)
1343                    srcStart = srcSizeBits * items;
1344            } else {
1345                items = numItems(destSize);
1346            }
1347            uint64_t result = FpDestReg.uqw;
1348
1349            for (int i = 0; i < items; i++) {
1350                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1351                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1352                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1353                double arg;
1354
1355                if (srcSize == 4) {
1356                    floatInt fi;
1357                    fi.i = argBits;
1358                    arg = fi.f;
1359                } else {
1360                    doubleInt di;
1361                    di.i = argBits;
1362                    arg = di.d;
1363                }
1364                if (destSize == 4) {
1365                    floatInt fi;
1366                    fi.f = arg;
1367                    argBits = fi.i;
1368                } else {
1369                    doubleInt di;
1370                    di.d = arg;
1371                    argBits = di.i;
1372                }
1373                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1374                int destLoIndex = destStart + (i + 0) * destSizeBits;
1375                result = insertBits(result, destHiIndex, destLoIndex, argBits);
1376            }
1377            FpDestReg.uqw = result;
1378        '''
1379
1380    class Mcmpi2r(MediaOp):
1381        code = '''
1382            union floatInt
1383            {
1384                float f;
1385                uint32_t i;
1386            };
1387            union doubleInt
1388            {
1389                double d;
1390                uint64_t i;
1391            };
1392
1393            assert(srcSize == destSize);
1394            int size = srcSize;
1395            int sizeBits = size * 8;
1396            int items = numItems(size);
1397            uint64_t result = FpDestReg.uqw;
1398
1399            for (int i = 0; i < items; i++) {
1400                int hiIndex = (i + 1) * sizeBits - 1;
1401                int loIndex = (i + 0) * sizeBits;
1402                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1403                int64_t arg1 = arg1Bits |
1404                    (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
1405                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1406                int64_t arg2 = arg2Bits |
1407                    (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
1408
1409                uint64_t resBits = 0;
1410                if (((ext & 0x2) == 0 && arg1 == arg2) ||
1411                    ((ext & 0x2) == 0x2 && arg1 > arg2))
1412                    resBits = mask(sizeBits);
1413
1414                result = insertBits(result, hiIndex, loIndex, resBits);
1415            }
1416            FpDestReg.uqw = result;
1417        '''
1418
1419    class Mcmpf2r(MediaOp):
1420        code = '''
1421            union floatInt
1422            {
1423                float f;
1424                uint32_t i;
1425            };
1426            union doubleInt
1427            {
1428                double d;
1429                uint64_t i;
1430            };
1431
1432            assert(srcSize == destSize);
1433            int size = srcSize;
1434            int sizeBits = size * 8;
1435            int items = numItems(size);
1436            uint64_t result = FpDestReg.uqw;
1437
1438            for (int i = 0; i < items; i++) {
1439                int hiIndex = (i + 1) * sizeBits - 1;
1440                int loIndex = (i + 0) * sizeBits;
1441                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1442                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1443                double arg1, arg2;
1444
1445                if (size == 4) {
1446                    floatInt fi;
1447                    fi.i = arg1Bits;
1448                    arg1 = fi.f;
1449                    fi.i = arg2Bits;
1450                    arg2 = fi.f;
1451                } else {
1452                    doubleInt di;
1453                    di.i = arg1Bits;
1454                    arg1 = di.d;
1455                    di.i = arg2Bits;
1456                    arg2 = di.d;
1457                }
1458
1459                uint64_t resBits = 0;
1460                bool nanop = isnan(arg1) || isnan(arg2);
1461                switch (ext & mask(3)) {
1462                  case 0:
1463                    if (arg1 == arg2 && !nanop)
1464                        resBits = mask(sizeBits);
1465                    break;
1466                  case 1:
1467                    if (arg1 < arg2 && !nanop)
1468                        resBits = mask(sizeBits);
1469                    break;
1470                  case 2:
1471                    if (arg1 <= arg2 && !nanop)
1472                        resBits = mask(sizeBits);
1473                    break;
1474                  case 3:
1475                    if (nanop)
1476                        resBits = mask(sizeBits);
1477                    break;
1478                  case 4:
1479                    if (arg1 != arg2 || nanop)
1480                        resBits = mask(sizeBits);
1481                    break;
1482                  case 5:
1483                    if (!(arg1 < arg2) || nanop)
1484                        resBits = mask(sizeBits);
1485                    break;
1486                  case 6:
1487                    if (!(arg1 <= arg2) || nanop)
1488                        resBits = mask(sizeBits);
1489                    break;
1490                  case 7:
1491                    if (!nanop)
1492                        resBits = mask(sizeBits);
1493                    break;
1494                };
1495
1496                result = insertBits(result, hiIndex, loIndex, resBits);
1497            }
1498            FpDestReg.uqw = result;
1499        '''
1500
1501    class Mcmpf2rf(MediaOp):
1502        def __init__(self, src1, src2,\
1503                size = None, destSize = None, srcSize = None, ext = None):
1504            super(Mcmpf2rf, self).__init__("InstRegIndex(0)", src1,\
1505                    src2, size, destSize, srcSize, ext)
1506        code = '''
1507            union floatInt
1508            {
1509                float f;
1510                uint32_t i;
1511            };
1512            union doubleInt
1513            {
1514                double d;
1515                uint64_t i;
1516            };
1517
1518            assert(srcSize == destSize);
1519            assert(srcSize == 4 || srcSize == 8);
1520            int size = srcSize;
1521            int sizeBits = size * 8;
1522
1523            double arg1, arg2;
1524            uint64_t arg1Bits = bits(FpSrcReg1.uqw, sizeBits - 1, 0);
1525            uint64_t arg2Bits = bits(FpSrcReg2.uqw, sizeBits - 1, 0);
1526            if (size == 4) {
1527                floatInt fi;
1528                fi.i = arg1Bits;
1529                arg1 = fi.f;
1530                fi.i = arg2Bits;
1531                arg2 = fi.f;
1532            } else {
1533                doubleInt di;
1534                di.i = arg1Bits;
1535                arg1 = di.d;
1536                di.i = arg2Bits;
1537                arg2 = di.d;
1538            }
1539
1540            //               ZF PF CF
1541            // Unordered      1  1  1
1542            // Greater than   0  0  0
1543            // Less than      0  0  1
1544            // Equal          1  0  0
1545            //           OF = SF = AF = 0
1546            ccFlagBits = ccFlagBits & ~(OFBit | SFBit | AFBit |
1547                                        ZFBit | PFBit | CFBit);
1548            if (isnan(arg1) || isnan(arg2))
1549                ccFlagBits = ccFlagBits | (ZFBit | PFBit | CFBit);
1550            else if(arg1 < arg2)
1551                ccFlagBits = ccFlagBits | CFBit;
1552            else if(arg1 == arg2)
1553                ccFlagBits = ccFlagBits | ZFBit;
1554        '''
1555}};
1556