mediaop.isa revision 12234:78ece221f9f5
1// Copyright (c) 2009 The Regents of The University of Michigan
2// Copyright (c) 2015 Advanced Micro Devices, Inc.
3//
4// All rights reserved.
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are
8// met: redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer;
10// redistributions in binary form must reproduce the above copyright
11// notice, this list of conditions and the following disclaimer in the
12// documentation and/or other materials provided with the distribution;
13// neither the name of the copyright holders nor the names of its
14// contributors may be used to endorse or promote products derived from
15// this software without specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28//
29// Authors: Gabe Black
30
// Template for the execute() method of a generated media microop class.
// It expands the op_decl and op_rd substitutions, then the microop's code
// substitution, and expands op_wb only if fault is still NoFault afterwards.
// The method returns fault. Both template tuples in the let block below use
// this template as their third entry.
31def template MediaOpExecute {{
32        Fault %(class_name)s::execute(ExecContext *xc,
33                Trace::InstRecord *traceData) const
34        {
35            Fault fault = NoFault;
36
37            %(op_decl)s;
38            %(op_rd)s;
39
40            %(code)s;
41
42            //Write the resulting state to the execution context
43            if(fault == NoFault)
44            {
45                %(op_wb)s;
46            }
47            return fault;
48        }
49}};
50
// Class declaration template for the register form of a media microop.
// The generated class derives from the base_class substitution and declares
// a constructor taking two source register indices (_src1, _src2), a
// destination register index, the source and destination sizes and an
// extension value, followed by the BasicExecDeclare substitution.
51def template MediaOpRegDeclare {{
52    class %(class_name)s : public %(base_class)s
53    {
54      public:
55        %(class_name)s(ExtMachInst _machInst,
56                const char * instMnem, uint64_t setFlags,
57                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
58                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
59
60        %(BasicExecDeclare)s
61    };
62}};
63
// Class declaration template for the immediate form of a media microop.
// Identical in shape to the register form except that the second source is
// a uint16_t immediate (_imm8) instead of a register index.
64def template MediaOpImmDeclare {{
65
66    class %(class_name)s : public %(base_class)s
67    {
68      public:
69        %(class_name)s(ExtMachInst _machInst,
70                const char * instMnem, uint64_t setFlags,
71                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
72                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
73
74        %(BasicExecDeclare)s
75    };
76}};
77
// Constructor definition template for the register form. All arguments are
// forwarded to the base_class constructor together with the mnemonic string
// and the op_class substitution; the body is the constructor substitution.
78def template MediaOpRegConstructor {{
79    %(class_name)s::%(class_name)s(
80            ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
81            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
82            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
83        %(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
84                _src1, _src2, _dest, _srcSize, _destSize, _ext,
85                %(op_class)s)
86    {
87        %(constructor)s;
88    }
89}};
90
// Constructor definition template for the immediate form. Same as the
// register form, but the second source forwarded to the base_class
// constructor is the uint16_t immediate _imm8.
91def template MediaOpImmConstructor {{
92    %(class_name)s::%(class_name)s(
93            ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
94            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
95            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
96        %(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
97                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
98                %(op_class)s)
99    {
100        %(constructor)s;
101    }
102}};
103
104let {{
105    # Make these empty strings so that concatenating onto
106    # them will always work.
    # MediaOpMeta.buildCppClasses appends the text generated from the
    # declaration, constructor and execute templates to header_output,
    # decoder_output and exec_output respectively.
107    header_output = ""
108    decoder_output = ""
109    exec_output = ""
110
    # Template triples. buildCppClasses indexes them positionally:
    # [0] class declaration, [1] constructor, [2] execute method, so the
    # order of the entries matters. The immediate set is chosen when the
    # microop code mentions imm8, the register set otherwise.
111    immTemplates = (
112            MediaOpImmDeclare,
113            MediaOpImmConstructor,
114            MediaOpExecute)
115
116    regTemplates = (
117            MediaOpRegDeclare,
118            MediaOpRegConstructor,
119            MediaOpExecute)
120
121    class MediaOpMeta(type):
122        def buildCppClasses(self, name, Name, suffix, code):
123
124            # Globals to stick the output in
125            global header_output
126            global decoder_output
127            global exec_output
128
129            # If op2 is used anywhere, make register and immediate versions
130            # of this code.
131            matcher = re.compile(r"(?<!\w)(?P<prefix>s?)op2(?P<typeQual>_[^\W_]+)?")
132            match = matcher.search(code)
133            if match:
134                typeQual = ""
135                if match.group("typeQual"):
136                    typeQual = match.group("typeQual")
137                src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual)
138                self.buildCppClasses(name, Name, suffix,
139                        matcher.sub(src2_name, code))
140                self.buildCppClasses(name + "i", Name, suffix + "Imm",
141                        matcher.sub("imm8", code))
142                return
143
144            base = "X86ISA::MediaOp"
145
146            # If imm8 shows up in the code, use the immediate templates, if
147            # not, hopefully the register ones will be correct.
148            matcher = re.compile("(?<!\w)imm8(?!\w)")
149            if matcher.search(code):
150                base += "Imm"
151                templates = immTemplates
152            else:
153                base += "Reg"
154                templates = regTemplates
155
156            # Get everything ready for the substitution
157            iop = InstObjParams(name, Name + suffix, base, {"code" : code})
158
159            # Generate the actual code (finally!)
160            header_output += templates[0].subst(iop)
161            decoder_output += templates[1].subst(iop)
162            exec_output += templates[2].subst(iop)
163
164
165        def __new__(mcls, Name, bases, dict):
166            abstract = False
167            name = Name.lower()
168            if "abstract" in dict:
169                abstract = dict['abstract']
170                del dict['abstract']
171
172            cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
173            if not abstract:
174                cls.className = Name
175                cls.base_mnemonic = name
176                code = cls.code
177
178                # Set up the C++ classes
179                mcls.buildCppClasses(cls, name, Name, "", code)
180
181                # Hook into the microassembler dict
182                global microopClasses
183                microopClasses[name] = cls
184
185                # If op2 is used anywhere, make register and immediate versions
186                # of this code.
187                matcher = re.compile(r"op2(?P<typeQual>_[^\W_]+)?")
188                if matcher.search(code):
189                    microopClasses[name + 'i'] = cls
190            return cls
191
192
193    class MediaOp(X86Microop):
194        __metaclass__ = MediaOpMeta
195        # This class itself doesn't act as a microop
196        abstract = True
197
198        def __init__(self, dest, src1, op2,
199                size = None, destSize = None, srcSize = None, ext = None):
200            self.dest = dest
201            self.src1 = src1
202            self.op2 = op2
203            if size is not None:
204                self.srcSize = size
205                self.destSize = size
206            if srcSize is not None:
207                self.srcSize = srcSize
208            if destSize is not None:
209                self.destSize = destSize
210            if self.srcSize is None:
211                raise Exception, "Source size not set."
212            if self.destSize is None:
213                raise Exception, "Dest size not set."
214            if ext is None:
215                self.ext = 0
216            else:
217                self.ext = ext
218
219        def getAllocator(self, microFlags):
220            className = self.className
221            if self.mnemonic == self.base_mnemonic + 'i':
222                className += "Imm"
223            allocator = '''new %(class_name)s(machInst, macrocodeBlock,
224                    %(flags)s, %(src1)s, %(op2)s, %(dest)s,
225                    %(srcSize)s, %(destSize)s, %(ext)s)''' % {
226                "class_name" : className,
227                "flags" : self.microFlagsText(microFlags),
228                "src1" : self.src1, "op2" : self.op2,
229                "dest" : self.dest,
230                "srcSize" : self.srcSize,
231                "destSize" : self.destSize,
232                "ext" : self.ext}
233            return allocator
234
235    class Mov2int(MediaOp):
        # Extracts one srcSize-byte item of FpSrcReg1_uqw and merges it into
        # DestReg at destSize. The item index is imm8, i.e. the src2
        # argument (0 by default); because the code names imm8 directly,
        # buildCppClasses selects the immediate templates for this microop.
        # When bit 0 of ext is set and bits(src1, 0) is non-zero, the index
        # is lowered by the number of items in a FloatRegBits. An index
        # outside [0, items) leaves DestReg unchanged.
236        def __init__(self, dest, src1, src2 = 0, \
237                size = None, destSize = None, srcSize = None, ext = None):
238            super(Mov2int, self).__init__(dest, src1,\
239                    src2, size, destSize, srcSize, ext)
240        code = '''
241            int items = sizeof(FloatRegBits) / srcSize;
242            int offset = imm8;
243            if (bits(src1, 0) && (ext & 0x1))
244                offset -= items;
245            if (offset >= 0 && offset < items) {
246                uint64_t fpSrcReg1 =
247                    bits(FpSrcReg1_uqw,
248                            (offset + 1) * srcSize * 8 - 1,
249                            (offset + 0) * srcSize * 8);
250                DestReg = merge(0, fpSrcReg1, destSize);
251            } else {
252                DestReg = DestReg;
253            }
254        '''
255
256    class Mov2fp(MediaOp):
        # Inserts pick(SrcReg1, 0, srcSize) into one destSize-byte item of
        # FpDestReg_uqw. The item index is imm8, i.e. the src2 argument
        # (0 by default), so the immediate templates are used. When bit 0 of
        # ext is set and bits(dest, 0) is non-zero, the index is lowered by
        # the number of items in a FloatRegBits. An index outside
        # [0, items) leaves FpDestReg_uqw unchanged.
257        def __init__(self, dest, src1, src2 = 0, \
258                size = None, destSize = None, srcSize = None, ext = None):
259            super(Mov2fp, self).__init__(dest, src1,\
260                    src2, size, destSize, srcSize, ext)
261        code = '''
262            int items = sizeof(FloatRegBits) / destSize;
263            int offset = imm8;
264            if (bits(dest, 0) && (ext & 0x1))
265                offset -= items;
266            if (offset >= 0 && offset < items) {
267                uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
268                FpDestReg_uqw =
269                    insertBits(FpDestReg_uqw,
270                            (offset + 1) * destSize * 8 - 1,
271                            (offset + 0) * destSize * 8, srcReg1);
272            } else {
273                FpDestReg_uqw = FpDestReg_uqw;
274            }
275        '''
276
277    class Movsign(MediaOp):
        # Gathers the top bit of every srcSize-byte item of FpSrcReg1_uqw.
        # The top bit of item i is placed at bit i + offset of the result,
        # where offset is the item count when bit 0 of ext is set and 0
        # otherwise. The result is ORed into DestReg. There is no second
        # source; "InstRegIndex(0)" is passed as a placeholder.
278        def __init__(self, dest, src, \
279                size = None, destSize = None, srcSize = None, ext = None):
280            super(Movsign, self).__init__(dest, src,\
281                    "InstRegIndex(0)", size, destSize, srcSize, ext)
282        code = '''
283            int items = sizeof(FloatRegBits) / srcSize;
284            uint64_t result = 0;
285            int offset = (ext & 0x1) ? items : 0;
286            for (int i = 0; i < items; i++) {
287                uint64_t picked =
288                    bits(FpSrcReg1_uqw, (i + 1) * 8 * srcSize - 1);
289                result = insertBits(result, i + offset, i + offset, picked);
290            }
291            DestReg = DestReg | result;
292        '''
293
294    class Maskmov(MediaOp):
        # Masked per-item move. Starting from the current FpDestReg_uqw,
        # each item of FpSrcReg1_uqw is copied into the result only when the
        # top bit of the corresponding FpSrcReg2_uqw item is set; other
        # items keep their previous destination value. srcSize must equal
        # destSize (asserted). The item count comes from numItems(size).
295        code = '''
296            assert(srcSize == destSize);
297            int size = srcSize;
298            int sizeBits = size * 8;
299            int items = numItems(size);
300            uint64_t result = FpDestReg_uqw;
301
302            for (int i = 0; i < items; i++) {
303                int hiIndex = (i + 1) * sizeBits - 1;
304                int loIndex = (i + 0) * sizeBits;
305                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
306                if (bits(FpSrcReg2_uqw, hiIndex))
307                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
308            }
309            FpDestReg_uqw = result;
310        '''
311
312    class shuffle(MediaOp):
        # Builds the destination item by item from selectors packed in ext.
        # Each selector is optionBits wide (1 bit with 2 options for 8-byte
        # items, 2 bits with 4 options otherwise) and is consumed from the
        # low end of ext, one per result item. A selector whose item would
        # start at or beyond sizeof(FloatRegBits) is reduced by options / 2
        # and read from FpSrcReg2_uqw; any other selector reads that item
        # from FpSrcReg1_uqw. srcSize must equal destSize (asserted).
        # Note the class name is lower case, unlike its siblings; the
        # metaclass lower-cases the name for the mnemonic either way.
313        code = '''
314            assert(srcSize == destSize);
315            int size = srcSize;
316            int sizeBits = size * 8;
317            int items = sizeof(FloatRegBits) / size;
318            int options;
319            int optionBits;
320            if (size == 8) {
321                options = 2;
322                optionBits = 1;
323            } else {
324                options = 4;
325                optionBits = 2;
326            }
327
328            uint64_t result = 0;
329            uint8_t sel = ext;
330
331            for (int i = 0; i < items; i++) {
332                uint64_t resBits;
333                uint8_t lsel = sel & mask(optionBits);
334                if (lsel * size >= sizeof(FloatRegBits)) {
335                    lsel -= options / 2;
336                    resBits = bits(FpSrcReg2_uqw,
337                            (lsel + 1) * sizeBits - 1,
338                            (lsel + 0) * sizeBits);
339                }  else {
340                    resBits = bits(FpSrcReg1_uqw,
341                            (lsel + 1) * sizeBits - 1,
342                            (lsel + 0) * sizeBits);
343                }
344
345                sel >>= optionBits;
346
347                int hiIndex = (i + 1) * sizeBits - 1;
348                int loIndex = (i + 0) * sizeBits;
349                result = insertBits(result, hiIndex, loIndex, resBits);
350            }
351            FpDestReg_uqw = result;
352        '''
353
354    class Unpack(MediaOp):
        # Interleaves items of the two sources. With items being half the
        # number of destSize-byte items in a FloatRegBits, result item 2*i
        # is item i + offset of FpSrcReg1_uqw and result item 2*i + 1 is
        # item i + offset of FpSrcReg2_uqw. offset is `items` when ext is
        # non-zero (the upper half of each source) and 0 otherwise.
        # srcSize must equal destSize (asserted).
355        code = '''
356            assert(srcSize == destSize);
357            int size = destSize;
358            int items = (sizeof(FloatRegBits) / size) / 2;
359            int offset = ext ? items : 0;
360            uint64_t result = 0;
361            for (int i = 0; i < items; i++) {
362                uint64_t pickedLow =
363                    bits(FpSrcReg1_uqw, (i + offset + 1) * 8 * size - 1,
364                                        (i + offset) * 8 * size);
365                result = insertBits(result,
366                                    (2 * i + 1) * 8 * size - 1,
367                                    (2 * i + 0) * 8 * size,
368                                    pickedLow);
369                uint64_t pickedHigh =
370                    bits(FpSrcReg2_uqw, (i + offset + 1) * 8 * size - 1,
371                                        (i + offset) * 8 * size);
372                result = insertBits(result,
373                                    (2 * i + 2) * 8 * size - 1,
374                                    (2 * i + 1) * 8 * size,
375                                    pickedHigh);
376            }
377            FpDestReg_uqw = result;
378        '''
379
380    class Pack(MediaOp):
381        code = '''
382            assert(srcSize == destSize * 2);
383            int items = (sizeof(FloatRegBits) / destSize);
384            int destBits = destSize * 8;
385            int srcBits = srcSize * 8;
386            uint64_t result = 0;
387            int i;
388            for (i = 0; i < items / 2; i++) {
389                uint64_t picked =
390                    bits(FpSrcReg1_uqw, (i + 1) * srcBits - 1,
391                                        (i + 0) * srcBits);
392                unsigned signBit = bits(picked, srcBits - 1);
393                uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
394
395                // Handle saturation.
396                if (signBit) {
397                    if (overflow != mask(destBits - srcBits + 1)) {
398                        if (signedOp())
399                            picked = (ULL(1) << (destBits - 1));
400                        else
401                            picked = 0;
402                    }
403                } else {
404                    if (overflow != 0) {
405                        if (signedOp())
406                            picked = mask(destBits - 1);
407                        else
408                            picked = mask(destBits);
409                    }
410                }
411                result = insertBits(result,
412                                    (i + 1) * destBits - 1,
413                                    (i + 0) * destBits,
414                                    picked);
415            }
416            for (;i < items; i++) {
417                uint64_t picked =
418                    bits(FpSrcReg2_uqw, (i - items + 1) * srcBits - 1,
419                                        (i - items + 0) * srcBits);
420                unsigned signBit = bits(picked, srcBits - 1);
421                uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
422
423                // Handle saturation.
424                if (signBit) {
425                    if (overflow != mask(destBits - srcBits + 1)) {
426                        if (signedOp())
427                            picked = (ULL(1) << (destBits - 1));
428                        else
429                            picked = 0;
430                    }
431                } else {
432                    if (overflow != 0) {
433                        if (signedOp())
434                            picked = mask(destBits - 1);
435                        else
436                            picked = mask(destBits);
437                    }
438                }
439                result = insertBits(result,
440                                    (i + 1) * destBits - 1,
441                                    (i + 0) * destBits,
442                                    picked);
443            }
444            FpDestReg_uqw = result;
445        '''
446
447    class Mxor(MediaOp):
        # Bitwise XOR of the two sources, written to FpDestReg_uqw.
        # The constructor takes no size arguments; it passes 1 as the
        # `size` argument of MediaOp.__init__.
448        def __init__(self, dest, src1, src2):
449            super(Mxor, self).__init__(dest, src1, src2, 1)
450        code = '''
451            FpDestReg_uqw = FpSrcReg1_uqw ^ FpSrcReg2_uqw;
452        '''
453
454    class Mor(MediaOp):
        # Bitwise OR of the two sources, written to FpDestReg_uqw.
        # The constructor takes no size arguments; it passes 1 as the
        # `size` argument of MediaOp.__init__.
455        def __init__(self, dest, src1, src2):
456            super(Mor, self).__init__(dest, src1, src2, 1)
457        code = '''
458            FpDestReg_uqw = FpSrcReg1_uqw | FpSrcReg2_uqw;
459        '''
460
461    class Mand(MediaOp):
        # Bitwise AND of the two sources, written to FpDestReg_uqw.
        # The constructor takes no size arguments; it passes 1 as the
        # `size` argument of MediaOp.__init__.
462        def __init__(self, dest, src1, src2):
463            super(Mand, self).__init__(dest, src1, src2, 1)
464        code = '''
465            FpDestReg_uqw = FpSrcReg1_uqw & FpSrcReg2_uqw;
466        '''
467
468    class Mandn(MediaOp):
        # AND-NOT: the complement of the FIRST source ANDed with the second
        # source, written to FpDestReg_uqw. The constructor takes no size
        # arguments; it passes 1 as the `size` argument of
        # MediaOp.__init__.
469        def __init__(self, dest, src1, src2):
470            super(Mandn, self).__init__(dest, src1, src2, 1)
471        code = '''
472            FpDestReg_uqw = ~FpSrcReg1_uqw & FpSrcReg2_uqw;
473        '''
474
475    class Mminf(MediaOp):
        # Per-item floating point minimum. srcSize must equal destSize and
        # be 4 or 8 (both asserted). The bits of each item are reinterpreted
        # as float (size 4) or double (size 8) through a union and compared
        # as doubles. The first source's bits are kept only when
        # arg1 < arg2 is true; in every other case, including equal values,
        # the second source's bits are used. The result starts from the
        # current FpDestReg_uqw and numItems(size) items are overwritten.
476        code = '''
477            union floatInt
478            {
479                float f;
480                uint32_t i;
481            };
482            union doubleInt
483            {
484                double d;
485                uint64_t i;
486            };
487
488            assert(srcSize == destSize);
489            int size = srcSize;
490            int sizeBits = size * 8;
491            assert(srcSize == 4 || srcSize == 8);
492            int items = numItems(size);
493            uint64_t result = FpDestReg_uqw;
494
495            for (int i = 0; i < items; i++) {
496                double arg1, arg2;
497                int hiIndex = (i + 1) * sizeBits - 1;
498                int loIndex = (i + 0) * sizeBits;
499                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
500                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
501
502                if (size == 4) {
503                    floatInt fi;
504                    fi.i = arg1Bits;
505                    arg1 = fi.f;
506                    fi.i = arg2Bits;
507                    arg2 = fi.f;
508                } else {
509                    doubleInt di;
510                    di.i = arg1Bits;
511                    arg1 = di.d;
512                    di.i = arg2Bits;
513                    arg2 = di.d;
514                }
515
516                if (arg1 < arg2) {
517                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
518                } else {
519                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
520                }
521            }
522            FpDestReg_uqw = result;
523        '''
524
525    class Mmaxf(MediaOp):
        # Per-item floating point maximum. srcSize must equal destSize and
        # be 4 or 8 (both asserted). The bits of each item are reinterpreted
        # as float (size 4) or double (size 8) through a union and compared
        # as doubles. The first source's bits are kept only when
        # arg1 > arg2 is true; in every other case, including equal values,
        # the second source's bits are used. The result starts from the
        # current FpDestReg_uqw and numItems(size) items are overwritten.
526        code = '''
527            union floatInt
528            {
529                float f;
530                uint32_t i;
531            };
532            union doubleInt
533            {
534                double d;
535                uint64_t i;
536            };
537
538            assert(srcSize == destSize);
539            int size = srcSize;
540            int sizeBits = size * 8;
541            assert(srcSize == 4 || srcSize == 8);
542            int items = numItems(size);
543            uint64_t result = FpDestReg_uqw;
544
545            for (int i = 0; i < items; i++) {
546                double arg1, arg2;
547                int hiIndex = (i + 1) * sizeBits - 1;
548                int loIndex = (i + 0) * sizeBits;
549                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
550                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
551
552                if (size == 4) {
553                    floatInt fi;
554                    fi.i = arg1Bits;
555                    arg1 = fi.f;
556                    fi.i = arg2Bits;
557                    arg2 = fi.f;
558                } else {
559                    doubleInt di;
560                    di.i = arg1Bits;
561                    arg1 = di.d;
562                    di.i = arg2Bits;
563                    arg2 = di.d;
564                }
565
566                if (arg1 > arg2) {
567                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
568                } else {
569                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
570                }
571            }
572            FpDestReg_uqw = result;
573        '''
574
575    class Mmini(MediaOp):
        # Per-item integer minimum. srcSize must equal destSize (asserted).
        # Each item is also sign-extended into an int64_t by OR-ing in
        # 0 - (bits & top-bit-of-item). When signedOp() is true the
        # sign-extended values are compared, otherwise the raw unsigned
        # item bits are compared. The first source's bits are kept only
        # when it is strictly smaller; otherwise the second source's bits
        # are used. The result starts from the current FpDestReg_uqw and
        # numItems(size) items are overwritten.
576        code = '''
577
578            assert(srcSize == destSize);
579            int size = srcSize;
580            int sizeBits = size * 8;
581            int items = numItems(size);
582            uint64_t result = FpDestReg_uqw;
583
584            for (int i = 0; i < items; i++) {
585                int hiIndex = (i + 1) * sizeBits - 1;
586                int loIndex = (i + 0) * sizeBits;
587                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
588                int64_t arg1 = arg1Bits |
589                    (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
590                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
591                int64_t arg2 = arg2Bits |
592                    (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
593                uint64_t resBits;
594
595                if (signedOp()) {
596                    if (arg1 < arg2) {
597                        resBits = arg1Bits;
598                    } else {
599                        resBits = arg2Bits;
600                    }
601                } else {
602                    if (arg1Bits < arg2Bits) {
603                        resBits = arg1Bits;
604                    } else {
605                        resBits = arg2Bits;
606                    }
607                }
608                result = insertBits(result, hiIndex, loIndex, resBits);
609            }
610            FpDestReg_uqw = result;
611        '''
612
613    class Mmaxi(MediaOp):
        # Per-item integer maximum. srcSize must equal destSize (asserted).
        # Each item is also sign-extended into an int64_t by OR-ing in
        # 0 - (bits & top-bit-of-item). When signedOp() is true the
        # sign-extended values are compared, otherwise the raw unsigned
        # item bits are compared. The first source's bits are kept only
        # when it is strictly larger; otherwise the second source's bits
        # are used. The result starts from the current FpDestReg_uqw and
        # numItems(size) items are overwritten.
614        code = '''
615
616            assert(srcSize == destSize);
617            int size = srcSize;
618            int sizeBits = size * 8;
619            int items = numItems(size);
620            uint64_t result = FpDestReg_uqw;
621
622            for (int i = 0; i < items; i++) {
623                int hiIndex = (i + 1) * sizeBits - 1;
624                int loIndex = (i + 0) * sizeBits;
625                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
626                int64_t arg1 = arg1Bits |
627                    (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
628                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
629                int64_t arg2 = arg2Bits |
630                    (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
631                uint64_t resBits;
632
633                if (signedOp()) {
634                    if (arg1 > arg2) {
635                        resBits = arg1Bits;
636                    } else {
637                        resBits = arg2Bits;
638                    }
639                } else {
640                    if (arg1Bits > arg2Bits) {
641                        resBits = arg1Bits;
642                    } else {
643                        resBits = arg2Bits;
644                    }
645                }
646                result = insertBits(result, hiIndex, loIndex, resBits);
647            }
648            FpDestReg_uqw = result;
649        '''
650
651    class Msqrt(MediaOp):
        # Per-item floating point square root of FpSrcReg1_uqw. srcSize
        # must equal destSize and be 4 or 8 (both asserted); the item bits
        # are reinterpreted as float or double through a union, passed to
        # sqrt(), and the resulting bits are written back. There is no
        # second source; "InstRegIndex(0)" is passed as a placeholder.
        # The result starts from the current FpDestReg_uqw and
        # numItems(size) items are overwritten.
652        def __init__(self, dest, src, \
653                size = None, destSize = None, srcSize = None, ext = None):
654            super(Msqrt, self).__init__(dest, src,\
655                    "InstRegIndex(0)", size, destSize, srcSize, ext)
656        code = '''
657            union floatInt
658            {
659                float f;
660                uint32_t i;
661            };
662            union doubleInt
663            {
664                double d;
665                uint64_t i;
666            };
667
668            assert(srcSize == destSize);
669            int size = srcSize;
670            int sizeBits = size * 8;
671            assert(srcSize == 4 || srcSize == 8);
672            int items = numItems(size);
673            uint64_t result = FpDestReg_uqw;
674
675            for (int i = 0; i < items; i++) {
676                int hiIndex = (i + 1) * sizeBits - 1;
677                int loIndex = (i + 0) * sizeBits;
678                uint64_t argBits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
679
680                if (size == 4) {
681                    floatInt fi;
682                    fi.i = argBits;
683                    fi.f = sqrt(fi.f);
684                    argBits = fi.i;
685                } else {
686                    doubleInt di;
687                    di.i = argBits;
688                    di.d = sqrt(di.d);
689                    argBits = di.i;
690                }
691                result = insertBits(result, hiIndex, loIndex, argBits);
692            }
693            FpDestReg_uqw = result;
694        '''
695
696    # compute approximate reciprocal --- single-precision only
697    class Mrcp(MediaOp):
        # Per-item reciprocal of FpSrcReg1_uqw for 4-byte items only:
        # srcSize must be 4 and equal destSize (both asserted). Each item is
        # reinterpreted as a float through a union and replaced by
        # 1.0 / value, computed exactly rather than approximately. There is
        # no second source; "InstRegIndex(0)" is passed as a placeholder.
        # The result starts from the current FpDestReg_uqw and
        # numItems(size) items are overwritten.
698        def __init__(self, dest, src, \
699                size = None, destSize = None, srcSize = None, ext = None):
700            super(Mrcp, self).__init__(dest, src,\
701                    "InstRegIndex(0)", size, destSize, srcSize, ext)
702        code = '''
703            union floatInt
704            {
705                float f;
706                uint32_t i;
707            };
708
709            assert(srcSize == 4);  // ISA defines single-precision only
710            assert(srcSize == destSize);
711            const int size = 4;
712            const int sizeBits = size * 8;
713            int items = numItems(size);
714            uint64_t result = FpDestReg_uqw;
715
716            for (int i = 0; i < items; i++) {
717                int hiIndex = (i + 1) * sizeBits - 1;
718                int loIndex = (i + 0) * sizeBits;
719                uint64_t argBits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
720
721                floatInt fi;
722                fi.i = argBits;
723                // This is more accuracy than HW provides, but oh well
724                fi.f = 1.0 / fi.f;
725                argBits = fi.i;
726                result = insertBits(result, hiIndex, loIndex, argBits);
727            }
728            FpDestReg_uqw = result;
729        '''
730
731    class Maddf(MediaOp):
        # Per-item floating point addition: first source + second source.
        # srcSize must equal destSize and be 4 or 8 (both asserted); items
        # are reinterpreted as float (size 4) or double (size 8) through a
        # union and the sum is computed in that type. The result starts from
        # the current FpDestReg_uqw and numItems(size) items are
        # overwritten.
732        code = '''
733            union floatInt
734            {
735                float f;
736                uint32_t i;
737            };
738            union doubleInt
739            {
740                double d;
741                uint64_t i;
742            };
743
744            assert(srcSize == destSize);
745            int size = srcSize;
746            int sizeBits = size * 8;
747            assert(srcSize == 4 || srcSize == 8);
748            int items = numItems(size);
749            uint64_t result = FpDestReg_uqw;
750
751            for (int i = 0; i < items; i++) {
752                int hiIndex = (i + 1) * sizeBits - 1;
753                int loIndex = (i + 0) * sizeBits;
754                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
755                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
756                uint64_t resBits;
757
758                if (size == 4) {
759                    floatInt arg1, arg2, res;
760                    arg1.i = arg1Bits;
761                    arg2.i = arg2Bits;
762                    res.f = arg1.f + arg2.f;
763                    resBits = res.i;
764                } else {
765                    doubleInt arg1, arg2, res;
766                    arg1.i = arg1Bits;
767                    arg2.i = arg2Bits;
768                    res.d = arg1.d + arg2.d;
769                    resBits = res.i;
770                }
771
772                result = insertBits(result, hiIndex, loIndex, resBits);
773            }
774            FpDestReg_uqw = result;
775        '''
776
777    class Msubf(MediaOp):
        # Per-item floating point subtraction: first source minus second
        # source. srcSize must equal destSize and be 4 or 8 (both asserted);
        # items are reinterpreted as float (size 4) or double (size 8)
        # through a union and the difference is computed in that type. The
        # result starts from the current FpDestReg_uqw and numItems(size)
        # items are overwritten.
778        code = '''
779            union floatInt
780            {
781                float f;
782                uint32_t i;
783            };
784            union doubleInt
785            {
786                double d;
787                uint64_t i;
788            };
789
790            assert(srcSize == destSize);
791            int size = srcSize;
792            int sizeBits = size * 8;
793            assert(srcSize == 4 || srcSize == 8);
794            int items = numItems(size);
795            uint64_t result = FpDestReg_uqw;
796
797            for (int i = 0; i < items; i++) {
798                int hiIndex = (i + 1) * sizeBits - 1;
799                int loIndex = (i + 0) * sizeBits;
800                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
801                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
802                uint64_t resBits;
803
804                if (size == 4) {
805                    floatInt arg1, arg2, res;
806                    arg1.i = arg1Bits;
807                    arg2.i = arg2Bits;
808                    res.f = arg1.f - arg2.f;
809                    resBits = res.i;
810                } else {
811                    doubleInt arg1, arg2, res;
812                    arg1.i = arg1Bits;
813                    arg2.i = arg2Bits;
814                    res.d = arg1.d - arg2.d;
815                    resBits = res.i;
816                }
817
818                result = insertBits(result, hiIndex, loIndex, resBits);
819            }
820            FpDestReg_uqw = result;
821        '''
822
823    class Mmulf(MediaOp):
        # Per-item floating point multiplication of the two sources.
        # srcSize must equal destSize and be 4 or 8 (both asserted); items
        # are reinterpreted as float (size 4) or double (size 8) through a
        # union and the product is computed in that type. The result starts
        # from the current FpDestReg_uqw and numItems(size) items are
        # overwritten.
824        code = '''
825            union floatInt
826            {
827                float f;
828                uint32_t i;
829            };
830            union doubleInt
831            {
832                double d;
833                uint64_t i;
834            };
835
836            assert(srcSize == destSize);
837            int size = srcSize;
838            int sizeBits = size * 8;
839            assert(srcSize == 4 || srcSize == 8);
840            int items = numItems(size);
841            uint64_t result = FpDestReg_uqw;
842
843            for (int i = 0; i < items; i++) {
844                int hiIndex = (i + 1) * sizeBits - 1;
845                int loIndex = (i + 0) * sizeBits;
846                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
847                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
848                uint64_t resBits;
849
850                if (size == 4) {
851                    floatInt arg1, arg2, res;
852                    arg1.i = arg1Bits;
853                    arg2.i = arg2Bits;
854                    res.f = arg1.f * arg2.f;
855                    resBits = res.i;
856                } else {
857                    doubleInt arg1, arg2, res;
858                    arg1.i = arg1Bits;
859                    arg2.i = arg2Bits;
860                    res.d = arg1.d * arg2.d;
861                    resBits = res.i;
862                }
863
864                result = insertBits(result, hiIndex, loIndex, resBits);
865            }
866            FpDestReg_uqw = result;
867        '''
868
869    class Mdivf(MediaOp):
        # Element-wise floating-point divide.
        #
        # `code` is the C++ snippet spliced into the execute template via
        # %(code)s. It requires srcSize == destSize and an element size of
        # 4 (float) or 8 (double) bytes. For each of numItems(size) elements
        # it reads the same bit range [loIndex, hiIndex] from FpSrcReg1_uqw
        # and FpSrcReg2_uqw, reinterprets the raw bits through the
        # floatInt/doubleInt unions, computes arg1 / arg2 (source 1 is the
        # dividend), and inserts the raw result bits into a copy of
        # FpDestReg_uqw. That copy is written back after the loop.
        # The snippet performs no explicit check of the divisor.
        #
        # NOTE(review): numItems(), bits() and insertBits() are defined
        # outside this chunk; confirm their exact semantics there.
870        code = '''
871            union floatInt
872            {
873                float f;
874                uint32_t i;
875            };
876            union doubleInt
877            {
878                double d;
879                uint64_t i;
880            };
881
882            assert(srcSize == destSize);
883            int size = srcSize;
884            int sizeBits = size * 8;
885            assert(srcSize == 4 || srcSize == 8);
886            int items = numItems(size);
887            uint64_t result = FpDestReg_uqw;
888
889            for (int i = 0; i < items; i++) {
890                int hiIndex = (i + 1) * sizeBits - 1;
891                int loIndex = (i + 0) * sizeBits;
892                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
893                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
894                uint64_t resBits;
895
896                if (size == 4) {
897                    floatInt arg1, arg2, res;
898                    arg1.i = arg1Bits;
899                    arg2.i = arg2Bits;
900                    res.f = arg1.f / arg2.f;
901                    resBits = res.i;
902                } else {
903                    doubleInt arg1, arg2, res;
904                    arg1.i = arg1Bits;
905                    arg2.i = arg2Bits;
906                    res.d = arg1.d / arg2.d;
907                    resBits = res.i;
908                }
909
910                result = insertBits(result, hiIndex, loIndex, resBits);
911            }
912            FpDestReg_uqw = result;
913        '''
914
915    class Maddi(MediaOp):
        # Element-wise integer add with optional saturation.
        #
        # `code` is the C++ snippet spliced into the execute template via
        # %(code)s. It requires srcSize == destSize. For each of
        # numItems(size) elements it adds the matching bit ranges of
        # FpSrcReg1_uqw and FpSrcReg2_uqw and inserts the sum into a copy of
        # FpDestReg_uqw, which is written back after the loop.
        #
        # When (ext & 0x2) is set the sum is adjusted before insertion:
        #   * signedOp() true: if both operands have the same top bit and the
        #     sum's top bit differs, the result becomes
        #     1 << (sizeBits - 1) when the sum's top bit is clear, otherwise
        #     mask(sizeBits - 1).
        #   * signedOp() false: if findCarry() reports a carry, the result
        #     becomes mask(sizeBits).
        # When (ext & 0x2) is clear the raw sum is inserted unchanged.
        #
        # NOTE(review): signedOp(), findCarry(), mask(), numItems(), bits()
        # and insertBits() are defined outside this chunk; mask(n) presumably
        # yields the n low bits set -- confirm.
916        code = '''
917            assert(srcSize == destSize);
918            int size = srcSize;
919            int sizeBits = size * 8;
920            int items = numItems(size);
921            uint64_t result = FpDestReg_uqw;
922
923            for (int i = 0; i < items; i++) {
924                int hiIndex = (i + 1) * sizeBits - 1;
925                int loIndex = (i + 0) * sizeBits;
926                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
927                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
928                uint64_t resBits = arg1Bits + arg2Bits;
929
930                if (ext & 0x2) {
931                    if (signedOp()) {
932                        int arg1Sign = bits(arg1Bits, sizeBits - 1);
933                        int arg2Sign = bits(arg2Bits, sizeBits - 1);
934                        int resSign = bits(resBits, sizeBits - 1);
935                        if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
936                            if (resSign == 0)
937                                resBits = (ULL(1) << (sizeBits - 1));
938                            else
939                                resBits = mask(sizeBits - 1);
940                        }
941                    } else {
942                        if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
943                            resBits = mask(sizeBits);
944                    }
945                }
946
947                result = insertBits(result, hiIndex, loIndex, resBits);
948            }
949            FpDestReg_uqw = result;
950        '''
951
952    class Msubi(MediaOp):
        # Element-wise integer subtract with optional saturation.
        #
        # `code` is the C++ snippet spliced into the execute template via
        # %(code)s. It requires srcSize == destSize. For each of
        # numItems(size) elements it computes arg1 - arg2 from the matching
        # bit ranges of FpSrcReg1_uqw and FpSrcReg2_uqw and inserts the
        # difference into a copy of FpDestReg_uqw, written back after the
        # loop.
        #
        # When (ext & 0x2) is set the difference is adjusted before insertion:
        #   * signedOp() true: the second operand's top bit is inverted
        #     (arg2Sign = !bit) so the same overflow test as an addition can
        #     be used; on overflow the result becomes 1 << (sizeBits - 1)
        #     when the difference's top bit is clear, otherwise
        #     mask(sizeBits - 1).
        #   * signedOp() false: if arg2 > arg1 the result becomes 0;
        #     otherwise, if findCarry() on arg1 and ~arg2 reports no carry,
        #     the result becomes mask(sizeBits).
        #
        # NOTE(review): signedOp(), findCarry(), mask(), numItems(), bits()
        # and insertBits() are defined outside this chunk; confirm their
        # semantics, in particular when the findCarry() branch can trigger.
953        code = '''
954            assert(srcSize == destSize);
955            int size = srcSize;
956            int sizeBits = size * 8;
957            int items = numItems(size);
958            uint64_t result = FpDestReg_uqw;
959
960            for (int i = 0; i < items; i++) {
961                int hiIndex = (i + 1) * sizeBits - 1;
962                int loIndex = (i + 0) * sizeBits;
963                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
964                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
965                uint64_t resBits = arg1Bits - arg2Bits;
966
967                if (ext & 0x2) {
968                    if (signedOp()) {
969                        int arg1Sign = bits(arg1Bits, sizeBits - 1);
970                        int arg2Sign = !bits(arg2Bits, sizeBits - 1);
971                        int resSign = bits(resBits, sizeBits - 1);
972                        if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
973                            if (resSign == 0)
974                                resBits = (ULL(1) << (sizeBits - 1));
975                            else
976                                resBits = mask(sizeBits - 1);
977                        }
978                    } else {
979                        if (arg2Bits > arg1Bits) {
980                            resBits = 0;
981                        } else if (!findCarry(sizeBits, resBits,
982                                             arg1Bits, ~arg2Bits)) {
983                            resBits = mask(sizeBits);
984                        }
985                    }
986                }
987
988                result = insertBits(result, hiIndex, loIndex, resBits);
989            }
990            FpDestReg_uqw = result;
991        '''
992
993    class Mmuli(MediaOp):
        # Element-wise integer multiply, with source elements srcSize bytes
        # wide and destination elements destSize bytes wide.
        #
        # `code` is the C++ snippet spliced into the execute template via
        # %(code)s. It asserts destBits <= 64 and destSize >= srcSize, and
        # produces numItems(destSize) results.
        #
        # Source element selection: by default source element i occupies
        # bits [i * srcBits, (i + 1) * srcBits - 1]. When (ext & 16) is set
        # an extra bit offset is added: i * (destBits - srcBits) if
        # (ext & 32) is also set, otherwise that value plus srcBits.
        #
        # Arithmetic: if signedOp() is true both operands are sign-extended
        # from bit (srcBits - 1) to int64_t before multiplying; otherwise the
        # raw uint64_t values are multiplied. If (ext & 0x4) is set,
        # 1 << (destBits - 1) is added to the product. If multHi() is true
        # the product is shifted right by destBits before being inserted
        # into destination element i.
        #
        # NOTE(review): the assert allows destBits == 64, in which case
        # `resBits >>= destBits` shifts a 64-bit value by its full width --
        # confirm that combination cannot occur together with multHi().
        # NOTE(review): signedOp(), multHi(), numItems(), bits() and
        # insertBits() are defined outside this chunk.
994        code = '''
995            int srcBits = srcSize * 8;
996            int destBits = destSize * 8;
997            assert(destBits <= 64);
998            assert(destSize >= srcSize);
999            int items = numItems(destSize);
1000            uint64_t result = FpDestReg_uqw;
1001
1002            for (int i = 0; i < items; i++) {
1003                int offset = 0;
1004                if (ext & 16) {
1005                    if (ext & 32)
1006                        offset = i * (destBits - srcBits);
1007                    else
1008                        offset = i * (destBits - srcBits) + srcBits;
1009                }
1010                int srcHiIndex = (i + 1) * srcBits - 1 + offset;
1011                int srcLoIndex = (i + 0) * srcBits + offset;
1012                uint64_t arg1Bits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
1013                uint64_t arg2Bits = bits(FpSrcReg2_uqw, srcHiIndex, srcLoIndex);
1014                uint64_t resBits;
1015
1016                if (signedOp()) {
1017                    int64_t arg1 = arg1Bits |
1018                        (0 - (arg1Bits & (ULL(1) << (srcBits - 1))));
1019                    int64_t arg2 = arg2Bits |
1020                        (0 - (arg2Bits & (ULL(1) << (srcBits - 1))));
1021                    resBits = (uint64_t)(arg1 * arg2);
1022                } else {
1023                    resBits = arg1Bits * arg2Bits;
1024                }
1025
1026                if (ext & 0x4)
1027                    resBits += (ULL(1) << (destBits - 1));
1028
1029                if (multHi())
1030                    resBits >>= destBits;
1031
1032                int destHiIndex = (i + 1) * destBits - 1;
1033                int destLoIndex = (i + 0) * destBits;
1034                result = insertBits(result, destHiIndex, destLoIndex, resBits);
1035            }
1036            FpDestReg_uqw = result;
1037        '''
1038
1039    class Mavg(MediaOp):
        # Element-wise integer average.
        #
        # `code` is the C++ snippet spliced into the execute template via
        # %(code)s. It requires srcSize == destSize. For each of
        # numItems(size) elements it computes (arg1 + arg2 + 1) / 2 on the
        # zero-extended uint64_t element values, so a fractional half rounds
        # up, and inserts the result into a copy of FpDestReg_uqw, which is
        # written back after the loop.
        #
        # NOTE(review): numItems(), bits() and insertBits() are defined
        # outside this chunk; confirm their exact semantics there.
1040        code = '''
1041            assert(srcSize == destSize);
1042            int size = srcSize;
1043            int sizeBits = size * 8;
1044            int items = numItems(size);
1045            uint64_t result = FpDestReg_uqw;
1046
1047            for (int i = 0; i < items; i++) {
1048                int hiIndex = (i + 1) * sizeBits - 1;
1049                int loIndex = (i + 0) * sizeBits;
1050                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
1051                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
1052                uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;
1053
1054                result = insertBits(result, hiIndex, loIndex, resBits);
1055            }
1056            FpDestReg_uqw = result;
1057        '''
1058
1059    class Msad(MediaOp):
        # Sum of absolute differences.
        #
        # `code` is the C++ snippet spliced into the execute template via
        # %(code)s. It walks sizeof(FloatRegBits) / srcSize elements (note:
        # computed directly here rather than through numItems()). For each
        # element it subtracts the FpSrcReg2_uqw value from the FpSrcReg1_uqw
        # value as zero-extended integers, takes the absolute value of the
        # signed difference, and accumulates it into `sum`.
        #
        # Unlike the element-wise ops, the previous FpDestReg_uqw contents
        # are not merged: the destination is assigned
        # sum & mask(destSize * 8).
        #
        # NOTE(review): FloatRegBits, mask() and bits() are defined outside
        # this chunk; confirm their exact semantics there.
1060        code = '''
1061            int srcBits = srcSize * 8;
1062            int items = sizeof(FloatRegBits) / srcSize;
1063
1064            uint64_t sum = 0;
1065            for (int i = 0; i < items; i++) {
1066                int hiIndex = (i + 1) * srcBits - 1;
1067                int loIndex = (i + 0) * srcBits;
1068                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
1069                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
1070                int64_t resBits = arg1Bits - arg2Bits;
1071                if (resBits < 0)
1072                    resBits = -resBits;
1073                sum += resBits;
1074            }
1075            FpDestReg_uqw = sum & mask(destSize * 8);
1076        '''
1077
1078    class Msrl(MediaOp):
        # Element-wise logical (zero-filling) right shift.
        #
        # `code` is the C++ snippet spliced into the execute template via
        # %(code)s. It requires srcSize == destSize. The shift amount is read
        # once from op2_uqw and applied to every one of the numItems(size)
        # elements of FpSrcReg1_uqw:
        #   * shiftAmt >= sizeBits: the element result is 0.
        #   * otherwise: the element is shifted right and masked with
        #     mask(sizeBits - shiftAmt).
        # Results are inserted into a copy of FpDestReg_uqw, which is written
        # back after the loop.
        #
        # NOTE(review): op2_uqw, mask(), numItems(), bits() and insertBits()
        # are defined outside this chunk; confirm their semantics there.
1079        code = '''
1080
1081            assert(srcSize == destSize);
1082            int size = srcSize;
1083            int sizeBits = size * 8;
1084            int items = numItems(size);
1085            uint64_t shiftAmt = op2_uqw;
1086            uint64_t result = FpDestReg_uqw;
1087
1088            for (int i = 0; i < items; i++) {
1089                int hiIndex = (i + 1) * sizeBits - 1;
1090                int loIndex = (i + 0) * sizeBits;
1091                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
1092                uint64_t resBits;
1093                if (shiftAmt >= sizeBits) {
1094                    resBits = 0;
1095                } else {
1096                    resBits = (arg1Bits >> shiftAmt) &
1097                        mask(sizeBits - shiftAmt);
1098                }
1099
1100                result = insertBits(result, hiIndex, loIndex, resBits);
1101            }
1102            FpDestReg_uqw = result;
1103        '''
1104
1105    class Msra(MediaOp):
        # Element-wise arithmetic (sign-propagating) right shift.
        #
        # `code` is the C++ snippet spliced into the execute template via
        # %(code)s. It requires srcSize == destSize. The shift amount is read
        # once from op2_uqw and applied to every one of the numItems(size)
        # elements of FpSrcReg1_uqw:
        #   * shiftAmt >= sizeBits: the result is mask(sizeBits) if the
        #     element's top bit is set, otherwise 0.
        #   * otherwise: the element is shifted right, then sign-extended
        #     from bit (sizeBits - 1 - shiftAmt), which is where the original
        #     top bit lands after the shift.
        # Results are inserted into a copy of FpDestReg_uqw, which is written
        # back after the loop.
        #
        # NOTE(review): op2_uqw, mask(), numItems(), bits() and insertBits()
        # are defined outside this chunk; confirm their semantics there.
1106        code = '''
1107
1108            assert(srcSize == destSize);
1109            int size = srcSize;
1110            int sizeBits = size * 8;
1111            int items = numItems(size);
1112            uint64_t shiftAmt = op2_uqw;
1113            uint64_t result = FpDestReg_uqw;
1114
1115            for (int i = 0; i < items; i++) {
1116                int hiIndex = (i + 1) * sizeBits - 1;
1117                int loIndex = (i + 0) * sizeBits;
1118                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
1119                uint64_t resBits;
1120                if (shiftAmt >= sizeBits) {
1121                    if (bits(arg1Bits, sizeBits - 1))
1122                        resBits = mask(sizeBits);
1123                    else
1124                        resBits = 0;
1125                } else {
1126                    resBits = (arg1Bits >> shiftAmt);
1127                    resBits = resBits |
1128                        (0 - (resBits & (ULL(1) << (sizeBits - 1 - shiftAmt))));
1129                }
1130
1131                result = insertBits(result, hiIndex, loIndex, resBits);
1132            }
1133            FpDestReg_uqw = result;
1134        '''
1135
1136    class Msll(MediaOp):
        # Element-wise left shift.
        #
        # `code` is the C++ snippet spliced into the execute template via
        # %(code)s. It requires srcSize == destSize. The shift amount is read
        # once from op2_uqw and applied to every one of the numItems(size)
        # elements of FpSrcReg1_uqw:
        #   * shiftAmt >= sizeBits: the element result is 0.
        #   * otherwise: the element is shifted left; no explicit masking is
        #     done here before the value is handed to insertBits().
        # Results are inserted into a copy of FpDestReg_uqw, which is written
        # back after the loop.
        #
        # NOTE(review): presumably insertBits() discards bits above hiIndex
        # -- confirm. op2_uqw, numItems() and bits() are also defined outside
        # this chunk.
1137        code = '''
1138
1139            assert(srcSize == destSize);
1140            int size = srcSize;
1141            int sizeBits = size * 8;
1142            int items = numItems(size);
1143            uint64_t shiftAmt = op2_uqw;
1144            uint64_t result = FpDestReg_uqw;
1145
1146            for (int i = 0; i < items; i++) {
1147                int hiIndex = (i + 1) * sizeBits - 1;
1148                int loIndex = (i + 0) * sizeBits;
1149                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
1150                uint64_t resBits;
1151                if (shiftAmt >= sizeBits) {
1152                    resBits = 0;
1153                } else {
1154                    resBits = (arg1Bits << shiftAmt);
1155                }
1156
1157                result = insertBits(result, hiIndex, loIndex, resBits);
1158            }
1159            FpDestReg_uqw = result;
1160        '''
1161
1162    class Cvtf2i(MediaOp):
        # Element-wise floating-point to signed integer conversion.
        #
        # The constructor takes only `dest` and `src`; it forwards
        # "InstRegIndex(0)" as the third positional argument of
        # MediaOp.__init__ (presumably the unused second source -- confirm
        # against MediaOp), followed by size, destSize, srcSize and ext.
        #
        # `code` is the C++ snippet spliced into the execute template via
        # %(code)s. Source and destination element sizes must each be 4 or 8
        # bytes and may differ:
        #   * srcSize == 2 * destSize: numItems(srcSize) elements are
        #     converted; if (ext & 0x2) is set the results are written
        #     starting at bit destSizeBits * items instead of bit 0.
        #   * destSize == 2 * srcSize: numItems(destSize) elements are
        #     converted; if (ext & 0x2) is set the sources are read starting
        #     at bit srcSizeBits * items instead of bit 0.
        #   * equal sizes: numItems(destSize) elements, no offsets.
        #
        # Each source element is reinterpreted as float or double and widened
        # to double. If (ext & 0x4) is set, 0.5 is added to non-negative
        # values and subtracted from negative ones before the C++ cast to
        # int32_t/int64_t; without it the cast alone is applied. The integer
        # bit pattern is inserted into a copy of FpDestReg_uqw, written back
        # after the loop.
        #
        # NOTE(review): numItems(), bits() and insertBits() are defined
        # outside this chunk; no range check is made before the cast.
1163        def __init__(self, dest, src, \
1164                size = None, destSize = None, srcSize = None, ext = None):
1165            super(Cvtf2i, self).__init__(dest, src,\
1166                    "InstRegIndex(0)", size, destSize, srcSize, ext)
1167        code = '''
1168            union floatInt
1169            {
1170                float f;
1171                uint32_t i;
1172            };
1173            union doubleInt
1174            {
1175                double d;
1176                uint64_t i;
1177            };
1178
1179            assert(destSize == 4 || destSize == 8);
1180            assert(srcSize == 4 || srcSize == 8);
1181            int srcSizeBits = srcSize * 8;
1182            int destSizeBits = destSize * 8;
1183            int items;
1184            int srcStart = 0;
1185            int destStart = 0;
1186            if (srcSize == 2 * destSize) {
1187                items = numItems(srcSize);
1188                if (ext & 0x2)
1189                    destStart = destSizeBits * items;
1190            } else if (destSize == 2 * srcSize) {
1191                items = numItems(destSize);
1192                if (ext & 0x2)
1193                    srcStart = srcSizeBits * items;
1194            } else {
1195                items = numItems(destSize);
1196            }
1197            uint64_t result = FpDestReg_uqw;
1198
1199            for (int i = 0; i < items; i++) {
1200                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1201                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1202                uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
1203                double arg;
1204
1205                if (srcSize == 4) {
1206                    floatInt fi;
1207                    fi.i = argBits;
1208                    arg = fi.f;
1209                } else {
1210                    doubleInt di;
1211                    di.i = argBits;
1212                    arg = di.d;
1213                }
1214
1215                if (ext & 0x4) {
1216                    if (arg >= 0)
1217                        arg += 0.5;
1218                    else
1219                        arg -= 0.5;
1220                }
1221
1222                if (destSize == 4) {
1223                    int32_t i_arg = (int32_t)arg;
1224                    argBits = *((uint32_t*)&i_arg);
1225                } else {
1226                    int64_t i_arg = (int64_t)arg;
1227                    argBits = *((uint64_t*)&i_arg);
1228                }
1229                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1230                int destLoIndex = destStart + (i + 0) * destSizeBits;
1231                result = insertBits(result, destHiIndex, destLoIndex, argBits);
1232            }
1233            FpDestReg_uqw = result;
1234        '''
1235
1236    class Cvti2f(MediaOp):
        # Element-wise signed integer to floating-point conversion.
        #
        # The constructor takes only `dest` and `src`; it forwards
        # "InstRegIndex(0)" as the third positional argument of
        # MediaOp.__init__ (presumably the unused second source -- confirm
        # against MediaOp), followed by size, destSize, srcSize and ext.
        #
        # `code` is the C++ snippet spliced into the execute template via
        # %(code)s. Source and destination element sizes must each be 4 or 8
        # bytes and may differ:
        #   * srcSize == 2 * destSize: numItems(srcSize) elements are
        #     converted; if (ext & 0x2) is set the results are written
        #     starting at bit destSizeBits * items instead of bit 0.
        #   * destSize == 2 * srcSize: numItems(destSize) elements are
        #     converted; if (ext & 0x2) is set the sources are read starting
        #     at bit srcSizeBits * items instead of bit 0.
        #   * equal sizes: numItems(destSize) elements, no offsets.
        #
        # Each source element is sign-extended from bit (srcSizeBits - 1) to
        # int64_t, converted to double, and then stored either as a float
        # (destSize == 4) or as a double through the unions. The raw bits are
        # inserted into a copy of FpDestReg_uqw, written back after the loop.
        #
        # NOTE(review): numItems(), bits() and insertBits() are defined
        # outside this chunk; confirm their exact semantics there.
1237        def __init__(self, dest, src, \
1238                size = None, destSize = None, srcSize = None, ext = None):
1239            super(Cvti2f, self).__init__(dest, src,\
1240                    "InstRegIndex(0)", size, destSize, srcSize, ext)
1241        code = '''
1242            union floatInt
1243            {
1244                float f;
1245                uint32_t i;
1246            };
1247            union doubleInt
1248            {
1249                double d;
1250                uint64_t i;
1251            };
1252
1253            assert(destSize == 4 || destSize == 8);
1254            assert(srcSize == 4 || srcSize == 8);
1255            int srcSizeBits = srcSize * 8;
1256            int destSizeBits = destSize * 8;
1257            int items;
1258            int srcStart = 0;
1259            int destStart = 0;
1260            if (srcSize == 2 * destSize) {
1261                items = numItems(srcSize);
1262                if (ext & 0x2)
1263                    destStart = destSizeBits * items;
1264            } else if (destSize == 2 * srcSize) {
1265                items = numItems(destSize);
1266                if (ext & 0x2)
1267                    srcStart = srcSizeBits * items;
1268            } else {
1269                items = numItems(destSize);
1270            }
1271            uint64_t result = FpDestReg_uqw;
1272
1273            for (int i = 0; i < items; i++) {
1274                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1275                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1276                uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
1277
1278                int64_t sArg = argBits |
1279                    (0 - (argBits & (ULL(1) << (srcSizeBits - 1))));
1280                double arg = sArg;
1281
1282                if (destSize == 4) {
1283                    floatInt fi;
1284                    fi.f = arg;
1285                    argBits = fi.i;
1286                } else {
1287                    doubleInt di;
1288                    di.d = arg;
1289                    argBits = di.i;
1290                }
1291                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1292                int destLoIndex = destStart + (i + 0) * destSizeBits;
1293                result = insertBits(result, destHiIndex, destLoIndex, argBits);
1294            }
1295            FpDestReg_uqw = result;
1296        '''
1297
1298    class Cvtf2f(MediaOp):
        # Element-wise conversion between floating-point widths.
        #
        # The constructor takes only `dest` and `src`; it forwards
        # "InstRegIndex(0)" as the third positional argument of
        # MediaOp.__init__ (presumably the unused second source -- confirm
        # against MediaOp), followed by size, destSize, srcSize and ext.
        #
        # `code` is the C++ snippet spliced into the execute template via
        # %(code)s. Source and destination element sizes must each be 4 or 8
        # bytes and may differ:
        #   * srcSize == 2 * destSize: numItems(srcSize) elements are
        #     converted; if (ext & 0x2) is set the results are written
        #     starting at bit destSizeBits * items instead of bit 0.
        #   * destSize == 2 * srcSize: numItems(destSize) elements are
        #     converted; if (ext & 0x2) is set the sources are read starting
        #     at bit srcSizeBits * items instead of bit 0.
        #   * equal sizes: numItems(destSize) elements, no offsets.
        #
        # Each source element is reinterpreted as float or double, held in a
        # double, and then stored as float (destSize == 4) or double through
        # the unions. The raw bits are inserted into a copy of FpDestReg_uqw,
        # written back after the loop.
        #
        # NOTE(review): numItems(), bits() and insertBits() are defined
        # outside this chunk; confirm their exact semantics there.
1299        def __init__(self, dest, src, \
1300                size = None, destSize = None, srcSize = None, ext = None):
1301            super(Cvtf2f, self).__init__(dest, src,\
1302                    "InstRegIndex(0)", size, destSize, srcSize, ext)
1303        code = '''
1304            union floatInt
1305            {
1306                float f;
1307                uint32_t i;
1308            };
1309            union doubleInt
1310            {
1311                double d;
1312                uint64_t i;
1313            };
1314
1315            assert(destSize == 4 || destSize == 8);
1316            assert(srcSize == 4 || srcSize == 8);
1317            int srcSizeBits = srcSize * 8;
1318            int destSizeBits = destSize * 8;
1319            int items;
1320            int srcStart = 0;
1321            int destStart = 0;
1322            if (srcSize == 2 * destSize) {
1323                items = numItems(srcSize);
1324                if (ext & 0x2)
1325                    destStart = destSizeBits * items;
1326            } else if (destSize == 2 * srcSize) {
1327                items = numItems(destSize);
1328                if (ext & 0x2)
1329                    srcStart = srcSizeBits * items;
1330            } else {
1331                items = numItems(destSize);
1332            }
1333            uint64_t result = FpDestReg_uqw;
1334
1335            for (int i = 0; i < items; i++) {
1336                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1337                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1338                uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
1339                double arg;
1340
1341                if (srcSize == 4) {
1342                    floatInt fi;
1343                    fi.i = argBits;
1344                    arg = fi.f;
1345                } else {
1346                    doubleInt di;
1347                    di.i = argBits;
1348                    arg = di.d;
1349                }
1350                if (destSize == 4) {
1351                    floatInt fi;
1352                    fi.f = arg;
1353                    argBits = fi.i;
1354                } else {
1355                    doubleInt di;
1356                    di.d = arg;
1357                    argBits = di.i;
1358                }
1359                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1360                int destLoIndex = destStart + (i + 0) * destSizeBits;
1361                result = insertBits(result, destHiIndex, destLoIndex, argBits);
1362            }
1363            FpDestReg_uqw = result;
1364        '''
1365
1366    class Mcmpi2r(MediaOp):
        # Element-wise signed integer compare producing an all-ones/all-zeros
        # value per element.
        #
        # `code` is the C++ snippet spliced into the execute template via
        # %(code)s. It requires srcSize == destSize. For each of
        # numItems(size) elements both operands are sign-extended from bit
        # (sizeBits - 1) to int64_t, then compared:
        #   * (ext & 0x2) == 0:   true when arg1 == arg2.
        #   * (ext & 0x2) == 0x2: true when arg1 >  arg2.
        # A true comparison yields mask(sizeBits), a false one yields 0. The
        # value is inserted into a copy of FpDestReg_uqw, written back after
        # the loop.
        #
        # NOTE(review): the floatInt/doubleInt unions are declared but never
        # referenced in this snippet.
        # NOTE(review): mask(), numItems(), bits() and insertBits() are
        # defined outside this chunk; confirm their exact semantics there.
1367        code = '''
1368            union floatInt
1369            {
1370                float f;
1371                uint32_t i;
1372            };
1373            union doubleInt
1374            {
1375                double d;
1376                uint64_t i;
1377            };
1378
1379            assert(srcSize == destSize);
1380            int size = srcSize;
1381            int sizeBits = size * 8;
1382            int items = numItems(size);
1383            uint64_t result = FpDestReg_uqw;
1384
1385            for (int i = 0; i < items; i++) {
1386                int hiIndex = (i + 1) * sizeBits - 1;
1387                int loIndex = (i + 0) * sizeBits;
1388                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
1389                int64_t arg1 = arg1Bits |
1390                    (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
1391                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
1392                int64_t arg2 = arg2Bits |
1393                    (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
1394
1395                uint64_t resBits = 0;
1396                if (((ext & 0x2) == 0 && arg1 == arg2) ||
1397                    ((ext & 0x2) == 0x2 && arg1 > arg2))
1398                    resBits = mask(sizeBits);
1399
1400                result = insertBits(result, hiIndex, loIndex, resBits);
1401            }
1402            FpDestReg_uqw = result;
1403        '''
1404
1405    class Mcmpf2r(MediaOp):
        # Element-wise floating-point compare producing an all-ones/all-zeros
        # value per element.
        #
        # `code` is the C++ snippet spliced into the execute template via
        # %(code)s. It requires srcSize == destSize; elements of size 4 are
        # read as float, anything else as double, and both operands are
        # widened to double. `nanop` is true when either operand is NaN.
        #
        # The predicate is selected by (ext & mask(3)); the switch handles
        # the values 0 through 7:
        #   0: arg1 == arg2 and not nanop
        #   1: arg1 <  arg2 and not nanop
        #   2: arg1 <= arg2 and not nanop
        #   3: nanop (unordered)
        #   4: arg1 != arg2 or nanop
        #   5: !(arg1 <  arg2) or nanop
        #   6: !(arg1 <= arg2) or nanop
        #   7: not nanop (ordered)
        # A true predicate yields mask(sizeBits), a false one yields 0. The
        # value is inserted into a copy of FpDestReg_uqw, written back after
        # the loop.
        #
        # NOTE(review): unlike the arithmetic float ops there is no assert
        # that size is 4 or 8 here. mask(), numItems(), bits() and
        # insertBits() are defined outside this chunk.
1406        code = '''
1407            union floatInt
1408            {
1409                float f;
1410                uint32_t i;
1411            };
1412            union doubleInt
1413            {
1414                double d;
1415                uint64_t i;
1416            };
1417
1418            assert(srcSize == destSize);
1419            int size = srcSize;
1420            int sizeBits = size * 8;
1421            int items = numItems(size);
1422            uint64_t result = FpDestReg_uqw;
1423
1424            for (int i = 0; i < items; i++) {
1425                int hiIndex = (i + 1) * sizeBits - 1;
1426                int loIndex = (i + 0) * sizeBits;
1427                uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
1428                uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
1429                double arg1, arg2;
1430
1431                if (size == 4) {
1432                    floatInt fi;
1433                    fi.i = arg1Bits;
1434                    arg1 = fi.f;
1435                    fi.i = arg2Bits;
1436                    arg2 = fi.f;
1437                } else {
1438                    doubleInt di;
1439                    di.i = arg1Bits;
1440                    arg1 = di.d;
1441                    di.i = arg2Bits;
1442                    arg2 = di.d;
1443                }
1444
1445                uint64_t resBits = 0;
1446                bool nanop = std::isnan(arg1) || std::isnan(arg2);
1447                switch (ext & mask(3)) {
1448                  case 0:
1449                    if (arg1 == arg2 && !nanop)
1450                        resBits = mask(sizeBits);
1451                    break;
1452                  case 1:
1453                    if (arg1 < arg2 && !nanop)
1454                        resBits = mask(sizeBits);
1455                    break;
1456                  case 2:
1457                    if (arg1 <= arg2 && !nanop)
1458                        resBits = mask(sizeBits);
1459                    break;
1460                  case 3:
1461                    if (nanop)
1462                        resBits = mask(sizeBits);
1463                    break;
1464                  case 4:
1465                    if (arg1 != arg2 || nanop)
1466                        resBits = mask(sizeBits);
1467                    break;
1468                  case 5:
1469                    if (!(arg1 < arg2) || nanop)
1470                        resBits = mask(sizeBits);
1471                    break;
1472                  case 6:
1473                    if (!(arg1 <= arg2) || nanop)
1474                        resBits = mask(sizeBits);
1475                    break;
1476                  case 7:
1477                    if (!nanop)
1478                        resBits = mask(sizeBits);
1479                    break;
1480                };
1481
1482                result = insertBits(result, hiIndex, loIndex, resBits);
1483            }
1484            FpDestReg_uqw = result;
1485        '''
1486
1487    class Mcmpf2rf(MediaOp):
        # Scalar floating-point compare that reports its outcome in the
        # condition flags instead of a register.
        #
        # The constructor takes `src1` and `src2` only; it forwards
        # "InstRegIndex(0)" as the first positional argument of
        # MediaOp.__init__ (presumably the unused destination -- confirm
        # against MediaOp), followed by the sources, size, destSize, srcSize
        # and ext.
        #
        # `code` is the C++ snippet spliced into the execute template via
        # %(code)s. It requires srcSize == destSize and a size of 4 or 8
        # bytes. Only the lowest element (bits sizeBits - 1 .. 0) of
        # FpSrcReg1_uqw and FpSrcReg2_uqw is compared, as float or double.
        #
        # Flag handling: SF, AF, ZF and PF are first cleared in ccFlagBits
        # and OF and CF in cfofBits. Then:
        #   * either operand NaN: ZF and PF are set, and CF is set.
        #   * arg1 <  arg2:       CF is set.
        #   * arg1 == arg2:       ZF is set.
        #   * otherwise (greater): nothing further is set.
        # FpDestReg_uqw is not written by this snippet.
1488        def __init__(self, src1, src2,\
1489                size = None, destSize = None, srcSize = None, ext = None):
1490            super(Mcmpf2rf, self).__init__("InstRegIndex(0)", src1,\
1491                    src2, size, destSize, srcSize, ext)
1492        code = '''
1493            union floatInt
1494            {
1495                float f;
1496                uint32_t i;
1497            };
1498            union doubleInt
1499            {
1500                double d;
1501                uint64_t i;
1502            };
1503
1504            assert(srcSize == destSize);
1505            assert(srcSize == 4 || srcSize == 8);
1506            int size = srcSize;
1507            int sizeBits = size * 8;
1508
1509            double arg1, arg2;
1510            uint64_t arg1Bits = bits(FpSrcReg1_uqw, sizeBits - 1, 0);
1511            uint64_t arg2Bits = bits(FpSrcReg2_uqw, sizeBits - 1, 0);
1512            if (size == 4) {
1513                floatInt fi;
1514                fi.i = arg1Bits;
1515                arg1 = fi.f;
1516                fi.i = arg2Bits;
1517                arg2 = fi.f;
1518            } else {
1519                doubleInt di;
1520                di.i = arg1Bits;
1521                arg1 = di.d;
1522                di.i = arg2Bits;
1523                arg2 = di.d;
1524            }
1525
1526            //               ZF PF CF
1527            // Unordered      1  1  1
1528            // Greater than   0  0  0
1529            // Less than      0  0  1
1530            // Equal          1  0  0
1531            //           OF = SF = AF = 0
1532            ccFlagBits = ccFlagBits & ~(SFBit | AFBit | ZFBit | PFBit);
1533            cfofBits   = cfofBits   & ~(OFBit | CFBit);
1534
1535            if (std::isnan(arg1) || std::isnan(arg2)) {
1536                ccFlagBits = ccFlagBits | (ZFBit | PFBit);
1537                cfofBits = cfofBits | CFBit;
1538            }
1539            else if(arg1 < arg2)
1540                cfofBits = cfofBits | CFBit;
1541            else if(arg1 == arg2)
1542                ccFlagBits = ccFlagBits | ZFBit;
1543        '''
1544
1545    class Emms(MediaOp):
        # Microop whose generated code assigns 0xFFFF to the FTW operand.
        #
        # The constructor takes no operands of its own. It passes
        # 'InstRegIndex(MISCREG_FTW)' as the first argument of
        # MediaOp.__init__, 'InstRegIndex(0)' for the next two, and 2 as the
        # fourth (presumably dest, src1, src2 and size respectively --
        # confirm against MediaOp).
1546        def __init__(self):
1547            super(Emms, self).__init__('InstRegIndex(MISCREG_FTW)',
1548                    'InstRegIndex(0)', 'InstRegIndex(0)', 2)
1549        code = 'FTW = 0xFFFF;'
1550}};
1551