// mediaop.isa revision 7620:3d8a23caa1ef
// Copyright (c) 2009 The Regents of The University of Michigan
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: Gabe Black
28
29def template MediaOpExecute {{
30        Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
31                Trace::InstRecord *traceData) const
32        {
33            Fault fault = NoFault;
34
35            %(op_decl)s;
36            %(op_rd)s;
37
38            %(code)s;
39
40            //Write the resulting state to the execution context
41            if(fault == NoFault)
42            {
43                %(op_wb)s;
44            }
45            return fault;
46        }
47}};
48
49def template MediaOpRegDeclare {{
50    class %(class_name)s : public %(base_class)s
51    {
52      protected:
53        void buildMe();
54
55      public:
56        %(class_name)s(ExtMachInst _machInst,
57                const char * instMnem, uint64_t setFlags,
58                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
59                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
60
61        %(class_name)s(ExtMachInst _machInst,
62                const char * instMnem,
63                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
64                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
65
66        %(BasicExecDeclare)s
67    };
68}};
69
70def template MediaOpImmDeclare {{
71
72    class %(class_name)s : public %(base_class)s
73    {
74      protected:
75        void buildMe();
76
77      public:
78        %(class_name)s(ExtMachInst _machInst,
79                const char * instMnem, uint64_t setFlags,
80                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
81                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
82
83        %(class_name)s(ExtMachInst _machInst,
84                const char * instMnem,
85                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
86                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
87
88        %(BasicExecDeclare)s
89    };
90}};
91
92def template MediaOpRegConstructor {{
93
94    inline void %(class_name)s::buildMe()
95    {
96        %(constructor)s;
97    }
98
99    inline %(class_name)s::%(class_name)s(
100            ExtMachInst machInst, const char * instMnem,
101            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
102            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
103        %(base_class)s(machInst, "%(mnemonic)s", instMnem, 0,
104                _src1, _src2, _dest, _srcSize, _destSize, _ext,
105                %(op_class)s)
106    {
107        buildMe();
108    }
109
110    inline %(class_name)s::%(class_name)s(
111            ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
112            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
113            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
114        %(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
115                _src1, _src2, _dest, _srcSize, _destSize, _ext,
116                %(op_class)s)
117    {
118        buildMe();
119    }
120}};
121
122def template MediaOpImmConstructor {{
123
124    inline void %(class_name)s::buildMe()
125    {
126        %(constructor)s;
127    }
128
129    inline %(class_name)s::%(class_name)s(
130            ExtMachInst machInst, const char * instMnem,
131            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
132            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
133        %(base_class)s(machInst, "%(mnemonic)s", instMnem, 0,
134                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
135                %(op_class)s)
136    {
137        buildMe();
138    }
139
140    inline %(class_name)s::%(class_name)s(
141            ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
142            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
143            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
144        %(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
145                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
146                %(op_class)s)
147    {
148        buildMe();
149    }
150}};
151
152let {{
153    # Make these empty strings so that concatenating onto
154    # them will always work.
155    header_output = ""
156    decoder_output = ""
157    exec_output = ""
158
159    immTemplates = (
160            MediaOpImmDeclare,
161            MediaOpImmConstructor,
162            MediaOpExecute)
163
164    regTemplates = (
165            MediaOpRegDeclare,
166            MediaOpRegConstructor,
167            MediaOpExecute)
168
169    class MediaOpMeta(type):
170        def buildCppClasses(self, name, Name, suffix, code):
171
172            # Globals to stick the output in
173            global header_output
174            global decoder_output
175            global exec_output
176
177            # If op2 is used anywhere, make register and immediate versions
178            # of this code.
179            matcher = re.compile("(?<!\\w)(?P<prefix>s?)op2(?P<typeQual>\\.\\w+)?")
180            match = matcher.search(code)
181            if match:
182                typeQual = ""
183                if match.group("typeQual"):
184                    typeQual = match.group("typeQual")
185                src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual)
186                self.buildCppClasses(name, Name, suffix,
187                        matcher.sub(src2_name, code))
188                self.buildCppClasses(name + "i", Name, suffix + "Imm",
189                        matcher.sub("imm8", code))
190                return
191
192            base = "X86ISA::MediaOp"
193
194            # If imm8 shows up in the code, use the immediate templates, if
195            # not, hopefully the register ones will be correct.
196            matcher = re.compile("(?<!\w)imm8(?!\w)")
197            if matcher.search(code):
198                base += "Imm"
199                templates = immTemplates
200            else:
201                base += "Reg"
202                templates = regTemplates
203
204            # Get everything ready for the substitution
205            iop = InstObjParams(name, Name + suffix, base, {"code" : code})
206
207            # Generate the actual code (finally!)
208            header_output += templates[0].subst(iop)
209            decoder_output += templates[1].subst(iop)
210            exec_output += templates[2].subst(iop)
211
212
213        def __new__(mcls, Name, bases, dict):
214            abstract = False
215            name = Name.lower()
216            if "abstract" in dict:
217                abstract = dict['abstract']
218                del dict['abstract']
219
220            cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
221            if not abstract:
222                cls.className = Name
223                cls.base_mnemonic = name
224                code = cls.code
225
226                # Set up the C++ classes
227                mcls.buildCppClasses(cls, name, Name, "", code)
228
229                # Hook into the microassembler dict
230                global microopClasses
231                microopClasses[name] = cls
232
233                # If op2 is used anywhere, make register and immediate versions
234                # of this code.
235                matcher = re.compile("op2(?P<typeQual>\\.\\w+)?")
236                if matcher.search(code):
237                    microopClasses[name + 'i'] = cls
238            return cls
239
240
241    class MediaOp(X86Microop):
242        __metaclass__ = MediaOpMeta
243        # This class itself doesn't act as a microop
244        abstract = True
245
246        def __init__(self, dest, src1, op2,
247                size = None, destSize = None, srcSize = None, ext = None):
248            self.dest = dest
249            self.src1 = src1
250            self.op2 = op2
251            if size is not None:
252                self.srcSize = size
253                self.destSize = size
254            if srcSize is not None:
255                self.srcSize = srcSize
256            if destSize is not None:
257                self.destSize = destSize
258            if self.srcSize is None:
259                raise Exception, "Source size not set."
260            if self.destSize is None:
261                raise Exception, "Dest size not set."
262            if ext is None:
263                self.ext = 0
264            else:
265                self.ext = ext 
266
267        def getAllocator(self, microFlags):
268            className = self.className
269            if self.mnemonic == self.base_mnemonic + 'i':
270                className += "Imm"
271            allocator = '''new %(class_name)s(machInst, macrocodeBlock,
272                    %(flags)s, %(src1)s, %(op2)s, %(dest)s,
273                    %(srcSize)s, %(destSize)s, %(ext)s)''' % {
274                "class_name" : className,
275                "flags" : self.microFlagsText(microFlags),
276                "src1" : self.src1, "op2" : self.op2,
277                "dest" : self.dest,
278                "srcSize" : self.srcSize,
279                "destSize" : self.destSize,
280                "ext" : self.ext}
281            return allocator
282
    # Copies one srcSize-byte item of FpSrcReg1 into DestReg.
    # The item index starts as imm8 and is reduced by the number of items
    # per register when bit 0 of src1 and bit 0 of ext are both set. An
    # index outside [0, items) leaves DestReg with its current value.
    # src2 defaults to 0 and is passed to MediaOp as the op2 operand.
    class Mov2int(MediaOp):
        def __init__(self, dest, src1, src2 = 0, \
                size = None, destSize = None, srcSize = None, ext = None):
            super(Mov2int, self).__init__(dest, src1,\
                    src2, size, destSize, srcSize, ext)
        code = '''
            int items = sizeof(FloatRegBits) / srcSize;
            int offset = imm8;
            if (bits(src1, 0) && (ext & 0x1))
                offset -= items;
            if (offset >= 0 && offset < items) {
                uint64_t fpSrcReg1 =
                    bits(FpSrcReg1.uqw,
                            (offset + 1) * srcSize * 8 - 1,
                            (offset + 0) * srcSize * 8);
                DestReg = merge(0, fpSrcReg1, destSize);
            } else {
                DestReg = DestReg;
            }
        '''
303
    # Inserts the value picked from SrcReg1 into one destSize-byte item of
    # FpDestReg. The item index starts as imm8 and is reduced by the number
    # of items per register when bit 0 of dest and bit 0 of ext are both
    # set. An index outside [0, items) leaves FpDestReg with its current
    # value. src2 defaults to 0 and is passed to MediaOp as the op2 operand.
    class Mov2fp(MediaOp):
        def __init__(self, dest, src1, src2 = 0, \
                size = None, destSize = None, srcSize = None, ext = None):
            super(Mov2fp, self).__init__(dest, src1,\
                    src2, size, destSize, srcSize, ext)
        code = '''
            int items = sizeof(FloatRegBits) / destSize;
            int offset = imm8;
            if (bits(dest, 0) && (ext & 0x1))
                offset -= items;
            if (offset >= 0 && offset < items) {
                uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
                FpDestReg.uqw =
                    insertBits(FpDestReg.uqw,
                            (offset + 1) * destSize * 8 - 1,
                            (offset + 0) * destSize * 8, srcReg1);
            } else {
                FpDestReg.uqw = FpDestReg.uqw;
            }
        '''
324
    # Gathers the top bit of every srcSize-byte item of FpSrcReg1 into
    # consecutive bits of a mask and ORs that mask into DestReg. The mask
    # starts at bit 0, or at bit "items" when bit 0 of ext is set.
    # There is no second source; "InstRegIndex(0)" is passed in its place.
    class Movsign(MediaOp):
        def __init__(self, dest, src, \
                size = None, destSize = None, srcSize = None, ext = None):
            super(Movsign, self).__init__(dest, src,\
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = '''
            int items = sizeof(FloatRegBits) / srcSize;
            uint64_t result = 0;
            int offset = (ext & 0x1) ? items : 0;
            for (int i = 0; i < items; i++) {
                uint64_t picked =
                    bits(FpSrcReg1.uqw, (i + 1) * 8 * srcSize - 1);
                result = insertBits(result, i + offset, i + offset, picked);
            }
            DestReg = DestReg | result;
        '''
341
    # Masked move: for each item, the FpSrcReg1 item replaces the FpDestReg
    # item when the top bit of the matching FpSrcReg2 item is set. Items
    # whose mask bit is clear keep their FpDestReg value.
    class Maskmov(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                if (bits(FpSrcReg2.uqw, hiIndex))
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
            }
            FpDestReg.uqw = result;
        '''
359
    # Builds FpDestReg item by item from FpSrcReg1 and FpSrcReg2. ext holds
    # one selector per result item, lowest item first, each optionBits wide
    # (1 bit for 8-byte items, 2 bits otherwise). A selector that points
    # past the end of one register picks from FpSrcReg2 after being reduced
    # by options / 2; any other selector picks from FpSrcReg1.
    class shuffle(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = sizeof(FloatRegBits) / size;
            int options;
            int optionBits;
            if (size == 8) {
                options = 2;
                optionBits = 1;
            } else {
                options = 4;
                optionBits = 2;
            }

            uint64_t result = 0;
            uint8_t sel = ext;

            for (int i = 0; i < items; i++) {
                uint64_t resBits;
                uint8_t lsel = sel & mask(optionBits);
                if (lsel * size >= sizeof(FloatRegBits)) {
                    lsel -= options / 2;
                    resBits = bits(FpSrcReg2.uqw,
                            (lsel + 1) * sizeBits - 1,
                            (lsel + 0) * sizeBits);
                }  else {
                    resBits = bits(FpSrcReg1.uqw,
                            (lsel + 1) * sizeBits - 1,
                            (lsel + 0) * sizeBits);
                }

                sel >>= optionBits;

                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''
401
    # Interleaves items from the two sources into FpDestReg: result item
    # 2 * i comes from FpSrcReg1 and item 2 * i + 1 from FpSrcReg2. Half a
    # register's worth of items is taken from each source, starting at item
    # 0, or at item "items" (the upper half) when ext is non-zero.
    class Unpack(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = destSize;
            int items = (sizeof(FloatRegBits) / size) / 2;
            int offset = ext ? items : 0;
            uint64_t result = 0;
            for (int i = 0; i < items; i++) {
                uint64_t pickedLow =
                    bits(FpSrcReg1.uqw, (i + offset + 1) * 8 * size - 1,
                                        (i + offset) * 8 * size);
                result = insertBits(result,
                                    (2 * i + 1) * 8 * size - 1,
                                    (2 * i + 0) * 8 * size,
                                    pickedLow);
                uint64_t pickedHigh =
                    bits(FpSrcReg2.uqw, (i + offset + 1) * 8 * size - 1,
                                        (i + offset) * 8 * size);
                result = insertBits(result,
                                    (2 * i + 2) * 8 * size - 1,
                                    (2 * i + 1) * 8 * size,
                                    pickedHigh);
            }
            FpDestReg.uqw = result;
        '''
427
428    class Pack(MediaOp):
429        code = '''
430            assert(srcSize == destSize * 2);
431            int items = (sizeof(FloatRegBits) / destSize);
432            int destBits = destSize * 8;
433            int srcBits = srcSize * 8;
434            uint64_t result = 0;
435            int i;
436            for (i = 0; i < items / 2; i++) {
437                uint64_t picked =
438                    bits(FpSrcReg1.uqw, (i + 1) * srcBits - 1,
439                                        (i + 0) * srcBits);
440                unsigned signBit = bits(picked, srcBits - 1);
441                uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
442
443                // Handle saturation.
444                if (signBit) {
445                    if (overflow != mask(destBits - srcBits + 1)) {
446                        if (signedOp())
447                            picked = (ULL(1) << (destBits - 1));
448                        else
449                            picked = 0;
450                    }
451                } else {
452                    if (overflow != 0) {
453                        if (signedOp())
454                            picked = mask(destBits - 1);
455                        else
456                            picked = mask(destBits);
457                    }
458                }
459                result = insertBits(result,
460                                    (i + 1) * destBits - 1,
461                                    (i + 0) * destBits,
462                                    picked);
463            }
464            for (;i < items; i++) {
465                uint64_t picked =
466                    bits(FpSrcReg2.uqw, (i - items + 1) * srcBits - 1,
467                                        (i - items + 0) * srcBits);
468                unsigned signBit = bits(picked, srcBits - 1);
469                uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
470
471                // Handle saturation.
472                if (signBit) {
473                    if (overflow != mask(destBits - srcBits + 1)) {
474                        if (signedOp())
475                            picked = (ULL(1) << (destBits - 1));
476                        else
477                            picked = 0;
478                    }
479                } else {
480                    if (overflow != 0) {
481                        if (signedOp())
482                            picked = mask(destBits - 1);
483                        else
484                            picked = mask(destBits);
485                    }
486                }
487                result = insertBits(result,
488                                    (i + 1) * destBits - 1,
489                                    (i + 0) * destBits,
490                                    picked);
491            }
492            FpDestReg.uqw = result;
493        '''
494
    # Bitwise XOR of the two source registers into FpDestReg.
    # The constructor passes 1 as the size to MediaOp.
    class Mxor(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mxor, self).__init__(dest, src1, src2, 1)
        code = '''
            FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw;
        '''
501
    # Bitwise OR of the two source registers into FpDestReg.
    # The constructor passes 1 as the size to MediaOp.
    class Mor(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mor, self).__init__(dest, src1, src2, 1)
        code = '''
            FpDestReg.uqw = FpSrcReg1.uqw | FpSrcReg2.uqw;
        '''
508
    # Bitwise AND of the two source registers into FpDestReg.
    # The constructor passes 1 as the size to MediaOp.
    class Mand(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mand, self).__init__(dest, src1, src2, 1)
        code = '''
            FpDestReg.uqw = FpSrcReg1.uqw & FpSrcReg2.uqw;
        '''
515
    # AND-NOT: the complement of the first source ANDed with the second
    # source, written to FpDestReg.
    # The constructor passes 1 as the size to MediaOp.
    class Mandn(MediaOp):
        def __init__(self, dest, src1, src2):
            super(Mandn, self).__init__(dest, src1, src2, 1)
        code = '''
            FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw;
        '''
522
    # Item-wise floating point minimum of FpSrcReg1 and FpSrcReg2, for
    # 4-byte (float) or 8-byte (double) items. The raw bits are viewed as
    # floating point through local unions and compared as doubles. The
    # first source's bits are kept only when arg1 < arg2 holds; in every
    # other case the second source's bits are used.
    class Mminf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                double arg1, arg2;
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);

                if (size == 4) {
                    floatInt fi;
                    fi.i = arg1Bits;
                    arg1 = fi.f;
                    fi.i = arg2Bits;
                    arg2 = fi.f;
                } else {
                    doubleInt di;
                    di.i = arg1Bits;
                    arg1 = di.d;
                    di.i = arg2Bits;
                    arg2 = di.d;
                }

                if (arg1 < arg2) {
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
                } else {
                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
                }
            }
            FpDestReg.uqw = result;
        '''
572
    # Item-wise floating point maximum of FpSrcReg1 and FpSrcReg2, for
    # 4-byte (float) or 8-byte (double) items. The raw bits are viewed as
    # floating point through local unions and compared as doubles. The
    # first source's bits are kept only when arg1 > arg2 holds; in every
    # other case the second source's bits are used.
    class Mmaxf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                double arg1, arg2;
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);

                if (size == 4) {
                    floatInt fi;
                    fi.i = arg1Bits;
                    arg1 = fi.f;
                    fi.i = arg2Bits;
                    arg2 = fi.f;
                } else {
                    doubleInt di;
                    di.i = arg1Bits;
                    arg1 = di.d;
                    di.i = arg2Bits;
                    arg2 = di.d;
                }

                if (arg1 > arg2) {
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
                } else {
                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
                }
            }
            FpDestReg.uqw = result;
        '''
622
    # Item-wise integer minimum of FpSrcReg1 and FpSrcReg2. When signedOp()
    # is true the items are sign extended to 64 bits and compared as signed
    # values; otherwise the raw item bits are compared as unsigned values.
    # When neither item is smaller, the second source's bits are used.
    class Mmini(MediaOp):
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                int64_t arg1 = arg1Bits |
                    (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                int64_t arg2 = arg2Bits |
                    (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
                uint64_t resBits;

                if (signedOp()) {
                    if (arg1 < arg2) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                } else {
                    if (arg1Bits < arg2Bits) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                }
                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''
660
    # Item-wise integer maximum of FpSrcReg1 and FpSrcReg2. When signedOp()
    # is true the items are sign extended to 64 bits and compared as signed
    # values; otherwise the raw item bits are compared as unsigned values.
    # When neither item is larger, the second source's bits are used.
    class Mmaxi(MediaOp):
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = numItems(size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                int64_t arg1 = arg1Bits |
                    (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                int64_t arg2 = arg2Bits |
                    (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
                uint64_t resBits;

                if (signedOp()) {
                    if (arg1 > arg2) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                } else {
                    if (arg1Bits > arg2Bits) {
                        resBits = arg1Bits;
                    } else {
                        resBits = arg2Bits;
                    }
                }
                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''
698
    # Item-wise square root of FpSrcReg1 for 4-byte (float) or 8-byte
    # (double) items, written to FpDestReg. The raw bits are viewed as
    # floating point through local unions.
    # There is no second source; "InstRegIndex(0)" is passed in its place.
    class Msqrt(MediaOp):
        def __init__(self, dest, src, \
                size = None, destSize = None, srcSize = None, ext = None):
            super(Msqrt, self).__init__(dest, src,\
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex);

                if (size == 4) {
                    floatInt fi;
                    fi.i = argBits;
                    fi.f = sqrt(fi.f);
                    argBits = fi.i;
                } else {
                    doubleInt di;
                    di.i = argBits;
                    di.d = sqrt(di.d);
                    argBits = di.i;
                }
                result = insertBits(result, hiIndex, loIndex, argBits);
            }
            FpDestReg.uqw = result;
        '''
743
    # Item-wise floating point addition, FpSrcReg1 + FpSrcReg2, for 4-byte
    # (float) or 8-byte (double) items, written to FpDestReg. The raw bits
    # are viewed as floating point through local unions, and each item is
    # computed in its own precision.
    class Maddf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f + arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d + arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''
789
    # Item-wise floating point subtraction, FpSrcReg1 - FpSrcReg2, for
    # 4-byte (float) or 8-byte (double) items, written to FpDestReg. The
    # raw bits are viewed as floating point through local unions, and each
    # item is computed in its own precision.
    class Msubf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = numItems(size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f - arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d - arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''
835
836    class Mmulf(MediaOp):
837        code = '''
838            union floatInt
839            {
840                float f;
841                uint32_t i;
842            };
843            union doubleInt
844            {
845                double d;
846                uint64_t i;
847            };
848
849            assert(srcSize == destSize);
850            int size = srcSize;
851            int sizeBits = size * 8;
852            assert(srcSize == 4 || srcSize == 8);
853            int items = numItems(size);
854            uint64_t result = FpDestReg.uqw;
855
856            for (int i = 0; i < items; i++) {
857                int hiIndex = (i + 1) * sizeBits - 1;
858                int loIndex = (i + 0) * sizeBits;
859                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
860                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
861                uint64_t resBits;
862
863                if (size == 4) {
864                    floatInt arg1, arg2, res;
865                    arg1.i = arg1Bits;
866                    arg2.i = arg2Bits;
867                    res.f = arg1.f * arg2.f;
868                    resBits = res.i;
869                } else {
870                    doubleInt arg1, arg2, res;
871                    arg1.i = arg1Bits;
872                    arg2.i = arg2Bits;
873                    res.d = arg1.d * arg2.d;
874                    resBits = res.i;
875                }
876
877                result = insertBits(result, hiIndex, loIndex, resBits);
878            }
879            FpDestReg.uqw = result;
880        '''
881
882    class Mdivf(MediaOp):
883        code = '''
884            union floatInt
885            {
886                float f;
887                uint32_t i;
888            };
889            union doubleInt
890            {
891                double d;
892                uint64_t i;
893            };
894
895            assert(srcSize == destSize);
896            int size = srcSize;
897            int sizeBits = size * 8;
898            assert(srcSize == 4 || srcSize == 8);
899            int items = numItems(size);
900            uint64_t result = FpDestReg.uqw;
901
902            for (int i = 0; i < items; i++) {
903                int hiIndex = (i + 1) * sizeBits - 1;
904                int loIndex = (i + 0) * sizeBits;
905                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
906                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
907                uint64_t resBits;
908
909                if (size == 4) {
910                    floatInt arg1, arg2, res;
911                    arg1.i = arg1Bits;
912                    arg2.i = arg2Bits;
913                    res.f = arg1.f / arg2.f;
914                    resBits = res.i;
915                } else {
916                    doubleInt arg1, arg2, res;
917                    arg1.i = arg1Bits;
918                    arg2.i = arg2Bits;
919                    res.d = arg1.d / arg2.d;
920                    resBits = res.i;
921                }
922
923                result = insertBits(result, hiIndex, loIndex, resBits);
924            }
925            FpDestReg.uqw = result;
926        '''
927
928    class Maddi(MediaOp):
929        code = '''
930            assert(srcSize == destSize);
931            int size = srcSize;
932            int sizeBits = size * 8;
933            int items = numItems(size);
934            uint64_t result = FpDestReg.uqw;
935
936            for (int i = 0; i < items; i++) {
937                int hiIndex = (i + 1) * sizeBits - 1;
938                int loIndex = (i + 0) * sizeBits;
939                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
940                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
941                uint64_t resBits = arg1Bits + arg2Bits;
942                
943                if (ext & 0x2) {
944                    if (signedOp()) {
945                        int arg1Sign = bits(arg1Bits, sizeBits - 1);
946                        int arg2Sign = bits(arg2Bits, sizeBits - 1);
947                        int resSign = bits(resBits, sizeBits - 1);
948                        if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
949                            if (resSign == 0)
950                                resBits = (ULL(1) << (sizeBits - 1));
951                            else
952                                resBits = mask(sizeBits - 1);
953                        }
954                    } else {
955                        if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
956                            resBits = mask(sizeBits);
957                    }
958                }
959
960                result = insertBits(result, hiIndex, loIndex, resBits);
961            }
962            FpDestReg.uqw = result;
963        '''
964
965    class Msubi(MediaOp):
966        code = '''
967            assert(srcSize == destSize);
968            int size = srcSize;
969            int sizeBits = size * 8;
970            int items = numItems(size);
971            uint64_t result = FpDestReg.uqw;
972
973            for (int i = 0; i < items; i++) {
974                int hiIndex = (i + 1) * sizeBits - 1;
975                int loIndex = (i + 0) * sizeBits;
976                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
977                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
978                uint64_t resBits = arg1Bits - arg2Bits;
979                
980                if (ext & 0x2) {
981                    if (signedOp()) {
982                        int arg1Sign = bits(arg1Bits, sizeBits - 1);
983                        int arg2Sign = !bits(arg2Bits, sizeBits - 1);
984                        int resSign = bits(resBits, sizeBits - 1);
985                        if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
986                            if (resSign == 0)
987                                resBits = (ULL(1) << (sizeBits - 1));
988                            else
989                                resBits = mask(sizeBits - 1);
990                        }
991                    } else {
992                        if (arg2Bits > arg1Bits) {
993                            resBits = 0;
994                        } else if (!findCarry(sizeBits, resBits,
995                                             arg1Bits, ~arg2Bits)) {
996                            resBits = mask(sizeBits);
997                        }
998                    }
999                }
1000
1001                result = insertBits(result, hiIndex, loIndex, resBits);
1002            }
1003            FpDestReg.uqw = result;
1004        '''
1005
1006    class Mmuli(MediaOp):
1007        code = '''
1008            int srcBits = srcSize * 8;
1009            int destBits = destSize * 8;
1010            assert(destBits <= 64);
1011            assert(destSize >= srcSize);
1012            int items = numItems(destSize);
1013            uint64_t result = FpDestReg.uqw;
1014
1015            for (int i = 0; i < items; i++) {
1016                int offset = 0;
1017                if (ext & 16) {
1018                    if (ext & 32)
1019                        offset = i * (destBits - srcBits);
1020                    else
1021                        offset = i * (destBits - srcBits) + srcBits;
1022                }
1023                int srcHiIndex = (i + 1) * srcBits - 1 + offset;
1024                int srcLoIndex = (i + 0) * srcBits + offset;
1025                uint64_t arg1Bits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1026                uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex);
1027                uint64_t resBits;
1028
1029                if (signedOp()) {
1030                    int64_t arg1 = arg1Bits |
1031                        (0 - (arg1Bits & (ULL(1) << (srcBits - 1))));
1032                    int64_t arg2 = arg2Bits |
1033                        (0 - (arg2Bits & (ULL(1) << (srcBits - 1))));
1034                    resBits = (uint64_t)(arg1 * arg2);
1035                } else {
1036                    resBits = arg1Bits * arg2Bits;
1037                }
1038
1039                if (ext & 0x4)
1040                    resBits += (ULL(1) << (destBits - 1));
1041                
1042                if (multHi())
1043                    resBits >>= destBits;
1044
1045                int destHiIndex = (i + 1) * destBits - 1;
1046                int destLoIndex = (i + 0) * destBits;
1047                result = insertBits(result, destHiIndex, destLoIndex, resBits);
1048            }
1049            FpDestReg.uqw = result;
1050        '''
1051
1052    class Mavg(MediaOp):
1053        code = '''
1054            assert(srcSize == destSize);
1055            int size = srcSize;
1056            int sizeBits = size * 8;
1057            int items = numItems(size);
1058            uint64_t result = FpDestReg.uqw;
1059
1060            for (int i = 0; i < items; i++) {
1061                int hiIndex = (i + 1) * sizeBits - 1;
1062                int loIndex = (i + 0) * sizeBits;
1063                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1064                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1065                uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;
1066                
1067                result = insertBits(result, hiIndex, loIndex, resBits);
1068            }
1069            FpDestReg.uqw = result;
1070        '''
1071
1072    class Msad(MediaOp):
1073        code = '''
1074            int srcBits = srcSize * 8;
1075            int items = sizeof(FloatRegBits) / srcSize;
1076
1077            uint64_t sum = 0;
1078            for (int i = 0; i < items; i++) {
1079                int hiIndex = (i + 1) * srcBits - 1;
1080                int loIndex = (i + 0) * srcBits;
1081                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1082                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1083                int64_t resBits = arg1Bits - arg2Bits;
1084                if (resBits < 0)
1085                    resBits = -resBits;
1086                sum += resBits;
1087            }
1088            FpDestReg.uqw = sum & mask(destSize * 8);
1089        '''
1090
1091    class Msrl(MediaOp):
1092        code = '''
1093
1094            assert(srcSize == destSize);
1095            int size = srcSize;
1096            int sizeBits = size * 8;
1097            int items = numItems(size);
1098            uint64_t shiftAmt = op2.uqw;
1099            uint64_t result = FpDestReg.uqw;
1100
1101            for (int i = 0; i < items; i++) {
1102                int hiIndex = (i + 1) * sizeBits - 1;
1103                int loIndex = (i + 0) * sizeBits;
1104                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1105                uint64_t resBits;
1106                if (shiftAmt >= sizeBits) {
1107                    resBits = 0;
1108                } else {
1109                    resBits = (arg1Bits >> shiftAmt) &
1110                        mask(sizeBits - shiftAmt);
1111                }
1112
1113                result = insertBits(result, hiIndex, loIndex, resBits);
1114            }
1115            FpDestReg.uqw = result;
1116        '''
1117
1118    class Msra(MediaOp):
1119        code = '''
1120
1121            assert(srcSize == destSize);
1122            int size = srcSize;
1123            int sizeBits = size * 8;
1124            int items = numItems(size);
1125            uint64_t shiftAmt = op2.uqw;
1126            uint64_t result = FpDestReg.uqw;
1127
1128            for (int i = 0; i < items; i++) {
1129                int hiIndex = (i + 1) * sizeBits - 1;
1130                int loIndex = (i + 0) * sizeBits;
1131                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1132                uint64_t resBits;
1133                if (shiftAmt >= sizeBits) {
1134                    if (bits(arg1Bits, sizeBits - 1))
1135                        resBits = mask(sizeBits);
1136                    else
1137                        resBits = 0;
1138                } else {
1139                    resBits = (arg1Bits >> shiftAmt);
1140                    resBits = resBits |
1141                        (0 - (resBits & (ULL(1) << (sizeBits - 1 - shiftAmt))));
1142                }
1143
1144                result = insertBits(result, hiIndex, loIndex, resBits);
1145            }
1146            FpDestReg.uqw = result;
1147        '''
1148
1149    class Msll(MediaOp):
1150        code = '''
1151
1152            assert(srcSize == destSize);
1153            int size = srcSize;
1154            int sizeBits = size * 8;
1155            int items = numItems(size);
1156            uint64_t shiftAmt = op2.uqw;
1157            uint64_t result = FpDestReg.uqw;
1158
1159            for (int i = 0; i < items; i++) {
1160                int hiIndex = (i + 1) * sizeBits - 1;
1161                int loIndex = (i + 0) * sizeBits;
1162                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1163                uint64_t resBits;
1164                if (shiftAmt >= sizeBits) {
1165                    resBits = 0;
1166                } else {
1167                    resBits = (arg1Bits << shiftAmt);
1168                }
1169
1170                result = insertBits(result, hiIndex, loIndex, resBits);
1171            }
1172            FpDestReg.uqw = result;
1173        '''
1174
1175    class Cvtf2i(MediaOp):
1176        def __init__(self, dest, src, \
1177                size = None, destSize = None, srcSize = None, ext = None):
1178            super(Cvtf2i, self).__init__(dest, src,\
1179                    "InstRegIndex(0)", size, destSize, srcSize, ext)
1180        code = '''
1181            union floatInt
1182            {
1183                float f;
1184                uint32_t i;
1185            };
1186            union doubleInt
1187            {
1188                double d;
1189                uint64_t i;
1190            };
1191
1192            assert(destSize == 4 || destSize == 8);
1193            assert(srcSize == 4 || srcSize == 8);
1194            int srcSizeBits = srcSize * 8;
1195            int destSizeBits = destSize * 8;
1196            int items;
1197            int srcStart = 0;
1198            int destStart = 0;
1199            if (srcSize == 2 * destSize) {
1200                items = numItems(srcSize);
1201                if (ext & 0x2)
1202                    destStart = destSizeBits * items;
1203            } else if (destSize == 2 * srcSize) {
1204                items = numItems(destSize);
1205                if (ext & 0x2)
1206                    srcStart = srcSizeBits * items;
1207            } else {
1208                items = numItems(destSize);
1209            }
1210            uint64_t result = FpDestReg.uqw;
1211
1212            for (int i = 0; i < items; i++) {
1213                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1214                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1215                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1216                double arg;
1217
1218                if (srcSize == 4) {
1219                    floatInt fi;
1220                    fi.i = argBits;
1221                    arg = fi.f;
1222                } else {
1223                    doubleInt di;
1224                    di.i = argBits;
1225                    arg = di.d;
1226                }
1227
1228                if (ext & 0x4) {
1229                    if (arg >= 0)
1230                        arg += 0.5;
1231                    else
1232                        arg -= 0.5;
1233                }
1234
1235                if (destSize == 4) {
1236                    argBits = (uint32_t)arg;
1237                } else {
1238                    argBits = (uint64_t)arg;
1239                }
1240                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1241                int destLoIndex = destStart + (i + 0) * destSizeBits;
1242                result = insertBits(result, destHiIndex, destLoIndex, argBits);
1243            }
1244            FpDestReg.uqw = result;
1245        '''
1246
1247    class Cvti2f(MediaOp):
1248        def __init__(self, dest, src, \
1249                size = None, destSize = None, srcSize = None, ext = None):
1250            super(Cvti2f, self).__init__(dest, src,\
1251                    "InstRegIndex(0)", size, destSize, srcSize, ext)
1252        code = '''
1253            union floatInt
1254            {
1255                float f;
1256                uint32_t i;
1257            };
1258            union doubleInt
1259            {
1260                double d;
1261                uint64_t i;
1262            };
1263
1264            assert(destSize == 4 || destSize == 8);
1265            assert(srcSize == 4 || srcSize == 8);
1266            int srcSizeBits = srcSize * 8;
1267            int destSizeBits = destSize * 8;
1268            int items;
1269            int srcStart = 0;
1270            int destStart = 0;
1271            if (srcSize == 2 * destSize) {
1272                items = numItems(srcSize);
1273                if (ext & 0x2)
1274                    destStart = destSizeBits * items;
1275            } else if (destSize == 2 * srcSize) {
1276                items = numItems(destSize);
1277                if (ext & 0x2)
1278                    srcStart = srcSizeBits * items;
1279            } else {
1280                items = numItems(destSize);
1281            }
1282            uint64_t result = FpDestReg.uqw;
1283
1284            for (int i = 0; i < items; i++) {
1285                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1286                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1287                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1288
1289                int64_t sArg = argBits |
1290                    (0 - (argBits & (ULL(1) << (srcSizeBits - 1))));
1291                double arg = sArg;
1292
1293                if (destSize == 4) {
1294                    floatInt fi;
1295                    fi.f = arg;
1296                    argBits = fi.i;
1297                } else {
1298                    doubleInt di;
1299                    di.d = arg;
1300                    argBits = di.i;
1301                }
1302                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1303                int destLoIndex = destStart + (i + 0) * destSizeBits;
1304                result = insertBits(result, destHiIndex, destLoIndex, argBits);
1305            }
1306            FpDestReg.uqw = result;
1307        '''
1308
1309    class Cvtf2f(MediaOp):
1310        def __init__(self, dest, src, \
1311                size = None, destSize = None, srcSize = None, ext = None):
1312            super(Cvtf2f, self).__init__(dest, src,\
1313                    "InstRegIndex(0)", size, destSize, srcSize, ext)
1314        code = '''
1315            union floatInt
1316            {
1317                float f;
1318                uint32_t i;
1319            };
1320            union doubleInt
1321            {
1322                double d;
1323                uint64_t i;
1324            };
1325
1326            assert(destSize == 4 || destSize == 8);
1327            assert(srcSize == 4 || srcSize == 8);
1328            int srcSizeBits = srcSize * 8;
1329            int destSizeBits = destSize * 8;
1330            int items;
1331            int srcStart = 0;
1332            int destStart = 0;
1333            if (srcSize == 2 * destSize) {
1334                items = numItems(srcSize);
1335                if (ext & 0x2)
1336                    destStart = destSizeBits * items;
1337            } else if (destSize == 2 * srcSize) {
1338                items = numItems(destSize);
1339                if (ext & 0x2)
1340                    srcStart = srcSizeBits * items;
1341            } else {
1342                items = numItems(destSize);
1343            }
1344            uint64_t result = FpDestReg.uqw;
1345
1346            for (int i = 0; i < items; i++) {
1347                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1348                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1349                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1350                double arg;
1351
1352                if (srcSize == 4) {
1353                    floatInt fi;
1354                    fi.i = argBits;
1355                    arg = fi.f;
1356                } else {
1357                    doubleInt di;
1358                    di.i = argBits;
1359                    arg = di.d;
1360                }
1361                if (destSize == 4) {
1362                    floatInt fi;
1363                    fi.f = arg;
1364                    argBits = fi.i;
1365                } else {
1366                    doubleInt di;
1367                    di.d = arg;
1368                    argBits = di.i;
1369                }
1370                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1371                int destLoIndex = destStart + (i + 0) * destSizeBits;
1372                result = insertBits(result, destHiIndex, destLoIndex, argBits);
1373            }
1374            FpDestReg.uqw = result;
1375        '''
1376
1377    class Mcmpi2r(MediaOp):
1378        code = '''
1379            union floatInt
1380            {
1381                float f;
1382                uint32_t i;
1383            };
1384            union doubleInt
1385            {
1386                double d;
1387                uint64_t i;
1388            };
1389
1390            assert(srcSize == destSize);
1391            int size = srcSize;
1392            int sizeBits = size * 8;
1393            int items = numItems(size);
1394            uint64_t result = FpDestReg.uqw;
1395
1396            for (int i = 0; i < items; i++) {
1397                int hiIndex = (i + 1) * sizeBits - 1;
1398                int loIndex = (i + 0) * sizeBits;
1399                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1400                int64_t arg1 = arg1Bits |
1401                    (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
1402                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1403                int64_t arg2 = arg2Bits |
1404                    (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
1405
1406                uint64_t resBits = 0;
1407                if (((ext & 0x2) == 0 && arg1 == arg2) ||
1408                    ((ext & 0x2) == 0x2 && arg1 > arg2))
1409                    resBits = mask(sizeBits);
1410
1411                result = insertBits(result, hiIndex, loIndex, resBits);
1412            }
1413            FpDestReg.uqw = result;
1414        '''
1415
1416    class Mcmpf2r(MediaOp):
1417        code = '''
1418            union floatInt
1419            {
1420                float f;
1421                uint32_t i;
1422            };
1423            union doubleInt
1424            {
1425                double d;
1426                uint64_t i;
1427            };
1428
1429            assert(srcSize == destSize);
1430            int size = srcSize;
1431            int sizeBits = size * 8;
1432            int items = numItems(size);
1433            uint64_t result = FpDestReg.uqw;
1434
1435            for (int i = 0; i < items; i++) {
1436                int hiIndex = (i + 1) * sizeBits - 1;
1437                int loIndex = (i + 0) * sizeBits;
1438                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1439                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1440                double arg1, arg2;
1441
1442                if (size == 4) {
1443                    floatInt fi;
1444                    fi.i = arg1Bits;
1445                    arg1 = fi.f;
1446                    fi.i = arg2Bits;
1447                    arg2 = fi.f;
1448                } else {
1449                    doubleInt di;
1450                    di.i = arg1Bits;
1451                    arg1 = di.d;
1452                    di.i = arg2Bits;
1453                    arg2 = di.d;
1454                }
1455
1456                uint64_t resBits = 0;
1457                bool nanop = isnan(arg1) || isnan(arg2);
1458                switch (ext & mask(3)) {
1459                  case 0:
1460                    if (arg1 == arg2 && !nanop)
1461                        resBits = mask(sizeBits);
1462                    break;
1463                  case 1:
1464                    if (arg1 < arg2 && !nanop)
1465                        resBits = mask(sizeBits);
1466                    break;
1467                  case 2:
1468                    if (arg1 <= arg2 && !nanop)
1469                        resBits = mask(sizeBits);
1470                    break;
1471                  case 3:
1472                    if (nanop)
1473                        resBits = mask(sizeBits);
1474                    break;
1475                  case 4:
1476                    if (arg1 != arg2 || nanop)
1477                        resBits = mask(sizeBits);
1478                    break;
1479                  case 5:
1480                    if (!(arg1 < arg2) || nanop)
1481                        resBits = mask(sizeBits);
1482                    break;
1483                  case 6:
1484                    if (!(arg1 <= arg2) || nanop)
1485                        resBits = mask(sizeBits);
1486                    break;
1487                  case 7:
1488                    if (!nanop)
1489                        resBits = mask(sizeBits);
1490                    break;
1491                };
1492
1493                result = insertBits(result, hiIndex, loIndex, resBits);
1494            }
1495            FpDestReg.uqw = result;
1496        '''
1497
1498    class Mcmpf2rf(MediaOp):
1499        def __init__(self, src1, src2,\
1500                size = None, destSize = None, srcSize = None, ext = None):
1501            super(Mcmpf2rf, self).__init__("InstRegIndex(0)", src1,\
1502                    src2, size, destSize, srcSize, ext)
1503        code = '''
1504            union floatInt
1505            {
1506                float f;
1507                uint32_t i;
1508            };
1509            union doubleInt
1510            {
1511                double d;
1512                uint64_t i;
1513            };
1514
1515            assert(srcSize == destSize);
1516            assert(srcSize == 4 || srcSize == 8);
1517            int size = srcSize;
1518            int sizeBits = size * 8;
1519
1520            double arg1, arg2;
1521            uint64_t arg1Bits = bits(FpSrcReg1.uqw, sizeBits - 1, 0);
1522            uint64_t arg2Bits = bits(FpSrcReg2.uqw, sizeBits - 1, 0);
1523            if (size == 4) {
1524                floatInt fi;
1525                fi.i = arg1Bits;
1526                arg1 = fi.f;
1527                fi.i = arg2Bits;
1528                arg2 = fi.f;
1529            } else {
1530                doubleInt di;
1531                di.i = arg1Bits;
1532                arg1 = di.d;
1533                di.i = arg2Bits;
1534                arg2 = di.d;
1535            }
1536
1537            //               ZF PF CF
1538            // Unordered      1  1  1
1539            // Greater than   0  0  0
1540            // Less than      0  0  1
1541            // Equal          1  0  0
1542            //           OF = SF = AF = 0
1543            ccFlagBits = ccFlagBits & ~(OFBit | SFBit | AFBit |
1544                                        ZFBit | PFBit | CFBit);
1545            if (isnan(arg1) || isnan(arg2))
1546                ccFlagBits = ccFlagBits | (ZFBit | PFBit | CFBit);
1547            else if(arg1 < arg2)
1548                ccFlagBits = ccFlagBits | CFBit;
1549            else if(arg1 == arg2)
1550                ccFlagBits = ccFlagBits | ZFBit;
1551        '''
1552}};
1553