mediaop.isa revision 6596:e60eaef99523
1/// Copyright (c) 2009 The Regents of The University of Michigan
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met: redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer;
8// redistributions in binary form must reproduce the above copyright
9// notice, this list of conditions and the following disclaimer in the
10// documentation and/or other materials provided with the distribution;
11// neither the name of the copyright holders nor the names of its
12// contributors may be used to endorse or promote products derived from
13// this software without specific prior written permission.
14//
15// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26//
27// Authors: Gabe Black
28
// Template for the execute() method of a media microop. The generated method
// declares and reads the operands, runs the microop's code, and writes the
// results back to the execution context only when no fault was raised. The
// fault value is returned either way.
29def template MediaOpExecute {{
30        Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
31                Trace::InstRecord *traceData) const
32        {
33            Fault fault = NoFault;
34
35            %(op_decl)s;
36            %(op_rd)s;
37
38            %(code)s;
39
40            //Write the resulting state to the execution context
41            if(fault == NoFault)
42            {
43                %(op_wb)s;
44            }
45            return fault;
46        }
47}};
48
// Class declaration template for the register form of a media microop, where
// the second source is an InstRegIndex. Two constructors are declared: one
// that takes explicit isMicro/isDelayed/isFirst/isLast flags and one that
// omits them. buildMe() is the protected helper both constructors rely on.
49def template MediaOpRegDeclare {{
50    class %(class_name)s : public %(base_class)s
51    {
52      protected:
53        void buildMe();
54
55      public:
56        %(class_name)s(ExtMachInst _machInst,
57                const char * instMnem,
58                bool isMicro, bool isDelayed, bool isFirst, bool isLast,
59                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
60                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
61
62        %(class_name)s(ExtMachInst _machInst,
63                const char * instMnem,
64                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
65                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
66
67        %(BasicExecDeclare)s
68    };
69}};
70
// Class declaration template for the immediate form of a media microop. It
// mirrors the register form except that a uint16_t _imm8 takes the place of
// the second source register index. Two constructors are declared: one with
// explicit isMicro/isDelayed/isFirst/isLast flags and one without.
71def template MediaOpImmDeclare {{
72
73    class %(class_name)s : public %(base_class)s
74    {
75      protected:
76        void buildMe();
77
78      public:
79        %(class_name)s(ExtMachInst _machInst,
80                const char * instMnem,
81                bool isMicro, bool isDelayed, bool isFirst, bool isLast,
82                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
83                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
84
85        %(class_name)s(ExtMachInst _machInst,
86                const char * instMnem,
87                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
88                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
89
90        %(BasicExecDeclare)s
91    };
92}};
93
// Constructor definitions for the register form of a media microop.
// buildMe() wraps the generated constructor body. Both constructors forward
// their arguments to the base class and then call buildMe(); the one without
// flag parameters passes false for isMicro, isDelayed, isFirst and isLast.
94def template MediaOpRegConstructor {{
95
96    inline void %(class_name)s::buildMe()
97    {
98        %(constructor)s;
99    }
100
101    inline %(class_name)s::%(class_name)s(
102            ExtMachInst machInst, const char * instMnem,
103            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
104            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
105        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
106                false, false, false, false,
107                _src1, _src2, _dest, _srcSize, _destSize, _ext,
108                %(op_class)s)
109    {
110        buildMe();
111    }
112
113    inline %(class_name)s::%(class_name)s(
114            ExtMachInst machInst, const char * instMnem,
115            bool isMicro, bool isDelayed, bool isFirst, bool isLast,
116            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
117            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
118        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
119                isMicro, isDelayed, isFirst, isLast,
120                _src1, _src2, _dest, _srcSize, _destSize, _ext,
121                %(op_class)s)
122    {
123        buildMe();
124    }
125}};
126
// Constructor definitions for the immediate form of a media microop.
// buildMe() wraps the generated constructor body. Both constructors forward
// their arguments (with _imm8 in the second source position) to the base
// class and then call buildMe(); the one without flag parameters passes false
// for isMicro, isDelayed, isFirst and isLast.
127def template MediaOpImmConstructor {{
128
129    inline void %(class_name)s::buildMe()
130    {
131        %(constructor)s;
132    }
133
134    inline %(class_name)s::%(class_name)s(
135            ExtMachInst machInst, const char * instMnem,
136            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
137            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
138        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
139                false, false, false, false,
140                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
141                %(op_class)s)
142    {
143        buildMe();
144    }
145
146    inline %(class_name)s::%(class_name)s(
147            ExtMachInst machInst, const char * instMnem,
148            bool isMicro, bool isDelayed, bool isFirst, bool isLast,
149            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
150            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
151        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
152                isMicro, isDelayed, isFirst, isLast,
153                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
154                %(op_class)s)
155    {
156        buildMe();
157    }
158}};
159
160let {{
161    # Make these empty strings so that concatenating onto
162    # them will always work.
163    header_output = ""
164    decoder_output = ""
165    exec_output = ""
166
    # Template triple for microops whose code refers to imm8. The order
    # matters: MediaOpMeta.buildCppClasses appends the substitution of entry 0
    # to header_output, entry 1 to decoder_output and entry 2 to exec_output.
167    immTemplates = (
168            MediaOpImmDeclare,
169            MediaOpImmConstructor,
170            MediaOpExecute)
171
    # Template triple for microops whose code does not refer to imm8; same
    # ordering as immTemplates.
172    regTemplates = (
173            MediaOpRegDeclare,
174            MediaOpRegConstructor,
175            MediaOpExecute)
176
177    class MediaOpMeta(type):
178        def buildCppClasses(self, name, Name, suffix, code):
179
180            # Globals to stick the output in
181            global header_output
182            global decoder_output
183            global exec_output
184
185            # If op2 is used anywhere, make register and immediate versions
186            # of this code.
187            matcher = re.compile("(?<!\\w)(?P<prefix>s?)op2(?P<typeQual>\\.\\w+)?")
188            match = matcher.search(code)
189            if match:
190                typeQual = ""
191                if match.group("typeQual"):
192                    typeQual = match.group("typeQual")
193                src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual)
194                self.buildCppClasses(name, Name, suffix,
195                        matcher.sub(src2_name, code))
196                self.buildCppClasses(name + "i", Name, suffix + "Imm",
197                        matcher.sub("imm8", code))
198                return
199
200            base = "X86ISA::MediaOp"
201
202            # If imm8 shows up in the code, use the immediate templates, if
203            # not, hopefully the register ones will be correct.
204            matcher = re.compile("(?<!\w)imm8(?!\w)")
205            if matcher.search(code):
206                base += "Imm"
207                templates = immTemplates
208            else:
209                base += "Reg"
210                templates = regTemplates
211
212            # Get everything ready for the substitution
213            iop = InstObjParams(name, Name + suffix, base, {"code" : code})
214
215            # Generate the actual code (finally!)
216            header_output += templates[0].subst(iop)
217            decoder_output += templates[1].subst(iop)
218            exec_output += templates[2].subst(iop)
219
220
221        def __new__(mcls, Name, bases, dict):
222            abstract = False
223            name = Name.lower()
224            if "abstract" in dict:
225                abstract = dict['abstract']
226                del dict['abstract']
227
228            cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
229            if not abstract:
230                cls.className = Name
231                cls.base_mnemonic = name
232                code = cls.code
233
234                # Set up the C++ classes
235                mcls.buildCppClasses(cls, name, Name, "", code)
236
237                # Hook into the microassembler dict
238                global microopClasses
239                microopClasses[name] = cls
240
241                # If op2 is used anywhere, make register and immediate versions
242                # of this code.
243                matcher = re.compile("op2(?P<typeQual>\\.\\w+)?")
244                if matcher.search(code):
245                    microopClasses[name + 'i'] = cls
246            return cls
247
248
249    class MediaOp(X86Microop):
250        __metaclass__ = MediaOpMeta
251        # This class itself doesn't act as a microop
252        abstract = True
253
254        def __init__(self, dest, src1, op2,
255                size = None, destSize = None, srcSize = None, ext = None):
256            self.dest = dest
257            self.src1 = src1
258            self.op2 = op2
259            if size is not None:
260                self.srcSize = size
261                self.destSize = size
262            if srcSize is not None:
263                self.srcSize = srcSize
264            if destSize is not None:
265                self.destSize = destSize
266            if self.srcSize is None:
267                raise Exception, "Source size not set."
268            if self.destSize is None:
269                raise Exception, "Dest size not set."
270            if ext is None:
271                self.ext = 0
272            else:
273                self.ext = ext 
274
275        def getAllocator(self, *microFlags):
276            className = self.className
277            if self.mnemonic == self.base_mnemonic + 'i':
278                className += "Imm"
279            allocator = '''new %(class_name)s(machInst, macrocodeBlock
280                    %(flags)s, %(src1)s, %(op2)s, %(dest)s,
281                    %(srcSize)s, %(destSize)s, %(ext)s)''' % {
282                "class_name" : className,
283                "flags" : self.microFlagsText(microFlags),
284                "src1" : self.src1, "op2" : self.op2,
285                "dest" : self.dest,
286                "srcSize" : self.srcSize,
287                "destSize" : self.destSize,
288                "ext" : self.ext}
289            return allocator
290
291    class Mov2int(MediaOp):
        # Copies one srcSize-byte element of FpSrcReg1 into DestReg.
        # The element index is imm8; when bit 0 of src1 and bit 0 of ext are
        # both set, the index is first reduced by the number of elements that
        # fit in a FloatRegBits. If the resulting index falls outside the
        # register, DestReg is assigned to itself, i.e. left unchanged.
        # The code refers to imm8 and not to op2, so MediaOpMeta generates
        # only an immediate-form class for it. src2 defaults to 0 and is
        # passed to the base constructor as op2.
292        def __init__(self, dest, src1, src2 = 0, \
293                size = None, destSize = None, srcSize = None, ext = None):
294            super(Mov2int, self).__init__(dest, src1,\
295                    src2, size, destSize, srcSize, ext)
296        code = '''
297            int items = sizeof(FloatRegBits) / srcSize;
298            int offset = imm8;
299            if (bits(src1, 0) && (ext & 0x1))
300                offset -= items;
301            if (offset >= 0 && offset < items) {
302                uint64_t fpSrcReg1 =
303                    bits(FpSrcReg1.uqw,
304                            (offset + 1) * srcSize * 8 - 1,
305                            (offset + 0) * srcSize * 8);
306                DestReg = merge(0, fpSrcReg1, destSize);
307            } else {
308                DestReg = DestReg;
309            }
310        '''
311
312    class Mov2fp(MediaOp):
        # Inserts a value picked from SrcReg1 (srcSize bytes) into one
        # destSize-byte element of FpDestReg.
        # The element index is imm8; when bit 0 of dest and bit 0 of ext are
        # both set, the index is first reduced by the number of elements that
        # fit in a FloatRegBits. If the resulting index falls outside the
        # register, FpDestReg is assigned to itself, i.e. left unchanged.
        # The code refers to imm8 and not to op2, so MediaOpMeta generates
        # only an immediate-form class for it. src2 defaults to 0 and is
        # passed to the base constructor as op2.
313        def __init__(self, dest, src1, src2 = 0, \
314                size = None, destSize = None, srcSize = None, ext = None):
315            super(Mov2fp, self).__init__(dest, src1,\
316                    src2, size, destSize, srcSize, ext)
317        code = '''
318            int items = sizeof(FloatRegBits) / destSize;
319            int offset = imm8;
320            if (bits(dest, 0) && (ext & 0x1))
321                offset -= items;
322            if (offset >= 0 && offset < items) {
323                uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
324                FpDestReg.uqw =
325                    insertBits(FpDestReg.uqw,
326                            (offset + 1) * destSize * 8 - 1,
327                            (offset + 0) * destSize * 8, srcReg1);
328            } else {
329                FpDestReg.uqw = FpDestReg.uqw;
330            }
331        '''
332
333    class Movsign(MediaOp):
        # Gathers the top bit of every srcSize-byte element of FpSrcReg1 into
        # consecutive bits of a mask and ORs that mask into DestReg. When
        # bit 0 of ext is set, the mask bits start at bit position "items"
        # (the element count) instead of bit 0.
        # The microop takes a single source; the string "InstRegIndex(0)" is
        # passed to the base constructor in the op2 position.
334        def __init__(self, dest, src, \
335                size = None, destSize = None, srcSize = None, ext = None):
336            super(Movsign, self).__init__(dest, src,\
337                    "InstRegIndex(0)", size, destSize, srcSize, ext)
338        code = '''
339            int items = sizeof(FloatRegBits) / srcSize;
340            uint64_t result = 0;
341            int offset = (ext & 0x1) ? items : 0;
342            for (int i = 0; i < items; i++) {
343                uint64_t picked =
344                    bits(FpSrcReg1.uqw, (i + 1) * 8 * srcSize - 1);
345                result = insertBits(result, i + offset, i + offset, picked);
346            }
347            DestReg = DestReg | result;
348        '''
349
350    class Maskmov(MediaOp):
        # Conditional element-wise move into FpDestReg. Requires srcSize and
        # destSize to be equal. For each element, the FpSrcReg1 element
        # replaces the destination element only if the top bit of the
        # corresponding FpSrcReg2 element is set; other elements keep the
        # value FpDestReg already had. With bit 0 of ext set only the lowest
        # element is considered.
351        code = '''
352            assert(srcSize == destSize);
353            int size = srcSize;
354            int sizeBits = size * 8;
355            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
356            uint64_t result = FpDestReg.uqw;
357
358            for (int i = 0; i < items; i++) {
359                int hiIndex = (i + 1) * sizeBits - 1;
360                int loIndex = (i + 0) * sizeBits;
361                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
362                if (bits(FpSrcReg2.uqw, hiIndex))
363                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
364            }
365            FpDestReg.uqw = result;
366        '''
367
368    class shuffle(MediaOp):
        # Builds FpDestReg by choosing, for every result element, one source
        # element named by successive fields of ext. Requires srcSize and
        # destSize to be equal. A selector field is 1 bit wide for 8 byte
        # elements and 2 bits wide otherwise. A selector whose byte offset
        # (selector * size) reaches the end of a FloatRegBits picks from
        # FpSrcReg2 after being reduced by options / 2; any other selector
        # picks from FpSrcReg1.
369        code = '''
370            assert(srcSize == destSize);
371            int size = srcSize;
372            int sizeBits = size * 8;
373            int items = sizeof(FloatRegBits) / size;
374            int options;
375            int optionBits;
376            if (size == 8) {
377                options = 2;
378                optionBits = 1;
379            } else {
380                options = 4;
381                optionBits = 2;
382            }
383
384            uint64_t result = 0;
385            uint8_t sel = ext;
386
387            for (int i = 0; i < items; i++) {
388                uint64_t resBits;
389                uint8_t lsel = sel & mask(optionBits);
390                if (lsel * size >= sizeof(FloatRegBits)) {
391                    lsel -= options / 2;
392                    resBits = bits(FpSrcReg2.uqw,
393                            (lsel + 1) * sizeBits - 1,
394                            (lsel + 0) * sizeBits);
395                }  else {
396                    resBits = bits(FpSrcReg1.uqw,
397                            (lsel + 1) * sizeBits - 1,
398                            (lsel + 0) * sizeBits);
399                }
400
401                sel >>= optionBits;
402
403                int hiIndex = (i + 1) * sizeBits - 1;
404                int loIndex = (i + 0) * sizeBits;
405                result = insertBits(result, hiIndex, loIndex, resBits);
406            }
407            FpDestReg.uqw = result;
408        '''
409
410    class Unpack(MediaOp):
        # Interleaves elements of FpSrcReg1 and FpSrcReg2 into FpDestReg.
        # Requires srcSize and destSize to be equal. Element i + offset of
        # FpSrcReg1 lands in result slot 2 * i and the same element of
        # FpSrcReg2 in slot 2 * i + 1. Only half a register's worth of
        # elements is taken from each source: a non-zero ext selects the
        # upper half (offset = items), zero selects the lower half.
411        code = '''
412            assert(srcSize == destSize);
413            int size = destSize;
414            int items = (sizeof(FloatRegBits) / size) / 2;
415            int offset = ext ? items : 0;
416            uint64_t result = 0;
417            for (int i = 0; i < items; i++) {
418                uint64_t pickedLow =
419                    bits(FpSrcReg1.uqw, (i + offset + 1) * 8 * size - 1,
420                                        (i + offset) * 8 * size);
421                result = insertBits(result,
422                                    (2 * i + 1) * 8 * size - 1,
423                                    (2 * i + 0) * 8 * size,
424                                    pickedLow);
425                uint64_t pickedHigh =
426                    bits(FpSrcReg2.uqw, (i + offset + 1) * 8 * size - 1,
427                                        (i + offset) * 8 * size);
428                result = insertBits(result,
429                                    (2 * i + 2) * 8 * size - 1,
430                                    (2 * i + 1) * 8 * size,
431                                    pickedHigh);
432            }
433            FpDestReg.uqw = result;
434        '''
435
436    class Pack(MediaOp):
437        code = '''
438            assert(srcSize == destSize * 2);
439            int items = (sizeof(FloatRegBits) / destSize);
440            int destBits = destSize * 8;
441            int srcBits = srcSize * 8;
442            uint64_t result = 0;
443            int i;
444            for (i = 0; i < items / 2; i++) {
445                uint64_t picked =
446                    bits(FpSrcReg1.uqw, (i + 1) * srcBits - 1,
447                                        (i + 0) * srcBits);
448                unsigned signBit = bits(picked, srcBits - 1);
449                uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
450
451                // Handle saturation.
452                if (signBit) {
453                    if (overflow != mask(destBits - srcBits + 1)) {
454                        if (ext & 0x1)
455                            picked = (1 << (destBits - 1));
456                        else
457                            picked = 0;
458                    }
459                } else {
460                    if (overflow != 0) {
461                        if (ext & 0x1)
462                            picked = mask(destBits - 1);
463                        else
464                            picked = mask(destBits);
465                    }
466                }
467                result = insertBits(result,
468                                    (i + 1) * destBits - 1,
469                                    (i + 0) * destBits,
470                                    picked);
471            }
472            for (;i < items; i++) {
473                uint64_t picked =
474                    bits(FpSrcReg2.uqw, (i - items + 1) * srcBits - 1,
475                                        (i - items + 0) * srcBits);
476                unsigned signBit = bits(picked, srcBits - 1);
477                uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
478
479                // Handle saturation.
480                if (signBit) {
481                    if (overflow != mask(destBits - srcBits + 1)) {
482                        if (ext & 0x1)
483                            picked = (1 << (destBits - 1));
484                        else
485                            picked = 0;
486                    }
487                } else {
488                    if (overflow != 0) {
489                        if (ext & 0x1)
490                            picked = mask(destBits - 1);
491                        else
492                            picked = mask(destBits);
493                    }
494                }
495                result = insertBits(result,
496                                    (i + 1) * destBits - 1,
497                                    (i + 0) * destBits,
498                                    picked);
499            }
500            FpDestReg.uqw = result;
501        '''
502
503    class Mxor(MediaOp):
        # Bitwise XOR of FpSrcReg1 and FpSrcReg2 into FpDestReg. The operation
        # has no element structure, so the constructor takes no size
        # arguments and passes a fixed size of 1 to the base class.
504        def __init__(self, dest, src1, src2):
505            super(Mxor, self).__init__(dest, src1, src2, 1)
506        code = '''
507            FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw;
508        '''
509
510    class Mor(MediaOp):
        # Bitwise OR of FpSrcReg1 and FpSrcReg2 into FpDestReg. The operation
        # has no element structure, so the constructor takes no size
        # arguments and passes a fixed size of 1 to the base class.
511        def __init__(self, dest, src1, src2):
512            super(Mor, self).__init__(dest, src1, src2, 1)
513        code = '''
514            FpDestReg.uqw = FpSrcReg1.uqw | FpSrcReg2.uqw;
515        '''
516
517    class Mand(MediaOp):
        # Bitwise AND of FpSrcReg1 and FpSrcReg2 into FpDestReg. The operation
        # has no element structure, so the constructor takes no size
        # arguments and passes a fixed size of 1 to the base class.
518        def __init__(self, dest, src1, src2):
519            super(Mand, self).__init__(dest, src1, src2, 1)
520        code = '''
521            FpDestReg.uqw = FpSrcReg1.uqw & FpSrcReg2.uqw;
522        '''
523
524    class Mandn(MediaOp):
        # AND-NOT: the complement of FpSrcReg1 ANDed with FpSrcReg2, written
        # to FpDestReg. Note that it is the first source that is inverted.
        # The constructor takes no size arguments and passes a fixed size of
        # 1 to the base class.
525        def __init__(self, dest, src1, src2):
526            super(Mandn, self).__init__(dest, src1, src2, 1)
527        code = '''
528            FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw;
529        '''
530
531    class Mminf(MediaOp):
        # Element-wise floating point minimum of FpSrcReg1 and FpSrcReg2.
        # Requires srcSize == destSize and a size of 4 (float) or 8 (double).
        # With bit 0 of ext set only the lowest element is processed; the
        # result starts out as FpDestReg, so unprocessed elements keep their
        # previous value. The first source's bits are stored only when it
        # compares strictly less than the second; in every other case the
        # second source's bits are stored. The unions reinterpret the raw
        # element bits as float or double for the comparison.
532        code = '''
533            union floatInt
534            {
535                float f;
536                uint32_t i;
537            };
538            union doubleInt
539            {
540                double d;
541                uint64_t i;
542            };
543
544            assert(srcSize == destSize);
545            int size = srcSize;
546            int sizeBits = size * 8;
547            assert(srcSize == 4 || srcSize == 8);
548            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
549            uint64_t result = FpDestReg.uqw;
550
551            for (int i = 0; i < items; i++) {
552                double arg1, arg2;
553                int hiIndex = (i + 1) * sizeBits - 1;
554                int loIndex = (i + 0) * sizeBits;
555                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
556                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
557
558                if (size == 4) {
559                    floatInt fi;
560                    fi.i = arg1Bits;
561                    arg1 = fi.f;
562                    fi.i = arg2Bits;
563                    arg2 = fi.f;
564                } else {
565                    doubleInt di;
566                    di.i = arg1Bits;
567                    arg1 = di.d;
568                    di.i = arg2Bits;
569                    arg2 = di.d;
570                }
571
572                if (arg1 < arg2) {
573                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
574                } else {
575                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
576                }
577            }
578            FpDestReg.uqw = result;
579        '''
580
581    class Mmaxf(MediaOp):
        # Element-wise floating point maximum of FpSrcReg1 and FpSrcReg2.
        # Requires srcSize == destSize and a size of 4 (float) or 8 (double).
        # With bit 0 of ext set only the lowest element is processed; the
        # result starts out as FpDestReg, so unprocessed elements keep their
        # previous value. The first source's bits are stored only when it
        # compares strictly greater than the second; in every other case the
        # second source's bits are stored. The unions reinterpret the raw
        # element bits as float or double for the comparison.
582        code = '''
583            union floatInt
584            {
585                float f;
586                uint32_t i;
587            };
588            union doubleInt
589            {
590                double d;
591                uint64_t i;
592            };
593
594            assert(srcSize == destSize);
595            int size = srcSize;
596            int sizeBits = size * 8;
597            assert(srcSize == 4 || srcSize == 8);
598            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
599            uint64_t result = FpDestReg.uqw;
600
601            for (int i = 0; i < items; i++) {
602                double arg1, arg2;
603                int hiIndex = (i + 1) * sizeBits - 1;
604                int loIndex = (i + 0) * sizeBits;
605                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
606                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
607
608                if (size == 4) {
609                    floatInt fi;
610                    fi.i = arg1Bits;
611                    arg1 = fi.f;
612                    fi.i = arg2Bits;
613                    arg2 = fi.f;
614                } else {
615                    doubleInt di;
616                    di.i = arg1Bits;
617                    arg1 = di.d;
618                    di.i = arg2Bits;
619                    arg2 = di.d;
620                }
621
622                if (arg1 > arg2) {
623                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
624                } else {
625                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
626                }
627            }
628            FpDestReg.uqw = result;
629        '''
630
631    class Mmini(MediaOp):
632        code = '''
633
634            assert(srcSize == destSize);
635            int size = srcSize;
636            int sizeBits = size * 8;
637            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
638            uint64_t result = FpDestReg.uqw;
639
640            for (int i = 0; i < items; i++) {
641                int hiIndex = (i + 1) * sizeBits - 1;
642                int loIndex = (i + 0) * sizeBits;
643                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
644                int64_t arg1 = arg1Bits |
645                    (0 - (arg1Bits & (1 << (sizeBits - 1))));
646                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
647                int64_t arg2 = arg2Bits |
648                    (0 - (arg2Bits & (1 << (sizeBits - 1))));
649                uint64_t resBits;
650
651                if (ext & 0x2) {
652                    if (arg1 < arg2) {
653                        resBits = arg1Bits;
654                    } else {
655                        resBits = arg2Bits;
656                    }
657                } else {
658                    if (arg1Bits < arg2Bits) {
659                        resBits = arg1Bits;
660                    } else {
661                        resBits = arg2Bits;
662                    }
663                }
664                result = insertBits(result, hiIndex, loIndex, resBits);
665            }
666            FpDestReg.uqw = result;
667        '''
668
669    class Mmaxi(MediaOp):
670        code = '''
671
672            assert(srcSize == destSize);
673            int size = srcSize;
674            int sizeBits = size * 8;
675            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
676            uint64_t result = FpDestReg.uqw;
677
678            for (int i = 0; i < items; i++) {
679                int hiIndex = (i + 1) * sizeBits - 1;
680                int loIndex = (i + 0) * sizeBits;
681                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
682                int64_t arg1 = arg1Bits |
683                    (0 - (arg1Bits & (1 << (sizeBits - 1))));
684                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
685                int64_t arg2 = arg2Bits |
686                    (0 - (arg2Bits & (1 << (sizeBits - 1))));
687                uint64_t resBits;
688
689                if (ext & 0x2) {
690                    if (arg1 > arg2) {
691                        resBits = arg1Bits;
692                    } else {
693                        resBits = arg2Bits;
694                    }
695                } else {
696                    if (arg1Bits > arg2Bits) {
697                        resBits = arg1Bits;
698                    } else {
699                        resBits = arg2Bits;
700                    }
701                }
702                result = insertBits(result, hiIndex, loIndex, resBits);
703            }
704            FpDestReg.uqw = result;
705        '''
706
707    class Msqrt(MediaOp):
        # Element-wise floating point square root of FpSrcReg1 into FpDestReg.
        # Requires srcSize == destSize and a size of 4 (float) or 8 (double).
        # With bit 0 of ext set only the lowest element is processed; the
        # result starts out as FpDestReg, so unprocessed elements keep their
        # previous value. The unions reinterpret the raw element bits as
        # float or double and back.
        # The microop takes a single source; the string "InstRegIndex(0)" is
        # passed to the base constructor in the op2 position.
708        def __init__(self, dest, src, \
709                size = None, destSize = None, srcSize = None, ext = None):
710            super(Msqrt, self).__init__(dest, src,\
711                    "InstRegIndex(0)", size, destSize, srcSize, ext)
712        code = '''
713            union floatInt
714            {
715                float f;
716                uint32_t i;
717            };
718            union doubleInt
719            {
720                double d;
721                uint64_t i;
722            };
723
724            assert(srcSize == destSize);
725            int size = srcSize;
726            int sizeBits = size * 8;
727            assert(srcSize == 4 || srcSize == 8);
728            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
729            uint64_t result = FpDestReg.uqw;
730
731            for (int i = 0; i < items; i++) {
732                int hiIndex = (i + 1) * sizeBits - 1;
733                int loIndex = (i + 0) * sizeBits;
734                uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
735
736                if (size == 4) {
737                    floatInt fi;
738                    fi.i = argBits;
739                    fi.f = sqrt(fi.f);
740                    argBits = fi.i;
741                } else {
742                    doubleInt di;
743                    di.i = argBits;
744                    di.d = sqrt(di.d);
745                    argBits = di.i;
746                }
747                result = insertBits(result, hiIndex, loIndex, argBits);
748            }
749            FpDestReg.uqw = result;
750        '''
751
752    class Maddf(MediaOp):
        # Element-wise floating point addition: FpSrcReg1 + FpSrcReg2 into
        # FpDestReg. Requires srcSize == destSize and a size of 4 (float) or
        # 8 (double). With bit 0 of ext set only the lowest element is
        # processed; the result starts out as FpDestReg, so unprocessed
        # elements keep their previous value. The unions reinterpret the raw
        # element bits as float or double and back.
753        code = '''
754            union floatInt
755            {
756                float f;
757                uint32_t i;
758            };
759            union doubleInt
760            {
761                double d;
762                uint64_t i;
763            };
764
765            assert(srcSize == destSize);
766            int size = srcSize;
767            int sizeBits = size * 8;
768            assert(srcSize == 4 || srcSize == 8);
769            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
770            uint64_t result = FpDestReg.uqw;
771
772            for (int i = 0; i < items; i++) {
773                int hiIndex = (i + 1) * sizeBits - 1;
774                int loIndex = (i + 0) * sizeBits;
775                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
776                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
777                uint64_t resBits;
778
779                if (size == 4) {
780                    floatInt arg1, arg2, res;
781                    arg1.i = arg1Bits;
782                    arg2.i = arg2Bits;
783                    res.f = arg1.f + arg2.f;
784                    resBits = res.i;
785                } else {
786                    doubleInt arg1, arg2, res;
787                    arg1.i = arg1Bits;
788                    arg2.i = arg2Bits;
789                    res.d = arg1.d + arg2.d;
790                    resBits = res.i;
791                }
792
793                result = insertBits(result, hiIndex, loIndex, resBits);
794            }
795            FpDestReg.uqw = result;
796        '''
797
798    class Msubf(MediaOp):
        # Element-wise floating point subtraction: FpSrcReg1 - FpSrcReg2 into
        # FpDestReg. Requires srcSize == destSize and a size of 4 (float) or
        # 8 (double). With bit 0 of ext set only the lowest element is
        # processed; the result starts out as FpDestReg, so unprocessed
        # elements keep their previous value. The unions reinterpret the raw
        # element bits as float or double and back.
799        code = '''
800            union floatInt
801            {
802                float f;
803                uint32_t i;
804            };
805            union doubleInt
806            {
807                double d;
808                uint64_t i;
809            };
810
811            assert(srcSize == destSize);
812            int size = srcSize;
813            int sizeBits = size * 8;
814            assert(srcSize == 4 || srcSize == 8);
815            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
816            uint64_t result = FpDestReg.uqw;
817
818            for (int i = 0; i < items; i++) {
819                int hiIndex = (i + 1) * sizeBits - 1;
820                int loIndex = (i + 0) * sizeBits;
821                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
822                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
823                uint64_t resBits;
824
825                if (size == 4) {
826                    floatInt arg1, arg2, res;
827                    arg1.i = arg1Bits;
828                    arg2.i = arg2Bits;
829                    res.f = arg1.f - arg2.f;
830                    resBits = res.i;
831                } else {
832                    doubleInt arg1, arg2, res;
833                    arg1.i = arg1Bits;
834                    arg2.i = arg2Bits;
835                    res.d = arg1.d - arg2.d;
836                    resBits = res.i;
837                }
838
839                result = insertBits(result, hiIndex, loIndex, resBits);
840            }
841            FpDestReg.uqw = result;
842        '''
843
    class Mmulf(MediaOp):
        # Lane-wise floating-point multiplication.
        #
        # The C++ fragment below walks FpSrcReg1.uqw and FpSrcReg2.uqw in
        # lanes of srcSize bytes (asserted equal to destSize and to be 4 or
        # 8). Each lane's raw bits are reinterpreted as a float (4 bytes) or
        # double (8 bytes) through a local union, arg1 * arg2 is computed,
        # and the raw result bits are inserted into the same lane of the
        # result.
        #
        # result starts as the current FpDestReg.uqw, so lanes that are not
        # processed keep their previous contents. When bit 0 of ext is set
        # only lane 0 is processed; otherwise sizeof(FloatRegBits) / size
        # lanes are processed.
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f * arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d * arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''
889
    class Mdivf(MediaOp):
        # Lane-wise floating-point division.
        #
        # The C++ fragment below walks FpSrcReg1.uqw and FpSrcReg2.uqw in
        # lanes of srcSize bytes (asserted equal to destSize and to be 4 or
        # 8). Each lane's raw bits are reinterpreted as a float (4 bytes) or
        # double (8 bytes) through a local union, arg1 / arg2 is computed
        # (FpSrcReg1 is the dividend, FpSrcReg2 the divisor), and the raw
        # result bits are inserted into the same lane of the result.
        #
        # result starts as the current FpDestReg.uqw, so lanes that are not
        # processed keep their previous contents. When bit 0 of ext is set
        # only lane 0 is processed; otherwise sizeof(FloatRegBits) / size
        # lanes are processed.
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f / arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d / arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''
935
936    class Maddi(MediaOp):
937        code = '''
938            assert(srcSize == destSize);
939            int size = srcSize;
940            int sizeBits = size * 8;
941            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
942            uint64_t result = FpDestReg.uqw;
943
944            for (int i = 0; i < items; i++) {
945                int hiIndex = (i + 1) * sizeBits - 1;
946                int loIndex = (i + 0) * sizeBits;
947                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
948                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
949                uint64_t resBits = arg1Bits + arg2Bits;
950                
951                if (ext & 0x2) {
952                    if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
953                        resBits = mask(sizeBits);
954                } else if (ext & 0x4) {
955                    int arg1Sign = bits(arg1Bits, sizeBits - 1);
956                    int arg2Sign = bits(arg2Bits, sizeBits - 1);
957                    int resSign = bits(resBits, sizeBits - 1);
958                    if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
959                        if (resSign == 0)
960                            resBits = (1 << (sizeBits - 1));
961                        else
962                            resBits = mask(sizeBits - 1);
963                    }
964                }
965
966                result = insertBits(result, hiIndex, loIndex, resBits);
967            }
968            FpDestReg.uqw = result;
969        '''
970
971    class Msubi(MediaOp):
972        code = '''
973            assert(srcSize == destSize);
974            int size = srcSize;
975            int sizeBits = size * 8;
976            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
977            uint64_t result = FpDestReg.uqw;
978
979            for (int i = 0; i < items; i++) {
980                int hiIndex = (i + 1) * sizeBits - 1;
981                int loIndex = (i + 0) * sizeBits;
982                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
983                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
984                uint64_t resBits = arg1Bits - arg2Bits;
985                
986                if (ext & 0x2) {
987                    if (arg2Bits > arg1Bits) {
988                        resBits = 0;
989                    } else if (!findCarry(sizeBits, resBits,
990                                         arg1Bits, ~arg2Bits)) {
991                        resBits = mask(sizeBits);
992                    }
993                } else if (ext & 0x4) {
994                    int arg1Sign = bits(arg1Bits, sizeBits - 1);
995                    int arg2Sign = !bits(arg2Bits, sizeBits - 1);
996                    int resSign = bits(resBits, sizeBits - 1);
997                    if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
998                        if (resSign == 0)
999                            resBits = (1 << (sizeBits - 1));
1000                        else
1001                            resBits = mask(sizeBits - 1);
1002                    }
1003                }
1004
1005                result = insertBits(result, hiIndex, loIndex, resBits);
1006            }
1007            FpDestReg.uqw = result;
1008        '''
1009
1010    class Mmuli(MediaOp):
1011        code = '''
1012            int srcBits = srcSize * 8;
1013            int destBits = destSize * 8;
1014            assert(destBits <= 64);
1015            assert(destSize >= srcSize);
1016            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / destSize);
1017            uint64_t result = FpDestReg.uqw;
1018
1019            for (int i = 0; i < items; i++) {
1020                int offset = 0;
1021                if (ext & 16) {
1022                    if (ext & 32)
1023                        offset = i * (destBits - srcBits);
1024                    else
1025                        offset = i * (destBits - srcBits) + srcBits;
1026                }
1027                int srcHiIndex = (i + 1) * srcBits - 1 + offset;
1028                int srcLoIndex = (i + 0) * srcBits + offset;
1029                uint64_t arg1Bits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1030                uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex);
1031                uint64_t resBits;
1032
1033                if (ext & 0x2) {
1034                    int64_t arg1 = arg1Bits |
1035                        (0 - (arg1Bits & (1 << (srcBits - 1))));
1036                    int64_t arg2 = arg2Bits |
1037                        (0 - (arg2Bits & (1 << (srcBits - 1))));
1038                    resBits = (uint64_t)(arg1 * arg2);
1039                } else {
1040                    resBits = arg1Bits * arg2Bits;
1041                }
1042
1043                if (ext & 0x4)
1044                    resBits += (1 << (destBits - 1));
1045                
1046                if (ext & 0x8)
1047                    resBits >>= destBits;
1048
1049                int destHiIndex = (i + 1) * destBits - 1;
1050                int destLoIndex = (i + 0) * destBits;
1051                result = insertBits(result, destHiIndex, destLoIndex, resBits);
1052            }
1053            FpDestReg.uqw = result;
1054        '''
1055
    class Mavg(MediaOp):
        # Lane-wise average of two unsigned integers, rounding up.
        #
        # FpSrcReg1.uqw and FpSrcReg2.uqw are split into lanes of srcSize
        # bytes (asserted equal to destSize). For each lane the fragment
        # computes (arg1 + arg2 + 1) / 2 in 64 bit unsigned arithmetic and
        # inserts it into the matching lane of a result that starts as the
        # current FpDestReg.uqw. When bit 0 of ext is set only lane 0 is
        # processed.
        #
        # NOTE(review): the intermediate sum is a uint64_t, so it could only
        # wrap for 8 byte lanes - confirm that size is never requested.
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;
                
                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''
1075
    class Msad(MediaOp):
        # Sum of absolute differences.
        #
        # Every srcSize byte lane of FpSrcReg1.uqw and FpSrcReg2.uqw is
        # read, the absolute value of arg1 - arg2 is accumulated across all
        # lanes, and the total - masked to destSize * 8 bits - replaces the
        # whole of FpDestReg.uqw. Unlike the lane-wise ops in this file, the
        # fragment does not consult ext and does not merge with the previous
        # destination contents.
        code = '''
            int srcBits = srcSize * 8;
            int items = sizeof(FloatRegBits) / srcSize;

            uint64_t sum = 0;
            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * srcBits - 1;
                int loIndex = (i + 0) * srcBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                int64_t resBits = arg1Bits - arg2Bits;
                if (resBits < 0)
                    resBits = -resBits;
                sum += resBits;
            }
            FpDestReg.uqw = sum & mask(destSize * 8);
        '''
1094
    class Msrl(MediaOp):
        # Lane-wise logical (zero filling) right shift.
        #
        # Each srcSize byte lane of FpSrcReg1.uqw (srcSize is asserted equal
        # to destSize) is shifted right by the same amount, read from
        # op2.uqw. A shift amount of at least the lane width yields 0 for
        # the lane. Results are inserted into a result that starts as the
        # current FpDestReg.uqw; when bit 0 of ext is set only lane 0 is
        # processed.
        #
        # NOTE(review): op2 is not declared in this fragment; presumably the
        # MediaOp machinery substitutes it with a concrete operand - verify
        # against the base class.
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t shiftAmt = op2.uqw;
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t resBits;
                if (shiftAmt >= sizeBits) {
                    resBits = 0;
                } else {
                    resBits = (arg1Bits >> shiftAmt) &
                        mask(sizeBits - shiftAmt);
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''
1121
1122    class Msra(MediaOp):
1123        code = '''
1124
1125            assert(srcSize == destSize);
1126            int size = srcSize;
1127            int sizeBits = size * 8;
1128            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
1129            uint64_t shiftAmt = op2.uqw;
1130            uint64_t result = FpDestReg.uqw;
1131
1132            for (int i = 0; i < items; i++) {
1133                int hiIndex = (i + 1) * sizeBits - 1;
1134                int loIndex = (i + 0) * sizeBits;
1135                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1136                uint64_t resBits;
1137                if (shiftAmt >= sizeBits) {
1138                    if (bits(arg1Bits, sizeBits - 1))
1139                        resBits = mask(sizeBits);
1140                    else
1141                        resBits = 0;
1142                } else {
1143                    resBits = (arg1Bits >> shiftAmt);
1144                    resBits = resBits |
1145                        (0 - (resBits & (1 << (sizeBits - 1 - shiftAmt))));
1146                }
1147
1148                result = insertBits(result, hiIndex, loIndex, resBits);
1149            }
1150            FpDestReg.uqw = result;
1151        '''
1152
    class Msll(MediaOp):
        # Lane-wise logical left shift.
        #
        # Each srcSize byte lane of FpSrcReg1.uqw (srcSize is asserted equal
        # to destSize) is shifted left by the same amount, read from
        # op2.uqw. A shift amount of at least the lane width yields 0 for
        # the lane. Results are inserted into a result that starts as the
        # current FpDestReg.uqw; when bit 0 of ext is set only lane 0 is
        # processed.
        #
        # NOTE(review): the shifted value is not masked here; presumably
        # insertBits keeps only the bits that fit the lane - confirm.
        # NOTE(review): op2 is not declared in this fragment; presumably the
        # MediaOp machinery substitutes it with a concrete operand - verify
        # against the base class.
        code = '''

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t shiftAmt = op2.uqw;
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t resBits;
                if (shiftAmt >= sizeBits) {
                    resBits = 0;
                } else {
                    resBits = (arg1Bits << shiftAmt);
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''
1178
1179    class Cvti2f(MediaOp):
1180        def __init__(self, dest, src, \
1181                size = None, destSize = None, srcSize = None, ext = None):
1182            super(Cvti2f, self).__init__(dest, src,\
1183                    "InstRegIndex(0)", size, destSize, srcSize, ext)
1184        code = '''
1185            union floatInt
1186            {
1187                float f;
1188                uint32_t i;
1189            };
1190            union doubleInt
1191            {
1192                double d;
1193                uint64_t i;
1194            };
1195
1196            assert(destSize == 4 || destSize == 8);
1197            assert(srcSize == 4 || srcSize == 8);
1198            int srcSizeBits = srcSize * 8;
1199            int destSizeBits = destSize * 8;
1200            int items;
1201            int srcStart = 0;
1202            int destStart = 0;
1203            if (srcSize == 2 * destSize) {
1204                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
1205                if (ext & 0x2)
1206                    destStart = destSizeBits * items;
1207            } else if (destSize == 2 * srcSize) {
1208                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
1209                if (ext & 0x2)
1210                    srcStart = srcSizeBits * items;
1211            } else {
1212                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
1213            }
1214            uint64_t result = FpDestReg.uqw;
1215
1216            for (int i = 0; i < items; i++) {
1217                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1218                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1219                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1220                int64_t sArg = argBits | (0 - (argBits & (1 << srcHiIndex)));
1221                double arg = sArg;
1222
1223                if (destSize == 4) {
1224                    floatInt fi;
1225                    fi.f = arg;
1226                    argBits = fi.i;
1227                } else {
1228                    doubleInt di;
1229                    di.d = arg;
1230                    argBits = di.i;
1231                }
1232                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1233                int destLoIndex = destStart + (i + 0) * destSizeBits;
1234                result = insertBits(result, destHiIndex, destLoIndex, argBits);
1235            }
1236            FpDestReg.uqw = result;
1237        '''
1238
    class Cvtf2f(MediaOp):
        # Lane-wise conversion between float and double.
        #
        # Source lanes are srcSize bytes and destination lanes destSize
        # bytes (each asserted to be 4 or 8). The raw bits of every source
        # lane of FpSrcReg1.uqw are reinterpreted as a float or double,
        # held as a double, then stored as a float or double according to
        # destSize. The raw result bits are inserted into the destination
        # lane of a result that starts as the current FpDestReg.uqw.
        #
        # ext bits, as used by the fragment:
        #   0x1 - convert a single item only.
        #   0x2 - when the two sizes differ by a factor of two, start at the
        #         upper part of the narrower side: results go to the lanes
        #         above the first `items` when narrowing, and sources come
        #         from the lanes above the first `items` when widening.
        #
        # The op takes a single source; the constructor passes the fixed
        # string "InstRegIndex(0)" to the base class in the position after
        # src.
        def __init__(self, dest, src, \
                size = None, destSize = None, srcSize = None, ext = None):
            super(Cvtf2f, self).__init__(dest, src,\
                    "InstRegIndex(0)", size, destSize, srcSize, ext)
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(destSize == 4 || destSize == 8);
            assert(srcSize == 4 || srcSize == 8);
            int srcSizeBits = srcSize * 8;
            int destSizeBits = destSize * 8;
            int items;
            int srcStart = 0;
            int destStart = 0;
            if (srcSize == 2 * destSize) {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
                if (ext & 0x2)
                    destStart = destSizeBits * items;
            } else if (destSize == 2 * srcSize) {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
                if (ext & 0x2)
                    srcStart = srcSizeBits * items;
            } else {
                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
            }
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
                double arg;

                if (srcSize == 4) {
                    floatInt fi;
                    fi.i = argBits;
                    arg = fi.f;
                } else {
                    doubleInt di;
                    di.i = argBits;
                    arg = di.d;
                }
                if (destSize == 4) {
                    floatInt fi;
                    fi.f = arg;
                    argBits = fi.i;
                } else {
                    doubleInt di;
                    di.d = arg;
                    argBits = di.i;
                }
                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
                int destLoIndex = destStart + (i + 0) * destSizeBits;
                result = insertBits(result, destHiIndex, destLoIndex, argBits);
            }
            FpDestReg.uqw = result;
        '''
1306
1307    class Mcmpi2r(MediaOp):
1308        code = '''
1309            union floatInt
1310            {
1311                float f;
1312                uint32_t i;
1313            };
1314            union doubleInt
1315            {
1316                double d;
1317                uint64_t i;
1318            };
1319
1320            assert(srcSize == destSize);
1321            int size = srcSize;
1322            int sizeBits = size * 8;
1323            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
1324            uint64_t result = FpDestReg.uqw;
1325
1326            for (int i = 0; i < items; i++) {
1327                int hiIndex = (i + 1) * sizeBits - 1;
1328                int loIndex = (i + 0) * sizeBits;
1329                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1330                int64_t arg1 = arg1Bits |
1331                    (0 - (arg1Bits & (1 << (sizeBits - 1))));
1332                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1333                int64_t arg2 = arg2Bits |
1334                    (0 - (arg2Bits & (1 << (sizeBits - 1))));
1335
1336                uint64_t resBits = 0;
1337                if ((ext & 0x2) == 0 && arg1 == arg2 ||
1338                        (ext & 0x2) == 0x2 && arg1 > arg2)
1339                    resBits = mask(sizeBits);
1340
1341                result = insertBits(result, hiIndex, loIndex, resBits);
1342            }
1343            FpDestReg.uqw = result;
1344        '''
1345}};
1346