mediaop.isa revision 6577:cfe4a8f16e5f
1/// Copyright (c) 2009 The Regents of The University of Michigan
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met: redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer;
8// redistributions in binary form must reproduce the above copyright
9// notice, this list of conditions and the following disclaimer in the
10// documentation and/or other materials provided with the distribution;
11// neither the name of the copyright holders nor the names of its
12// contributors may be used to endorse or promote products derived from
13// this software without specific prior written permission.
14//
15// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26//
27// Authors: Gabe Black
28
29def template MediaOpExecute {{
        // Body of the generated execute() method for a media microop. The
        // op_decl, op_rd, code and op_wb placeholders are filled in when the
        // template is substituted; the op_wb text only runs if fault is
        // still NoFault after the code snippet, and fault is returned.
30        Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
31                Trace::InstRecord *traceData) const
32        {
33            Fault fault = NoFault;
34
35            %(op_decl)s;
36            %(op_rd)s;
37
38            %(code)s;
39
40            //Write the resulting state to the execution context
41            if(fault == NoFault)
42            {
43                %(op_wb)s;
44            }
45            return fault;
46        }
47}};
48
49def template MediaOpRegDeclare {{
    // Class declaration for the register form of a media microop (second
    // source is an InstRegIndex). Two constructors are declared: one taking
    // the isMicro/isDelayed/isFirst/isLast flags explicitly and one without
    // them. buildMe() is declared here and defined by the matching
    // constructor template.
50    class %(class_name)s : public %(base_class)s
51    {
52      protected:
53        void buildMe();
54
55      public:
56        %(class_name)s(ExtMachInst _machInst,
57                const char * instMnem,
58                bool isMicro, bool isDelayed, bool isFirst, bool isLast,
59                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
60                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
61
62        %(class_name)s(ExtMachInst _machInst,
63                const char * instMnem,
64                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
65                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
66
67        %(BasicExecDeclare)s
68    };
69}};
70
71def template MediaOpImmDeclare {{
    // Class declaration for the immediate form of a media microop. It has
    // the same shape as the register form except that the second source is
    // passed as a uint16_t named _imm8 instead of an InstRegIndex.
72
73    class %(class_name)s : public %(base_class)s
74    {
75      protected:
76        void buildMe();
77
78      public:
79        %(class_name)s(ExtMachInst _machInst,
80                const char * instMnem,
81                bool isMicro, bool isDelayed, bool isFirst, bool isLast,
82                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
83                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
84
85        %(class_name)s(ExtMachInst _machInst,
86                const char * instMnem,
87                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
88                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
89
90        %(BasicExecDeclare)s
91    };
92}};
93
94def template MediaOpRegConstructor {{
    // Constructor definitions for the register form. buildMe() holds the
    // substituted constructor text and is called from both constructors.
    // The constructor without flag parameters forwards false for all four
    // of isMicro/isDelayed/isFirst/isLast to the base class; the other one
    // forwards the flags it was given.
95
96    inline void %(class_name)s::buildMe()
97    {
98        %(constructor)s;
99    }
100
101    inline %(class_name)s::%(class_name)s(
102            ExtMachInst machInst, const char * instMnem,
103            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
104            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
105        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
106                false, false, false, false,
107                _src1, _src2, _dest, _srcSize, _destSize, _ext,
108                %(op_class)s)
109    {
110        buildMe();
111    }
112
113    inline %(class_name)s::%(class_name)s(
114            ExtMachInst machInst, const char * instMnem,
115            bool isMicro, bool isDelayed, bool isFirst, bool isLast,
116            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
117            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
118        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
119                isMicro, isDelayed, isFirst, isLast,
120                _src1, _src2, _dest, _srcSize, _destSize, _ext,
121                %(op_class)s)
122    {
123        buildMe();
124    }
125}};
126
127def template MediaOpImmConstructor {{
    // Constructor definitions for the immediate form. These mirror the
    // register-form constructors, forwarding _imm8 to the base class in the
    // position where the register form forwards _src2. The constructor
    // without flag parameters forwards false for all four flags.
128
129    inline void %(class_name)s::buildMe()
130    {
131        %(constructor)s;
132    }
133
134    inline %(class_name)s::%(class_name)s(
135            ExtMachInst machInst, const char * instMnem,
136            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
137            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
138        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
139                false, false, false, false,
140                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
141                %(op_class)s)
142    {
143        buildMe();
144    }
145
146    inline %(class_name)s::%(class_name)s(
147            ExtMachInst machInst, const char * instMnem,
148            bool isMicro, bool isDelayed, bool isFirst, bool isLast,
149            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
150            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
151        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
152                isMicro, isDelayed, isFirst, isLast,
153                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
154                %(op_class)s)
155    {
156        buildMe();
157    }
158}};
159
160let {{
161    # Make these empty strings so that concatenating onto
162    # them will always work.
163    header_output = ""
164    decoder_output = ""
165    exec_output = ""
166
    # Template triples used positionally by MediaOpMeta.buildCppClasses:
    # element 0 is appended to header_output, element 1 to decoder_output
    # and element 2 to exec_output, so the order below matters.

    # Templates for microops whose code mentions imm8.
167    immTemplates = (
168            MediaOpImmDeclare,
169            MediaOpImmConstructor,
170            MediaOpExecute)
171
    # Templates for microops whose code does not mention imm8.
172    regTemplates = (
173            MediaOpRegDeclare,
174            MediaOpRegConstructor,
175            MediaOpExecute)
176
177    class MediaOpMeta(type):
178        def buildCppClasses(self, name, Name, suffix, code):
179
180            # Globals to stick the output in
181            global header_output
182            global decoder_output
183            global exec_output
184
185            # If op2 is used anywhere, make register and immediate versions
186            # of this code.
187            matcher = re.compile("(?<!\\w)(?P<prefix>s?)op2(?P<typeQual>\\.\\w+)?")
188            match = matcher.search(code)
189            if match:
190                typeQual = ""
191                if match.group("typeQual"):
192                    typeQual = match.group("typeQual")
193                src2_name = "%spsrc2%s" % (match.group("prefix"), typeQual)
194                self.buildCppClasses(name, Name, suffix,
195                        matcher.sub(src2_name, code))
196                self.buildCppClasses(name + "i", Name, suffix + "Imm",
197                        matcher.sub("imm8", code))
198                return
199
200            base = "X86ISA::MediaOp"
201
202            # If imm8 shows up in the code, use the immediate templates, if
203            # not, hopefully the register ones will be correct.
204            matcher = re.compile("(?<!\w)imm8(?!\w)")
205            if matcher.search(code):
206                base += "Imm"
207                templates = immTemplates
208            else:
209                base += "Reg"
210                templates = regTemplates
211
212            # Get everything ready for the substitution
213            iop = InstObjParams(name, Name + suffix, base, {"code" : code})
214
215            # Generate the actual code (finally!)
216            header_output += templates[0].subst(iop)
217            decoder_output += templates[1].subst(iop)
218            exec_output += templates[2].subst(iop)
219
220
221        def __new__(mcls, Name, bases, dict):
222            abstract = False
223            name = Name.lower()
224            if "abstract" in dict:
225                abstract = dict['abstract']
226                del dict['abstract']
227
228            cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
229            if not abstract:
230                cls.className = Name
231                cls.base_mnemonic = name
232                code = cls.code
233
234                # Set up the C++ classes
235                mcls.buildCppClasses(cls, name, Name, "", code)
236
237                # Hook into the microassembler dict
238                global microopClasses
239                microopClasses[name] = cls
240
241                # If op2 is used anywhere, make register and immediate versions
242                # of this code.
243                matcher = re.compile("op2(?P<typeQual>\\.\\w+)?")
244                if matcher.search(code):
245                    microopClasses[name + 'i'] = cls
246            return cls
247
248
249    class MediaOp(X86Microop):
250        __metaclass__ = MediaOpMeta
251        # This class itself doesn't act as a microop
252        abstract = True
253
254        def __init__(self, dest, src1, op2,
255                size = None, destSize = None, srcSize = None, ext = None):
256            self.dest = dest
257            self.src1 = src1
258            self.op2 = op2
259            if size is not None:
260                self.srcSize = size
261                self.destSize = size
262            if srcSize is not None:
263                self.srcSize = srcSize
264            if destSize is not None:
265                self.destSize = destSize
266            if self.srcSize is None:
267                raise Exception, "Source size not set."
268            if self.destSize is None:
269                raise Exception, "Dest size not set."
270            if ext is None:
271                self.ext = 0
272            else:
273                self.ext = ext 
274
275        def getAllocator(self, *microFlags):
276            className = self.className
277            if self.mnemonic == self.base_mnemonic + 'i':
278                className += "Imm"
279            allocator = '''new %(class_name)s(machInst, macrocodeBlock
280                    %(flags)s, %(src1)s, %(op2)s, %(dest)s,
281                    %(srcSize)s, %(destSize)s, %(ext)s)''' % {
282                "class_name" : className,
283                "flags" : self.microFlagsText(microFlags),
284                "src1" : self.src1, "op2" : self.op2,
285                "dest" : self.dest,
286                "srcSize" : self.srcSize,
287                "destSize" : self.destSize,
288                "ext" : self.ext}
289            return allocator
290
291    class Mov2int(MediaOp):
        # Takes the low srcSize bytes of FpSrcReg1 and merges them into
        # DestReg using destSize. Only one source is used, so the constructor
        # passes the placeholder text "InstRegIndex(0)" as op2.
292        def __init__(self, dest, src, \
293                size = None, destSize = None, srcSize = None, ext = None):
294            super(Mov2int, self).__init__(dest, src,\
295                    "InstRegIndex(0)", size, destSize, srcSize, ext)
296        code = '''
297            uint64_t fpSrcReg1 = bits(FpSrcReg1.uqw, srcSize * 8 - 1, 0);
298            DestReg = merge(DestReg, fpSrcReg1, destSize);
299        '''
300
301    class Mov2fp(MediaOp):
        # Picks srcSize bytes from SrcReg1 with pick() and inserts the value
        # into the low destSize * 8 bits of FpDestReg. Only one source is
        # used, so the constructor passes "InstRegIndex(0)" as op2.
302        def __init__(self, dest, src, \
303                size = None, destSize = None, srcSize = None, ext = None):
304            super(Mov2fp, self).__init__(dest, src,\
305                    "InstRegIndex(0)", size, destSize, srcSize, ext)
306        code = '''
307            uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
308            FpDestReg.uqw =
309                insertBits(FpDestReg.uqw, destSize * 8 - 1, 0, srcReg1);
310        '''
311
312    class Unpack(MediaOp):
        # Interleaves items of the two sources. "items" is half the number of
        # size-byte items in a FloatRegBits; for each i, item (i + offset) of
        # FpSrcReg1 goes to result slot 2i and item (i + offset) of FpSrcReg2
        # to slot 2i + 1. offset is "items" when ext is non-zero (the upper
        # items are used) and 0 otherwise. Requires srcSize == destSize.
313        code = '''
314            assert(srcSize == destSize);
315            int size = destSize;
316            int items = (sizeof(FloatRegBits) / size) / 2;
317            int offset = ext ? items : 0;
318            uint64_t result = 0;
319            for (int i = 0; i < items; i++) {
320                uint64_t pickedLow =
321                    bits(FpSrcReg1.uqw, (i + offset + 1) * 8 * size - 1,
322                                        (i + offset) * 8 * size);
323                result = insertBits(result,
324                                    (2 * i + 1) * 8 * size - 1,
325                                    (2 * i + 0) * 8 * size,
326                                    pickedLow);
327                uint64_t pickedHigh =
328                    bits(FpSrcReg2.uqw, (i + offset + 1) * 8 * size - 1,
329                                        (i + offset) * 8 * size);
330                result = insertBits(result,
331                                    (2 * i + 2) * 8 * size - 1,
332                                    (2 * i + 1) * 8 * size,
333                                    pickedHigh);
334            }
335            FpDestReg.uqw = result;
336        '''
337
338    class Pack(MediaOp):
339        code = '''
340            assert(srcSize == destSize * 2);
341            int items = (sizeof(FloatRegBits) / destSize);
342            int destBits = destSize * 8;
343            int srcBits = srcSize * 8;
344            uint64_t result = 0;
345            int i;
346            for (i = 0; i < items / 2; i++) {
347                uint64_t picked =
348                    bits(FpSrcReg1.uqw, (i + 1) * srcBits - 1,
349                                        (i + 0) * srcBits);
350                unsigned signBit = bits(picked, srcBits - 1);
351                uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
352
353                // Handle saturation.
354                if (signBit) {
355                    if (overflow != mask(destBits - srcBits + 1)) {
356                        if (ext & 0x1)
357                            picked = (1 << (destBits - 1));
358                        else
359                            picked = 0;
360                    }
361                } else {
362                    if (overflow != 0) {
363                        if (ext & 0x1)
364                            picked = mask(destBits - 1);
365                        else
366                            picked = mask(destBits);
367                    }
368                }
369                result = insertBits(result,
370                                    (i + 1) * destBits - 1,
371                                    (i + 0) * destBits,
372                                    picked);
373            }
374            for (;i < items; i++) {
375                uint64_t picked =
376                    bits(FpSrcReg2.uqw, (i - items + 1) * srcBits - 1,
377                                        (i - items + 0) * srcBits);
378                unsigned signBit = bits(picked, srcBits - 1);
379                uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
380
381                // Handle saturation.
382                if (signBit) {
383                    if (overflow != mask(destBits - srcBits + 1)) {
384                        if (ext & 0x1)
385                            picked = (1 << (destBits - 1));
386                        else
387                            picked = 0;
388                    }
389                } else {
390                    if (overflow != 0) {
391                        if (ext & 0x1)
392                            picked = mask(destBits - 1);
393                        else
394                            picked = mask(destBits);
395                    }
396                }
397                result = insertBits(result,
398                                    (i + 1) * destBits - 1,
399                                    (i + 0) * destBits,
400                                    picked);
401            }
402            FpDestReg.uqw = result;
403        '''
404
405    class Mxor(MediaOp):
        # Bitwise XOR of the two sources into FpDestReg. The constructor
        # takes no size arguments and passes 1 as the size to MediaOp.
406        def __init__(self, dest, src1, src2):
407            super(Mxor, self).__init__(dest, src1, src2, 1)
408        code = '''
409            FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw;
410        '''
411
412    class Mor(MediaOp):
        # Bitwise OR of the two sources into FpDestReg. The constructor
        # takes no size arguments and passes 1 as the size to MediaOp.
413        def __init__(self, dest, src1, src2):
414            super(Mor, self).__init__(dest, src1, src2, 1)
415        code = '''
416            FpDestReg.uqw = FpSrcReg1.uqw | FpSrcReg2.uqw;
417        '''
418
419    class Mand(MediaOp):
        # Bitwise AND of the two sources into FpDestReg. The constructor
        # takes no size arguments and passes 1 as the size to MediaOp.
420        def __init__(self, dest, src1, src2):
421            super(Mand, self).__init__(dest, src1, src2, 1)
422        code = '''
423            FpDestReg.uqw = FpSrcReg1.uqw & FpSrcReg2.uqw;
424        '''
425
426    class Mandn(MediaOp):
        # AND-NOT: the complement of the FIRST source is ANDed with the
        # second source. The constructor passes 1 as the size to MediaOp.
427        def __init__(self, dest, src1, src2):
428            super(Mandn, self).__init__(dest, src1, src2, 1)
429        code = '''
430            FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw;
431        '''
432
433    class Mminf(MediaOp):
        # Per-item floating point minimum. Items are 4 or 8 bytes wide
        # (asserted) and are reinterpreted as float/double through unions;
        # both are compared as double. When bit 0 of ext is set only item 0
        # is processed. The result starts from the current FpDestReg value,
        # so unprocessed items keep their old bits. The second source's bits
        # are chosen whenever "arg1 < arg2" is false.
434        code = '''
435            union floatInt
436            {
437                float f;
438                uint32_t i;
439            };
440            union doubleInt
441            {
442                double d;
443                uint64_t i;
444            };
445
446            assert(srcSize == destSize);
447            int size = srcSize;
448            int sizeBits = size * 8;
449            assert(srcSize == 4 || srcSize == 8);
450            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
451            uint64_t result = FpDestReg.uqw;
452
453            for (int i = 0; i < items; i++) {
454                double arg1, arg2;
455                int hiIndex = (i + 1) * sizeBits - 1;
456                int loIndex = (i + 0) * sizeBits;
457                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
458                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
459
460                if (size == 4) {
461                    floatInt fi;
462                    fi.i = arg1Bits;
463                    arg1 = fi.f;
464                    fi.i = arg2Bits;
465                    arg2 = fi.f;
466                } else {
467                    doubleInt di;
468                    di.i = arg1Bits;
469                    arg1 = di.d;
470                    di.i = arg2Bits;
471                    arg2 = di.d;
472                }
473
474                if (arg1 < arg2) {
475                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
476                } else {
477                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
478                }
479            }
480            FpDestReg.uqw = result;
481        '''
482
483    class Mmaxf(MediaOp):
        # Per-item floating point maximum. Items are 4 or 8 bytes wide
        # (asserted) and are reinterpreted as float/double through unions;
        # both are compared as double. When bit 0 of ext is set only item 0
        # is processed. The result starts from the current FpDestReg value,
        # so unprocessed items keep their old bits. The second source's bits
        # are chosen whenever "arg1 > arg2" is false.
484        code = '''
485            union floatInt
486            {
487                float f;
488                uint32_t i;
489            };
490            union doubleInt
491            {
492                double d;
493                uint64_t i;
494            };
495
496            assert(srcSize == destSize);
497            int size = srcSize;
498            int sizeBits = size * 8;
499            assert(srcSize == 4 || srcSize == 8);
500            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
501            uint64_t result = FpDestReg.uqw;
502
503            for (int i = 0; i < items; i++) {
504                double arg1, arg2;
505                int hiIndex = (i + 1) * sizeBits - 1;
506                int loIndex = (i + 0) * sizeBits;
507                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
508                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
509
510                if (size == 4) {
511                    floatInt fi;
512                    fi.i = arg1Bits;
513                    arg1 = fi.f;
514                    fi.i = arg2Bits;
515                    arg2 = fi.f;
516                } else {
517                    doubleInt di;
518                    di.i = arg1Bits;
519                    arg1 = di.d;
520                    di.i = arg2Bits;
521                    arg2 = di.d;
522                }
523
524                if (arg1 > arg2) {
525                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
526                } else {
527                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
528                }
529            }
530            FpDestReg.uqw = result;
531        '''
532
533    class Mmini(MediaOp):
534        code = '''
535
536            assert(srcSize == destSize);
537            int size = srcSize;
538            int sizeBits = size * 8;
539            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
540            uint64_t result = FpDestReg.uqw;
541
542            for (int i = 0; i < items; i++) {
543                int hiIndex = (i + 1) * sizeBits - 1;
544                int loIndex = (i + 0) * sizeBits;
545                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
546                int64_t arg1 = arg1Bits |
547                    (0 - (arg1Bits & (1 << (sizeBits - 1))));
548                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
549                int64_t arg2 = arg2Bits |
550                    (0 - (arg2Bits & (1 << (sizeBits - 1))));
551                uint64_t resBits;
552
553                if (ext & 0x2) {
554                    if (arg1 < arg2) {
555                        resBits = arg1Bits;
556                    } else {
557                        resBits = arg2Bits;
558                    }
559                } else {
560                    if (arg1Bits < arg2Bits) {
561                        resBits = arg1Bits;
562                    } else {
563                        resBits = arg2Bits;
564                    }
565                }
566                result = insertBits(result, hiIndex, loIndex, resBits);
567            }
568            FpDestReg.uqw = result;
569        '''
570
571    class Mmaxi(MediaOp):
572        code = '''
573
574            assert(srcSize == destSize);
575            int size = srcSize;
576            int sizeBits = size * 8;
577            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
578            uint64_t result = FpDestReg.uqw;
579
580            for (int i = 0; i < items; i++) {
581                int hiIndex = (i + 1) * sizeBits - 1;
582                int loIndex = (i + 0) * sizeBits;
583                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
584                int64_t arg1 = arg1Bits |
585                    (0 - (arg1Bits & (1 << (sizeBits - 1))));
586                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
587                int64_t arg2 = arg2Bits |
588                    (0 - (arg2Bits & (1 << (sizeBits - 1))));
589                uint64_t resBits;
590
591                if (ext & 0x2) {
592                    if (arg1 > arg2) {
593                        resBits = arg1Bits;
594                    } else {
595                        resBits = arg2Bits;
596                    }
597                } else {
598                    if (arg1Bits > arg2Bits) {
599                        resBits = arg1Bits;
600                    } else {
601                        resBits = arg2Bits;
602                    }
603                }
604                result = insertBits(result, hiIndex, loIndex, resBits);
605            }
606            FpDestReg.uqw = result;
607        '''
608
609    class Msqrt(MediaOp):
        # Per-item floating point square root of FpSrcReg1. Items are 4 or 8
        # bytes wide (asserted); bit 0 of ext restricts the operation to
        # item 0. The result starts from the current FpDestReg value, so
        # unprocessed items keep their old bits. Only one source is used, so
        # the constructor passes "InstRegIndex(0)" as op2.
610        def __init__(self, dest, src, \
611                size = None, destSize = None, srcSize = None, ext = None):
612            super(Msqrt, self).__init__(dest, src,\
613                    "InstRegIndex(0)", size, destSize, srcSize, ext)
614        code = '''
615            union floatInt
616            {
617                float f;
618                uint32_t i;
619            };
620            union doubleInt
621            {
622                double d;
623                uint64_t i;
624            };
625
626            assert(srcSize == destSize);
627            int size = srcSize;
628            int sizeBits = size * 8;
629            assert(srcSize == 4 || srcSize == 8);
630            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
631            uint64_t result = FpDestReg.uqw;
632
633            for (int i = 0; i < items; i++) {
634                int hiIndex = (i + 1) * sizeBits - 1;
635                int loIndex = (i + 0) * sizeBits;
636                uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
637
638                if (size == 4) {
639                    floatInt fi;
640                    fi.i = argBits;
641                    fi.f = sqrt(fi.f);
642                    argBits = fi.i;
643                } else {
644                    doubleInt di;
645                    di.i = argBits;
646                    di.d = sqrt(di.d);
647                    argBits = di.i;
648                }
649                result = insertBits(result, hiIndex, loIndex, argBits);
650            }
651            FpDestReg.uqw = result;
652        '''
653
654    class Maddf(MediaOp):
        # Per-item floating point addition (FpSrcReg1 + FpSrcReg2). Items
        # are 4 or 8 bytes wide (asserted) and are computed as float or
        # double respectively. Bit 0 of ext restricts the operation to item
        # 0. The result starts from the current FpDestReg value, so
        # unprocessed items keep their old bits.
655        code = '''
656            union floatInt
657            {
658                float f;
659                uint32_t i;
660            };
661            union doubleInt
662            {
663                double d;
664                uint64_t i;
665            };
666
667            assert(srcSize == destSize);
668            int size = srcSize;
669            int sizeBits = size * 8;
670            assert(srcSize == 4 || srcSize == 8);
671            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
672            uint64_t result = FpDestReg.uqw;
673
674            for (int i = 0; i < items; i++) {
675                int hiIndex = (i + 1) * sizeBits - 1;
676                int loIndex = (i + 0) * sizeBits;
677                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
678                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
679                uint64_t resBits;
680
681                if (size == 4) {
682                    floatInt arg1, arg2, res;
683                    arg1.i = arg1Bits;
684                    arg2.i = arg2Bits;
685                    res.f = arg1.f + arg2.f;
686                    resBits = res.i;
687                } else {
688                    doubleInt arg1, arg2, res;
689                    arg1.i = arg1Bits;
690                    arg2.i = arg2Bits;
691                    res.d = arg1.d + arg2.d;
692                    resBits = res.i;
693                }
694
695                result = insertBits(result, hiIndex, loIndex, resBits);
696            }
697            FpDestReg.uqw = result;
698        '''
699
700    class Msubf(MediaOp):
        # Per-item floating point subtraction (FpSrcReg1 - FpSrcReg2). Items
        # are 4 or 8 bytes wide (asserted) and are computed as float or
        # double respectively. Bit 0 of ext restricts the operation to item
        # 0. The result starts from the current FpDestReg value, so
        # unprocessed items keep their old bits.
701        code = '''
702            union floatInt
703            {
704                float f;
705                uint32_t i;
706            };
707            union doubleInt
708            {
709                double d;
710                uint64_t i;
711            };
712
713            assert(srcSize == destSize);
714            int size = srcSize;
715            int sizeBits = size * 8;
716            assert(srcSize == 4 || srcSize == 8);
717            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
718            uint64_t result = FpDestReg.uqw;
719
720            for (int i = 0; i < items; i++) {
721                int hiIndex = (i + 1) * sizeBits - 1;
722                int loIndex = (i + 0) * sizeBits;
723                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
724                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
725                uint64_t resBits;
726
727                if (size == 4) {
728                    floatInt arg1, arg2, res;
729                    arg1.i = arg1Bits;
730                    arg2.i = arg2Bits;
731                    res.f = arg1.f - arg2.f;
732                    resBits = res.i;
733                } else {
734                    doubleInt arg1, arg2, res;
735                    arg1.i = arg1Bits;
736                    arg2.i = arg2Bits;
737                    res.d = arg1.d - arg2.d;
738                    resBits = res.i;
739                }
740
741                result = insertBits(result, hiIndex, loIndex, resBits);
742            }
743            FpDestReg.uqw = result;
744        '''
745
746    class Mmulf(MediaOp):
        # Per-item floating point multiplication (FpSrcReg1 * FpSrcReg2).
        # Items are 4 or 8 bytes wide (asserted) and are computed as float
        # or double respectively. Bit 0 of ext restricts the operation to
        # item 0. The result starts from the current FpDestReg value, so
        # unprocessed items keep their old bits.
747        code = '''
748            union floatInt
749            {
750                float f;
751                uint32_t i;
752            };
753            union doubleInt
754            {
755                double d;
756                uint64_t i;
757            };
758
759            assert(srcSize == destSize);
760            int size = srcSize;
761            int sizeBits = size * 8;
762            assert(srcSize == 4 || srcSize == 8);
763            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
764            uint64_t result = FpDestReg.uqw;
765
766            for (int i = 0; i < items; i++) {
767                int hiIndex = (i + 1) * sizeBits - 1;
768                int loIndex = (i + 0) * sizeBits;
769                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
770                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
771                uint64_t resBits;
772
773                if (size == 4) {
774                    floatInt arg1, arg2, res;
775                    arg1.i = arg1Bits;
776                    arg2.i = arg2Bits;
777                    res.f = arg1.f * arg2.f;
778                    resBits = res.i;
779                } else {
780                    doubleInt arg1, arg2, res;
781                    arg1.i = arg1Bits;
782                    arg2.i = arg2Bits;
783                    res.d = arg1.d * arg2.d;
784                    resBits = res.i;
785                }
786
787                result = insertBits(result, hiIndex, loIndex, resBits);
788            }
789            FpDestReg.uqw = result;
790        '''
791
792    class Mdivf(MediaOp):
        # Per-item floating point division (FpSrcReg1 / FpSrcReg2). Items
        # are 4 or 8 bytes wide (asserted) and are computed as float or
        # double respectively. Bit 0 of ext restricts the operation to item
        # 0. The result starts from the current FpDestReg value, so
        # unprocessed items keep their old bits.
793        code = '''
794            union floatInt
795            {
796                float f;
797                uint32_t i;
798            };
799            union doubleInt
800            {
801                double d;
802                uint64_t i;
803            };
804
805            assert(srcSize == destSize);
806            int size = srcSize;
807            int sizeBits = size * 8;
808            assert(srcSize == 4 || srcSize == 8);
809            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
810            uint64_t result = FpDestReg.uqw;
811
812            for (int i = 0; i < items; i++) {
813                int hiIndex = (i + 1) * sizeBits - 1;
814                int loIndex = (i + 0) * sizeBits;
815                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
816                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
817                uint64_t resBits;
818
819                if (size == 4) {
820                    floatInt arg1, arg2, res;
821                    arg1.i = arg1Bits;
822                    arg2.i = arg2Bits;
823                    res.f = arg1.f / arg2.f;
824                    resBits = res.i;
825                } else {
826                    doubleInt arg1, arg2, res;
827                    arg1.i = arg1Bits;
828                    arg2.i = arg2Bits;
829                    res.d = arg1.d / arg2.d;
830                    resBits = res.i;
831                }
832
833                result = insertBits(result, hiIndex, loIndex, resBits);
834            }
835            FpDestReg.uqw = result;
836        '''
837
838    class Maddi(MediaOp):
839        code = '''
840            assert(srcSize == destSize);
841            int size = srcSize;
842            int sizeBits = size * 8;
843            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
844            uint64_t result = FpDestReg.uqw;
845
846            for (int i = 0; i < items; i++) {
847                int hiIndex = (i + 1) * sizeBits - 1;
848                int loIndex = (i + 0) * sizeBits;
849                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
850                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
851                uint64_t resBits = arg1Bits + arg2Bits;
852                
853                if (ext & 0x2) {
854                    if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
855                        resBits = mask(sizeBits);
856                } else if (ext & 0x4) {
857                    int arg1Sign = bits(arg1Bits, sizeBits - 1);
858                    int arg2Sign = bits(arg2Bits, sizeBits - 1);
859                    int resSign = bits(resBits, sizeBits - 1);
860                    if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
861                        if (resSign == 0)
862                            resBits = (1 << (sizeBits - 1));
863                        else
864                            resBits = mask(sizeBits - 1);
865                    }
866                }
867
868                result = insertBits(result, hiIndex, loIndex, resBits);
869            }
870            FpDestReg.uqw = result;
871        '''
872
873    class Mmuli(MediaOp):
874        code = '''
875            int srcBits = srcSize * 8;
876            int destBits = destSize * 8;
877            assert(destBits <= 64);
878            assert(destSize >= srcSize);
879            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / destSize);
880            uint64_t result = FpDestReg.uqw;
881
882            for (int i = 0; i < items; i++) {
883                int srcHiIndex = (i + 1) * srcBits - 1;
884                int srcLoIndex = (i + 0) * srcBits;
885                uint64_t arg1Bits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
886                uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex);
887                uint64_t resBits;
888
889                if (ext & 0x2) {
890                    int64_t arg1 = arg1Bits |
891                        (0 - (arg1Bits & (1 << (srcBits - 1))));
892                    int64_t arg2 = arg2Bits |
893                        (0 - (arg2Bits & (1 << (srcBits - 1))));
894                    resBits = (uint64_t)(arg1 * arg2);
895                } else {
896                    resBits = arg1Bits * arg2Bits;
897                }
898
899                if (ext & 0x4)
900                    resBits += (1 << (destBits - 1));
901                
902                if (ext & 0x8)
903                    resBits >>= destBits;
904
905                int destHiIndex = (i + 1) * destBits - 1;
906                int destLoIndex = (i + 0) * destBits;
907                result = insertBits(result, destHiIndex, destLoIndex, resBits);
908            }
909            FpDestReg.uqw = result;
910        '''
911
912    class Cvti2f(MediaOp):
913        def __init__(self, dest, src, \
914                size = None, destSize = None, srcSize = None, ext = None):
915            super(Cvti2f, self).__init__(dest, src,\
916                    "InstRegIndex(0)", size, destSize, srcSize, ext)
917        code = '''
918            union floatInt
919            {
920                float f;
921                uint32_t i;
922            };
923            union doubleInt
924            {
925                double d;
926                uint64_t i;
927            };
928
929            assert(destSize == 4 || destSize == 8);
930            assert(srcSize == 4 || srcSize == 8);
931            int srcSizeBits = srcSize * 8;
932            int destSizeBits = destSize * 8;
933            int items;
934            int srcStart = 0;
935            int destStart = 0;
936            if (srcSize == 2 * destSize) {
937                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
938                if (ext & 0x2)
939                    destStart = destSizeBits * items;
940            } else if (destSize == 2 * srcSize) {
941                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
942                if (ext & 0x2)
943                    srcStart = srcSizeBits * items;
944            } else {
945                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
946            }
947            uint64_t result = FpDestReg.uqw;
948
949            for (int i = 0; i < items; i++) {
950                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
951                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
952                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
953                int64_t sArg = argBits | (0 - (argBits & (1 << srcHiIndex)));
954                double arg = sArg;
955
956                if (destSize == 4) {
957                    floatInt fi;
958                    fi.f = arg;
959                    argBits = fi.i;
960                } else {
961                    doubleInt di;
962                    di.d = arg;
963                    argBits = di.i;
964                }
965                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
966                int destLoIndex = destStart + (i + 0) * destSizeBits;
967                result = insertBits(result, destHiIndex, destLoIndex, argBits);
968            }
969            FpDestReg.uqw = result;
970        '''
971
972    class Cvtf2f(MediaOp):
973        def __init__(self, dest, src, \
974                size = None, destSize = None, srcSize = None, ext = None):
975            super(Cvtf2f, self).__init__(dest, src,\
976                    "InstRegIndex(0)", size, destSize, srcSize, ext)
977        code = '''
978            union floatInt
979            {
980                float f;
981                uint32_t i;
982            };
983            union doubleInt
984            {
985                double d;
986                uint64_t i;
987            };
988
989            assert(destSize == 4 || destSize == 8);
990            assert(srcSize == 4 || srcSize == 8);
991            int srcSizeBits = srcSize * 8;
992            int destSizeBits = destSize * 8;
993            int items;
994            int srcStart = 0;
995            int destStart = 0;
996            if (srcSize == 2 * destSize) {
997                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
998                if (ext & 0x2)
999                    destStart = destSizeBits * items;
1000            } else if (destSize == 2 * srcSize) {
1001                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
1002                if (ext & 0x2)
1003                    srcStart = srcSizeBits * items;
1004            } else {
1005                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
1006            }
1007            uint64_t result = FpDestReg.uqw;
1008
1009            for (int i = 0; i < items; i++) {
1010                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1011                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1012                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1013                double arg;
1014
1015                if (srcSize == 4) {
1016                    floatInt fi;
1017                    fi.i = argBits;
1018                    arg = fi.f;
1019                } else {
1020                    doubleInt di;
1021                    di.i = argBits;
1022                    arg = di.d;
1023                }
1024                if (destSize == 4) {
1025                    floatInt fi;
1026                    fi.f = arg;
1027                    argBits = fi.i;
1028                } else {
1029                    doubleInt di;
1030                    di.d = arg;
1031                    argBits = di.i;
1032                }
1033                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1034                int destLoIndex = destStart + (i + 0) * destSizeBits;
1035                result = insertBits(result, destHiIndex, destLoIndex, argBits);
1036            }
1037            FpDestReg.uqw = result;
1038        '''
1039
1040    class Mcmpi2r(MediaOp):
1041        code = '''
1042            union floatInt
1043            {
1044                float f;
1045                uint32_t i;
1046            };
1047            union doubleInt
1048            {
1049                double d;
1050                uint64_t i;
1051            };
1052
1053            assert(srcSize == destSize);
1054            int size = srcSize;
1055            int sizeBits = size * 8;
1056            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
1057            uint64_t result = FpDestReg.uqw;
1058
1059            for (int i = 0; i < items; i++) {
1060                int hiIndex = (i + 1) * sizeBits - 1;
1061                int loIndex = (i + 0) * sizeBits;
1062                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1063                int64_t arg1 = arg1Bits |
1064                    (0 - (arg1Bits & (1 << (sizeBits - 1))));
1065                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1066                int64_t arg2 = arg2Bits |
1067                    (0 - (arg2Bits & (1 << (sizeBits - 1))));
1068
1069                uint64_t resBits = 0;
1070                if ((ext & 0x2) == 0 && arg1 == arg2 ||
1071                        (ext & 0x2) == 0x2 && arg1 > arg2)
1072                    resBits = mask(sizeBits);
1073
1074                result = insertBits(result, hiIndex, loIndex, resBits);
1075            }
1076            FpDestReg.uqw = result;
1077        '''
1078}};
1079