mediaop.isa revision 6574:991d265901cc
1/// Copyright (c) 2009 The Regents of The University of Michigan
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met: redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer;
8// redistributions in binary form must reproduce the above copyright
9// notice, this list of conditions and the following disclaimer in the
10// documentation and/or other materials provided with the distribution;
11// neither the name of the copyright holders nor the names of its
12// contributors may be used to endorse or promote products derived from
13// this software without specific prior written permission.
14//
15// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26//
27// Authors: Gabe Black
28
29def template MediaOpExecute {{
30        Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
31                Trace::InstRecord *traceData) const
32        {
33            Fault fault = NoFault;
34
35            %(op_decl)s;
36            %(op_rd)s;
37
38            %(code)s;
39
40            //Write the resulting state to the execution context
41            if(fault == NoFault)
42            {
43                %(op_wb)s;
44            }
45            return fault;
46        }
47}};
48
49def template MediaOpRegDeclare {{
50    class %(class_name)s : public %(base_class)s
51    {
52      protected:
53        void buildMe();
54
55      public:
56        %(class_name)s(ExtMachInst _machInst,
57                const char * instMnem,
58                bool isMicro, bool isDelayed, bool isFirst, bool isLast,
59                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
60                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
61
62        %(class_name)s(ExtMachInst _machInst,
63                const char * instMnem,
64                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
65                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
66
67        %(BasicExecDeclare)s
68    };
69}};
70
71def template MediaOpImmDeclare {{
72
73    class %(class_name)s : public %(base_class)s
74    {
75      protected:
76        void buildMe();
77
78      public:
79        %(class_name)s(ExtMachInst _machInst,
80                const char * instMnem,
81                bool isMicro, bool isDelayed, bool isFirst, bool isLast,
82                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
83                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
84
85        %(class_name)s(ExtMachInst _machInst,
86                const char * instMnem,
87                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
88                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
89
90        %(BasicExecDeclare)s
91    };
92}};
93
94def template MediaOpRegConstructor {{
95
96    inline void %(class_name)s::buildMe()
97    {
98        %(constructor)s;
99    }
100
101    inline %(class_name)s::%(class_name)s(
102            ExtMachInst machInst, const char * instMnem,
103            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
104            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
105        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
106                false, false, false, false,
107                _src1, _src2, _dest, _srcSize, _destSize, _ext,
108                %(op_class)s)
109    {
110        buildMe();
111    }
112
113    inline %(class_name)s::%(class_name)s(
114            ExtMachInst machInst, const char * instMnem,
115            bool isMicro, bool isDelayed, bool isFirst, bool isLast,
116            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
117            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
118        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
119                isMicro, isDelayed, isFirst, isLast,
120                _src1, _src2, _dest, _srcSize, _destSize, _ext,
121                %(op_class)s)
122    {
123        buildMe();
124    }
125}};
126
127def template MediaOpImmConstructor {{
128
129    inline void %(class_name)s::buildMe()
130    {
131        %(constructor)s;
132    }
133
134    inline %(class_name)s::%(class_name)s(
135            ExtMachInst machInst, const char * instMnem,
136            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
137            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
138        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
139                false, false, false, false,
140                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
141                %(op_class)s)
142    {
143        buildMe();
144    }
145
146    inline %(class_name)s::%(class_name)s(
147            ExtMachInst machInst, const char * instMnem,
148            bool isMicro, bool isDelayed, bool isFirst, bool isLast,
149            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
150            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
151        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
152                isMicro, isDelayed, isFirst, isLast,
153                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
154                %(op_class)s)
155    {
156        buildMe();
157    }
158}};
159
160let {{
161    # Make these empty strings so that concatenating onto
162    # them will always work.
163    header_output = ""
164    decoder_output = ""
165    exec_output = ""
166
167    immTemplates = (
168            MediaOpImmDeclare,
169            MediaOpImmConstructor,
170            MediaOpExecute)
171
172    regTemplates = (
173            MediaOpRegDeclare,
174            MediaOpRegConstructor,
175            MediaOpExecute)
176
177    class MediaOpMeta(type):
178        def buildCppClasses(self, name, Name, suffix, code):
179
180            # Globals to stick the output in
181            global header_output
182            global decoder_output
183            global exec_output
184
185            # If op2 is used anywhere, make register and immediate versions
186            # of this code.
187            matcher = re.compile("(?<!\\w)(?P<prefix>s?)op2(?P<typeQual>\\.\\w+)?")
188            match = matcher.search(code)
189            if match:
190                typeQual = ""
191                if match.group("typeQual"):
192                    typeQual = match.group("typeQual")
193                src2_name = "%spsrc2%s" % (match.group("prefix"), typeQual)
194                self.buildCppClasses(name, Name, suffix,
195                        matcher.sub(src2_name, code))
196                self.buildCppClasses(name + "i", Name, suffix + "Imm",
197                        matcher.sub("imm8", code))
198                return
199
200            base = "X86ISA::MediaOp"
201
202            # If imm8 shows up in the code, use the immediate templates, if
203            # not, hopefully the register ones will be correct.
204            matcher = re.compile("(?<!\w)imm8(?!\w)")
205            if matcher.search(code):
206                base += "Imm"
207                templates = immTemplates
208            else:
209                base += "Reg"
210                templates = regTemplates
211
212            # Get everything ready for the substitution
213            iop = InstObjParams(name, Name + suffix, base, {"code" : code})
214
215            # Generate the actual code (finally!)
216            header_output += templates[0].subst(iop)
217            decoder_output += templates[1].subst(iop)
218            exec_output += templates[2].subst(iop)
219
220
221        def __new__(mcls, Name, bases, dict):
222            abstract = False
223            name = Name.lower()
224            if "abstract" in dict:
225                abstract = dict['abstract']
226                del dict['abstract']
227
228            cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
229            if not abstract:
230                cls.className = Name
231                cls.base_mnemonic = name
232                code = cls.code
233
234                # Set up the C++ classes
235                mcls.buildCppClasses(cls, name, Name, "", code)
236
237                # Hook into the microassembler dict
238                global microopClasses
239                microopClasses[name] = cls
240
241                # If op2 is used anywhere, make register and immediate versions
242                # of this code.
243                matcher = re.compile("op2(?P<typeQual>\\.\\w+)?")
244                if matcher.search(code):
245                    microopClasses[name + 'i'] = cls
246            return cls
247
248
249    class MediaOp(X86Microop):
250        __metaclass__ = MediaOpMeta
251        # This class itself doesn't act as a microop
252        abstract = True
253
254        def __init__(self, dest, src1, op2,
255                size = None, destSize = None, srcSize = None, ext = None):
256            self.dest = dest
257            self.src1 = src1
258            self.op2 = op2
259            if size is not None:
260                self.srcSize = size
261                self.destSize = size
262            if srcSize is not None:
263                self.srcSize = srcSize
264            if destSize is not None:
265                self.destSize = destSize
266            if self.srcSize is None:
267                raise Exception, "Source size not set."
268            if self.destSize is None:
269                raise Exception, "Dest size not set."
270            if ext is None:
271                self.ext = 0
272            else:
273                self.ext = ext 
274
275        def getAllocator(self, *microFlags):
276            className = self.className
277            if self.mnemonic == self.base_mnemonic + 'i':
278                className += "Imm"
279            allocator = '''new %(class_name)s(machInst, macrocodeBlock
280                    %(flags)s, %(src1)s, %(op2)s, %(dest)s,
281                    %(srcSize)s, %(destSize)s, %(ext)s)''' % {
282                "class_name" : className,
283                "flags" : self.microFlagsText(microFlags),
284                "src1" : self.src1, "op2" : self.op2,
285                "dest" : self.dest,
286                "srcSize" : self.srcSize,
287                "destSize" : self.destSize,
288                "ext" : self.ext}
289            return allocator
290
291    class Mov2int(MediaOp):
292        def __init__(self, dest, src, \
293                size = None, destSize = None, srcSize = None, ext = None):
294            super(Mov2int, self).__init__(dest, src,\
295                    "InstRegIndex(0)", size, destSize, srcSize, ext)
296        code = '''
297            uint64_t fpSrcReg1 = bits(FpSrcReg1.uqw, srcSize * 8 - 1, 0);
298            DestReg = merge(DestReg, fpSrcReg1, destSize);
299        '''
300
301    class Mov2fp(MediaOp):
302        def __init__(self, dest, src, \
303                size = None, destSize = None, srcSize = None, ext = None):
304            super(Mov2fp, self).__init__(dest, src,\
305                    "InstRegIndex(0)", size, destSize, srcSize, ext)
306        code = '''
307            uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
308            FpDestReg.uqw =
309                insertBits(FpDestReg.uqw, destSize * 8 - 1, 0, srcReg1);
310        '''
311
312    class Unpack(MediaOp):
313        code = '''
314            assert(srcSize == destSize);
315            int size = destSize;
316            int items = (sizeof(FloatRegBits) / size) / 2;
317            int offset = ext ? items : 0;
318            uint64_t result = 0;
319            for (int i = 0; i < items; i++) {
320                uint64_t pickedLow =
321                    bits(FpSrcReg1.uqw, (i + offset + 1) * 8 * size - 1,
322                                        (i + offset) * 8 * size);
323                result = insertBits(result,
324                                    (2 * i + 1) * 8 * size - 1,
325                                    (2 * i + 0) * 8 * size,
326                                    pickedLow);
327                uint64_t pickedHigh =
328                    bits(FpSrcReg2.uqw, (i + offset + 1) * 8 * size - 1,
329                                        (i + offset) * 8 * size);
330                result = insertBits(result,
331                                    (2 * i + 2) * 8 * size - 1,
332                                    (2 * i + 1) * 8 * size,
333                                    pickedHigh);
334            }
335            FpDestReg.uqw = result;
336        '''
337
338    class Pack(MediaOp):
339        code = '''
340            assert(srcSize == destSize * 2);
341            int items = (sizeof(FloatRegBits) / destSize);
342            int destBits = destSize * 8;
343            int srcBits = srcSize * 8;
344            uint64_t result = 0;
345            int i;
346            for (i = 0; i < items / 2; i++) {
347                uint64_t picked =
348                    bits(FpSrcReg1.uqw, (i + 1) * srcBits - 1,
349                                        (i + 0) * srcBits);
350                unsigned signBit = bits(picked, srcBits - 1);
351                uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
352
353                // Handle saturation.
354                if (signBit) {
355                    if (overflow != mask(destBits - srcBits + 1)) {
356                        if (ext & 0x1)
357                            picked = (1 << (destBits - 1));
358                        else
359                            picked = 0;
360                    }
361                } else {
362                    if (overflow != 0) {
363                        if (ext & 0x1)
364                            picked = mask(destBits - 1);
365                        else
366                            picked = mask(destBits);
367                    }
368                }
369                result = insertBits(result,
370                                    (i + 1) * destBits - 1,
371                                    (i + 0) * destBits,
372                                    picked);
373            }
374            for (;i < items; i++) {
375                uint64_t picked =
376                    bits(FpSrcReg2.uqw, (i - items + 1) * srcBits - 1,
377                                        (i - items + 0) * srcBits);
378                unsigned signBit = bits(picked, srcBits - 1);
379                uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
380
381                // Handle saturation.
382                if (signBit) {
383                    if (overflow != mask(destBits - srcBits + 1)) {
384                        if (ext & 0x1)
385                            picked = (1 << (destBits - 1));
386                        else
387                            picked = 0;
388                    }
389                } else {
390                    if (overflow != 0) {
391                        if (ext & 0x1)
392                            picked = mask(destBits - 1);
393                        else
394                            picked = mask(destBits);
395                    }
396                }
397                result = insertBits(result,
398                                    (i + 1) * destBits - 1,
399                                    (i + 0) * destBits,
400                                    picked);
401            }
402            FpDestReg.uqw = result;
403        '''
404
405    class Mxor(MediaOp):
406        def __init__(self, dest, src1, src2):
407            super(Mxor, self).__init__(dest, src1, src2, 1)
408        code = '''
409            FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw;
410        '''
411
412    class Mor(MediaOp):
413        def __init__(self, dest, src1, src2):
414            super(Mor, self).__init__(dest, src1, src2, 1)
415        code = '''
416            FpDestReg.uqw = FpSrcReg1.uqw | FpSrcReg2.uqw;
417        '''
418
419    class Mand(MediaOp):
420        def __init__(self, dest, src1, src2):
421            super(Mand, self).__init__(dest, src1, src2, 1)
422        code = '''
423            FpDestReg.uqw = FpSrcReg1.uqw & FpSrcReg2.uqw;
424        '''
425
426    class Mandn(MediaOp):
427        def __init__(self, dest, src1, src2):
428            super(Mandn, self).__init__(dest, src1, src2, 1)
429        code = '''
430            FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw;
431        '''
432
433    class Mminf(MediaOp):
434        code = '''
435            union floatInt
436            {
437                float f;
438                uint32_t i;
439            };
440            union doubleInt
441            {
442                double d;
443                uint64_t i;
444            };
445
446            assert(srcSize == destSize);
447            int size = srcSize;
448            int sizeBits = size * 8;
449            assert(srcSize == 4 || srcSize == 8);
450            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
451            uint64_t result = FpDestReg.uqw;
452
453            for (int i = 0; i < items; i++) {
454                double arg1, arg2;
455                int hiIndex = (i + 1) * sizeBits - 1;
456                int loIndex = (i + 0) * sizeBits;
457                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
458                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
459
460                if (size == 4) {
461                    floatInt fi;
462                    fi.i = arg1Bits;
463                    arg1 = fi.f;
464                    fi.i = arg2Bits;
465                    arg2 = fi.f;
466                } else {
467                    doubleInt di;
468                    di.i = arg1Bits;
469                    arg1 = di.d;
470                    di.i = arg2Bits;
471                    arg2 = di.d;
472                }
473
474                if (arg1 < arg2) {
475                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
476                } else {
477                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
478                }
479            }
480            FpDestReg.uqw = result;
481        '''
482
483    class Mmaxf(MediaOp):
484        code = '''
485            union floatInt
486            {
487                float f;
488                uint32_t i;
489            };
490            union doubleInt
491            {
492                double d;
493                uint64_t i;
494            };
495
496            assert(srcSize == destSize);
497            int size = srcSize;
498            int sizeBits = size * 8;
499            assert(srcSize == 4 || srcSize == 8);
500            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
501            uint64_t result = FpDestReg.uqw;
502
503            for (int i = 0; i < items; i++) {
504                double arg1, arg2;
505                int hiIndex = (i + 1) * sizeBits - 1;
506                int loIndex = (i + 0) * sizeBits;
507                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
508                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
509
510                if (size == 4) {
511                    floatInt fi;
512                    fi.i = arg1Bits;
513                    arg1 = fi.f;
514                    fi.i = arg2Bits;
515                    arg2 = fi.f;
516                } else {
517                    doubleInt di;
518                    di.i = arg1Bits;
519                    arg1 = di.d;
520                    di.i = arg2Bits;
521                    arg2 = di.d;
522                }
523
524                if (arg1 > arg2) {
525                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
526                } else {
527                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
528                }
529            }
530            FpDestReg.uqw = result;
531        '''
532
533    class Mmini(MediaOp):
534        code = '''
535
536            assert(srcSize == destSize);
537            int size = srcSize;
538            int sizeBits = size * 8;
539            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
540            uint64_t result = FpDestReg.uqw;
541
542            for (int i = 0; i < items; i++) {
543                int hiIndex = (i + 1) * sizeBits - 1;
544                int loIndex = (i + 0) * sizeBits;
545                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
546                int64_t arg1 = arg1Bits |
547                    (0 - (arg1Bits & (1 << (sizeBits - 1))));
548                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
549                int64_t arg2 = arg2Bits |
550                    (0 - (arg2Bits & (1 << (sizeBits - 1))));
551                uint64_t resBits;
552
553                if (ext & 0x2) {
554                    if (arg1 < arg2) {
555                        resBits = arg1Bits;
556                    } else {
557                        resBits = arg2Bits;
558                    }
559                } else {
560                    if (arg1Bits < arg2Bits) {
561                        resBits = arg1Bits;
562                    } else {
563                        resBits = arg2Bits;
564                    }
565                }
566                result = insertBits(result, hiIndex, loIndex, resBits);
567            }
568            FpDestReg.uqw = result;
569        '''
570
571    class Mmaxi(MediaOp):
572        code = '''
573
574            assert(srcSize == destSize);
575            int size = srcSize;
576            int sizeBits = size * 8;
577            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
578            uint64_t result = FpDestReg.uqw;
579
580            for (int i = 0; i < items; i++) {
581                int hiIndex = (i + 1) * sizeBits - 1;
582                int loIndex = (i + 0) * sizeBits;
583                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
584                int64_t arg1 = arg1Bits |
585                    (0 - (arg1Bits & (1 << (sizeBits - 1))));
586                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
587                int64_t arg2 = arg2Bits |
588                    (0 - (arg2Bits & (1 << (sizeBits - 1))));
589                uint64_t resBits;
590
591                if (ext & 0x2) {
592                    if (arg1 > arg2) {
593                        resBits = arg1Bits;
594                    } else {
595                        resBits = arg2Bits;
596                    }
597                } else {
598                    if (arg1Bits > arg2Bits) {
599                        resBits = arg1Bits;
600                    } else {
601                        resBits = arg2Bits;
602                    }
603                }
604                result = insertBits(result, hiIndex, loIndex, resBits);
605            }
606            FpDestReg.uqw = result;
607        '''
608
609    class Msqrt(MediaOp):
610        def __init__(self, dest, src, \
611                size = None, destSize = None, srcSize = None, ext = None):
612            super(Msqrt, self).__init__(dest, src,\
613                    "InstRegIndex(0)", size, destSize, srcSize, ext)
614        code = '''
615            union floatInt
616            {
617                float f;
618                uint32_t i;
619            };
620            union doubleInt
621            {
622                double d;
623                uint64_t i;
624            };
625
626            assert(srcSize == destSize);
627            int size = srcSize;
628            int sizeBits = size * 8;
629            assert(srcSize == 4 || srcSize == 8);
630            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
631            uint64_t result = FpDestReg.uqw;
632
633            for (int i = 0; i < items; i++) {
634                int hiIndex = (i + 1) * sizeBits - 1;
635                int loIndex = (i + 0) * sizeBits;
636                uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
637
638                if (size == 4) {
639                    floatInt fi;
640                    fi.i = argBits;
641                    fi.f = sqrt(fi.f);
642                    argBits = fi.i;
643                } else {
644                    doubleInt di;
645                    di.i = argBits;
646                    di.d = sqrt(di.d);
647                    argBits = di.i;
648                }
649                result = insertBits(result, hiIndex, loIndex, argBits);
650            }
651            FpDestReg.uqw = result;
652        '''
653
654    class Maddf(MediaOp):
655        code = '''
656            union floatInt
657            {
658                float f;
659                uint32_t i;
660            };
661            union doubleInt
662            {
663                double d;
664                uint64_t i;
665            };
666
667            assert(srcSize == destSize);
668            int size = srcSize;
669            int sizeBits = size * 8;
670            assert(srcSize == 4 || srcSize == 8);
671            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
672            uint64_t result = FpDestReg.uqw;
673
674            for (int i = 0; i < items; i++) {
675                int hiIndex = (i + 1) * sizeBits - 1;
676                int loIndex = (i + 0) * sizeBits;
677                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
678                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
679                uint64_t resBits;
680
681                if (size == 4) {
682                    floatInt arg1, arg2, res;
683                    arg1.i = arg1Bits;
684                    arg2.i = arg2Bits;
685                    res.f = arg1.f + arg2.f;
686                    resBits = res.i;
687                } else {
688                    doubleInt arg1, arg2, res;
689                    arg1.i = arg1Bits;
690                    arg2.i = arg2Bits;
691                    res.d = arg1.d + arg2.d;
692                    resBits = res.i;
693                }
694
695                result = insertBits(result, hiIndex, loIndex, resBits);
696            }
697            FpDestReg.uqw = result;
698        '''
699
700    class Msubf(MediaOp):
701        code = '''
702            union floatInt
703            {
704                float f;
705                uint32_t i;
706            };
707            union doubleInt
708            {
709                double d;
710                uint64_t i;
711            };
712
713            assert(srcSize == destSize);
714            int size = srcSize;
715            int sizeBits = size * 8;
716            assert(srcSize == 4 || srcSize == 8);
717            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
718            uint64_t result = FpDestReg.uqw;
719
720            for (int i = 0; i < items; i++) {
721                int hiIndex = (i + 1) * sizeBits - 1;
722                int loIndex = (i + 0) * sizeBits;
723                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
724                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
725                uint64_t resBits;
726
727                if (size == 4) {
728                    floatInt arg1, arg2, res;
729                    arg1.i = arg1Bits;
730                    arg2.i = arg2Bits;
731                    res.f = arg1.f - arg2.f;
732                    resBits = res.i;
733                } else {
734                    doubleInt arg1, arg2, res;
735                    arg1.i = arg1Bits;
736                    arg2.i = arg2Bits;
737                    res.d = arg1.d - arg2.d;
738                    resBits = res.i;
739                }
740
741                result = insertBits(result, hiIndex, loIndex, resBits);
742            }
743            FpDestReg.uqw = result;
744        '''
745
746    class Mmulf(MediaOp):
747        code = '''
748            union floatInt
749            {
750                float f;
751                uint32_t i;
752            };
753            union doubleInt
754            {
755                double d;
756                uint64_t i;
757            };
758
759            assert(srcSize == destSize);
760            int size = srcSize;
761            int sizeBits = size * 8;
762            assert(srcSize == 4 || srcSize == 8);
763            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
764            uint64_t result = FpDestReg.uqw;
765
766            for (int i = 0; i < items; i++) {
767                int hiIndex = (i + 1) * sizeBits - 1;
768                int loIndex = (i + 0) * sizeBits;
769                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
770                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
771                uint64_t resBits;
772
773                if (size == 4) {
774                    floatInt arg1, arg2, res;
775                    arg1.i = arg1Bits;
776                    arg2.i = arg2Bits;
777                    res.f = arg1.f * arg2.f;
778                    resBits = res.i;
779                } else {
780                    doubleInt arg1, arg2, res;
781                    arg1.i = arg1Bits;
782                    arg2.i = arg2Bits;
783                    res.d = arg1.d * arg2.d;
784                    resBits = res.i;
785                }
786
787                result = insertBits(result, hiIndex, loIndex, resBits);
788            }
789            FpDestReg.uqw = result;
790        '''
791
792    class Mdivf(MediaOp):
793        code = '''
794            union floatInt
795            {
796                float f;
797                uint32_t i;
798            };
799            union doubleInt
800            {
801                double d;
802                uint64_t i;
803            };
804
805            assert(srcSize == destSize);
806            int size = srcSize;
807            int sizeBits = size * 8;
808            assert(srcSize == 4 || srcSize == 8);
809            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
810            uint64_t result = FpDestReg.uqw;
811
812            for (int i = 0; i < items; i++) {
813                int hiIndex = (i + 1) * sizeBits - 1;
814                int loIndex = (i + 0) * sizeBits;
815                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
816                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
817                uint64_t resBits;
818
819                if (size == 4) {
820                    floatInt arg1, arg2, res;
821                    arg1.i = arg1Bits;
822                    arg2.i = arg2Bits;
823                    res.f = arg1.f / arg2.f;
824                    resBits = res.i;
825                } else {
826                    doubleInt arg1, arg2, res;
827                    arg1.i = arg1Bits;
828                    arg2.i = arg2Bits;
829                    res.d = arg1.d / arg2.d;
830                    resBits = res.i;
831                }
832
833                result = insertBits(result, hiIndex, loIndex, resBits);
834            }
835            FpDestReg.uqw = result;
836        '''
837
838    class Maddi(MediaOp):
839        code = '''
840            assert(srcSize == destSize);
841            int size = srcSize;
842            int sizeBits = size * 8;
843            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
844            uint64_t result = FpDestReg.uqw;
845
846            for (int i = 0; i < items; i++) {
847                int hiIndex = (i + 1) * sizeBits - 1;
848                int loIndex = (i + 0) * sizeBits;
849                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
850                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
851                uint64_t resBits = arg1Bits + arg2Bits;
852                
853                if (ext & 0x2) {
854                    if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
855                        resBits = mask(sizeBits);
856                } else if (ext & 0x4) {
857                    int arg1Sign = bits(arg1Bits, sizeBits - 1);
858                    int arg2Sign = bits(arg2Bits, sizeBits - 1);
859                    int resSign = bits(resBits, sizeBits - 1);
860                    if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
861                        if (resSign == 0)
862                            resBits = (1 << (sizeBits - 1));
863                        else
864                            resBits = mask(sizeBits - 1);
865                    }
866                }
867
868                result = insertBits(result, hiIndex, loIndex, resBits);
869            }
870            FpDestReg.uqw = result;
871        '''
872
873    class Cvti2f(MediaOp):
874        def __init__(self, dest, src, \
875                size = None, destSize = None, srcSize = None, ext = None):
876            super(Cvti2f, self).__init__(dest, src,\
877                    "InstRegIndex(0)", size, destSize, srcSize, ext)
878        code = '''
879            union floatInt
880            {
881                float f;
882                uint32_t i;
883            };
884            union doubleInt
885            {
886                double d;
887                uint64_t i;
888            };
889
890            assert(destSize == 4 || destSize == 8);
891            assert(srcSize == 4 || srcSize == 8);
892            int srcSizeBits = srcSize * 8;
893            int destSizeBits = destSize * 8;
894            int items;
895            int srcStart = 0;
896            int destStart = 0;
897            if (srcSize == 2 * destSize) {
898                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
899                if (ext & 0x2)
900                    destStart = destSizeBits * items;
901            } else if (destSize == 2 * srcSize) {
902                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
903                if (ext & 0x2)
904                    srcStart = srcSizeBits * items;
905            } else {
906                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
907            }
908            uint64_t result = FpDestReg.uqw;
909
910            for (int i = 0; i < items; i++) {
911                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
912                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
913                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
914                int64_t sArg = argBits | (0 - (argBits & (1 << srcHiIndex)));
915                double arg = sArg;
916
917                if (destSize == 4) {
918                    floatInt fi;
919                    fi.f = arg;
920                    argBits = fi.i;
921                } else {
922                    doubleInt di;
923                    di.d = arg;
924                    argBits = di.i;
925                }
926                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
927                int destLoIndex = destStart + (i + 0) * destSizeBits;
928                result = insertBits(result, destHiIndex, destLoIndex, argBits);
929            }
930            FpDestReg.uqw = result;
931        '''
932
933    class Cvtf2f(MediaOp):
        # Floating point to floating point conversion microop (float and
        # double elements, selected by srcSize and destSize).
        #
        # Only one source register is taken; the constructor forwards the
        # fixed string "InstRegIndex(0)" in the slot after src (presumably
        # the unused second source of MediaOp -- confirm against MediaOp).
        #
        # srcSize and destSize must each be 4 or 8 (asserted). Each source
        # element is read as a float (srcSize 4) or a double (srcSize 8),
        # held in a double, and then stored as a float (destSize 4) or a
        # double (destSize 8).
        #
        # The ext bits used below:
        #   0x1 - convert a single element instead of a full register.
        #   0x2 - when narrowing (srcSize == 2 * destSize) the results are
        #         written starting at bit destSizeBits * items of the
        #         destination; when widening (destSize == 2 * srcSize) the
        #         sources are read starting at bit srcSizeBits * items.
        #         It has no effect when both sizes are equal.
        # Destination bits that are not written keep the value already held
        # in FpDestReg.
934        def __init__(self, dest, src, \
935                size = None, destSize = None, srcSize = None, ext = None):
936            super(Cvtf2f, self).__init__(dest, src,\
937                    "InstRegIndex(0)", size, destSize, srcSize, ext)
938        code = '''
939            union floatInt
940            {
941                float f;
942                uint32_t i;
943            };
944            union doubleInt
945            {
946                double d;
947                uint64_t i;
948            };
949
950            assert(destSize == 4 || destSize == 8);
951            assert(srcSize == 4 || srcSize == 8);
952            int srcSizeBits = srcSize * 8;
953            int destSizeBits = destSize * 8;
954            int items;
955            int srcStart = 0;
956            int destStart = 0;
957            if (srcSize == 2 * destSize) {
958                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
959                if (ext & 0x2)
960                    destStart = destSizeBits * items;
961            } else if (destSize == 2 * srcSize) {
962                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
963                if (ext & 0x2)
964                    srcStart = srcSizeBits * items;
965            } else {
966                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
967            }
968            uint64_t result = FpDestReg.uqw;
969
970            for (int i = 0; i < items; i++) {
971                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
972                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
973                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
974                double arg;
975
976                if (srcSize == 4) {
977                    floatInt fi;
978                    fi.i = argBits;
979                    arg = fi.f;
980                } else {
981                    doubleInt di;
982                    di.i = argBits;
983                    arg = di.d;
984                }
985                if (destSize == 4) {
986                    floatInt fi;
987                    fi.f = arg;
988                    argBits = fi.i;
989                } else {
990                    doubleInt di;
991                    di.d = arg;
992                    argBits = di.i;
993                }
994                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
995                int destLoIndex = destStart + (i + 0) * destSizeBits;
996                result = insertBits(result, destHiIndex, destLoIndex, argBits);
997            }
998            FpDestReg.uqw = result;
999        '''
1000
1001    class Mcmpi2r(MediaOp):
1002        code = '''
1003            union floatInt
1004            {
1005                float f;
1006                uint32_t i;
1007            };
1008            union doubleInt
1009            {
1010                double d;
1011                uint64_t i;
1012            };
1013
1014            assert(srcSize == destSize);
1015            int size = srcSize;
1016            int sizeBits = size * 8;
1017            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
1018            uint64_t result = FpDestReg.uqw;
1019
1020            for (int i = 0; i < items; i++) {
1021                int hiIndex = (i + 1) * sizeBits - 1;
1022                int loIndex = (i + 0) * sizeBits;
1023                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1024                int64_t arg1 = arg1Bits |
1025                    (0 - (arg1Bits & (1 << (sizeBits - 1))));
1026                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1027                int64_t arg2 = arg2Bits |
1028                    (0 - (arg2Bits & (1 << (sizeBits - 1))));
1029
1030                uint64_t resBits = 0;
1031                if ((ext & 0x2) == 0 && arg1 == arg2 ||
1032                        (ext & 0x2) == 0x2 && arg1 > arg2)
1033                    resBits = mask(sizeBits);
1034
1035                result = insertBits(result, hiIndex, loIndex, resBits);
1036            }
1037            FpDestReg.uqw = result;
1038        '''
1039}};
1040