mediaop.isa revision 6572:b0cef5e2dfdb
1/// Copyright (c) 2009 The Regents of The University of Michigan
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met: redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer;
8// redistributions in binary form must reproduce the above copyright
9// notice, this list of conditions and the following disclaimer in the
10// documentation and/or other materials provided with the distribution;
11// neither the name of the copyright holders nor the names of its
12// contributors may be used to endorse or promote products derived from
13// this software without specific prior written permission.
14//
15// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26//
27// Authors: Gabe Black
28
29def template MediaOpExecute {{
30        Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
31                Trace::InstRecord *traceData) const
32        {
33            Fault fault = NoFault;
34
35            %(op_decl)s;
36            %(op_rd)s;
37
38            %(code)s;
39
40            //Write the resulting state to the execution context
41            if(fault == NoFault)
42            {
43                %(op_wb)s;
44            }
45            return fault;
46        }
47}};
48
49def template MediaOpRegDeclare {{
50    class %(class_name)s : public %(base_class)s
51    {
52      protected:
53        void buildMe();
54
55      public:
56        %(class_name)s(ExtMachInst _machInst,
57                const char * instMnem,
58                bool isMicro, bool isDelayed, bool isFirst, bool isLast,
59                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
60                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
61
62        %(class_name)s(ExtMachInst _machInst,
63                const char * instMnem,
64                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
65                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
66
67        %(BasicExecDeclare)s
68    };
69}};
70
71def template MediaOpImmDeclare {{
72
73    class %(class_name)s : public %(base_class)s
74    {
75      protected:
76        void buildMe();
77
78      public:
79        %(class_name)s(ExtMachInst _machInst,
80                const char * instMnem,
81                bool isMicro, bool isDelayed, bool isFirst, bool isLast,
82                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
83                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
84
85        %(class_name)s(ExtMachInst _machInst,
86                const char * instMnem,
87                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
88                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
89
90        %(BasicExecDeclare)s
91    };
92}};
93
94def template MediaOpRegConstructor {{
95
96    inline void %(class_name)s::buildMe()
97    {
98        %(constructor)s;
99    }
100
101    inline %(class_name)s::%(class_name)s(
102            ExtMachInst machInst, const char * instMnem,
103            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
104            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
105        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
106                false, false, false, false,
107                _src1, _src2, _dest, _srcSize, _destSize, _ext,
108                %(op_class)s)
109    {
110        buildMe();
111    }
112
113    inline %(class_name)s::%(class_name)s(
114            ExtMachInst machInst, const char * instMnem,
115            bool isMicro, bool isDelayed, bool isFirst, bool isLast,
116            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
117            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
118        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
119                isMicro, isDelayed, isFirst, isLast,
120                _src1, _src2, _dest, _srcSize, _destSize, _ext,
121                %(op_class)s)
122    {
123        buildMe();
124    }
125}};
126
127def template MediaOpImmConstructor {{
128
129    inline void %(class_name)s::buildMe()
130    {
131        %(constructor)s;
132    }
133
134    inline %(class_name)s::%(class_name)s(
135            ExtMachInst machInst, const char * instMnem,
136            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
137            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
138        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
139                false, false, false, false,
140                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
141                %(op_class)s)
142    {
143        buildMe();
144    }
145
146    inline %(class_name)s::%(class_name)s(
147            ExtMachInst machInst, const char * instMnem,
148            bool isMicro, bool isDelayed, bool isFirst, bool isLast,
149            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
150            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
151        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
152                isMicro, isDelayed, isFirst, isLast,
153                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
154                %(op_class)s)
155    {
156        buildMe();
157    }
158}};
159
160let {{
161    # Make these empty strings so that concatenating onto
162    # them will always work.
163    header_output = ""
164    decoder_output = ""
165    exec_output = ""
166
167    immTemplates = (
168            MediaOpImmDeclare,
169            MediaOpImmConstructor,
170            MediaOpExecute)
171
172    regTemplates = (
173            MediaOpRegDeclare,
174            MediaOpRegConstructor,
175            MediaOpExecute)
176
177    class MediaOpMeta(type):
178        def buildCppClasses(self, name, Name, suffix, code):
179
180            # Globals to stick the output in
181            global header_output
182            global decoder_output
183            global exec_output
184
185            # If op2 is used anywhere, make register and immediate versions
186            # of this code.
187            matcher = re.compile("(?<!\\w)(?P<prefix>s?)op2(?P<typeQual>\\.\\w+)?")
188            match = matcher.search(code)
189            if match:
190                typeQual = ""
191                if match.group("typeQual"):
192                    typeQual = match.group("typeQual")
193                src2_name = "%spsrc2%s" % (match.group("prefix"), typeQual)
194                self.buildCppClasses(name, Name, suffix,
195                        matcher.sub(src2_name, code))
196                self.buildCppClasses(name + "i", Name, suffix + "Imm",
197                        matcher.sub("imm8", code))
198                return
199
200            base = "X86ISA::MediaOp"
201
202            # If imm8 shows up in the code, use the immediate templates, if
203            # not, hopefully the register ones will be correct.
204            matcher = re.compile("(?<!\w)imm8(?!\w)")
205            if matcher.search(code):
206                base += "Imm"
207                templates = immTemplates
208            else:
209                base += "Reg"
210                templates = regTemplates
211
212            # Get everything ready for the substitution
213            iop = InstObjParams(name, Name + suffix, base, {"code" : code})
214
215            # Generate the actual code (finally!)
216            header_output += templates[0].subst(iop)
217            decoder_output += templates[1].subst(iop)
218            exec_output += templates[2].subst(iop)
219
220
221        def __new__(mcls, Name, bases, dict):
222            abstract = False
223            name = Name.lower()
224            if "abstract" in dict:
225                abstract = dict['abstract']
226                del dict['abstract']
227
228            cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
229            if not abstract:
230                cls.className = Name
231                cls.base_mnemonic = name
232                code = cls.code
233
234                # Set up the C++ classes
235                mcls.buildCppClasses(cls, name, Name, "", code)
236
237                # Hook into the microassembler dict
238                global microopClasses
239                microopClasses[name] = cls
240
241                # If op2 is used anywhere, make register and immediate versions
242                # of this code.
243                matcher = re.compile("op2(?P<typeQual>\\.\\w+)?")
244                if matcher.search(code):
245                    microopClasses[name + 'i'] = cls
246            return cls
247
248
249    class MediaOp(X86Microop):
250        __metaclass__ = MediaOpMeta
251        # This class itself doesn't act as a microop
252        abstract = True
253
254        def __init__(self, dest, src1, op2,
255                size = None, destSize = None, srcSize = None, ext = None):
256            self.dest = dest
257            self.src1 = src1
258            self.op2 = op2
259            if size is not None:
260                self.srcSize = size
261                self.destSize = size
262            if srcSize is not None:
263                self.srcSize = srcSize
264            if destSize is not None:
265                self.destSize = destSize
266            if self.srcSize is None:
267                raise Exception, "Source size not set."
268            if self.destSize is None:
269                raise Exception, "Dest size not set."
270            if ext is None:
271                self.ext = 0
272            else:
273                self.ext = ext 
274
275        def getAllocator(self, *microFlags):
276            className = self.className
277            if self.mnemonic == self.base_mnemonic + 'i':
278                className += "Imm"
279            allocator = '''new %(class_name)s(machInst, macrocodeBlock
280                    %(flags)s, %(src1)s, %(op2)s, %(dest)s,
281                    %(srcSize)s, %(destSize)s, %(ext)s)''' % {
282                "class_name" : className,
283                "flags" : self.microFlagsText(microFlags),
284                "src1" : self.src1, "op2" : self.op2,
285                "dest" : self.dest,
286                "srcSize" : self.srcSize,
287                "destSize" : self.destSize,
288                "ext" : self.ext}
289            return allocator
290
291    class Mov2int(MediaOp):
292        def __init__(self, dest, src, \
293                size = None, destSize = None, srcSize = None, ext = None):
294            super(Mov2int, self).__init__(dest, src,\
295                    "InstRegIndex(0)", size, destSize, srcSize, ext)
296        code = '''
297            uint64_t fpSrcReg1 = bits(FpSrcReg1.uqw, srcSize * 8 - 1, 0);
298            DestReg = merge(DestReg, fpSrcReg1, destSize);
299        '''
300
301    class Mov2fp(MediaOp):
302        def __init__(self, dest, src, \
303                size = None, destSize = None, srcSize = None, ext = None):
304            super(Mov2fp, self).__init__(dest, src,\
305                    "InstRegIndex(0)", size, destSize, srcSize, ext)
306        code = '''
307            uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
308            FpDestReg.uqw =
309                insertBits(FpDestReg.uqw, destSize * 8 - 1, 0, srcReg1);
310        '''
311
312    class Unpack(MediaOp):
        # Interleaves elements of the two media source registers.
        #
        # "items" is the number of size-byte elements in half of a
        # FloatRegBits value.  A nonzero ext selects the upper half of each
        # source (offset = items), zero selects the lower half.  Element i of
        # the chosen half of the first source lands in result slot 2*i and
        # the same element of the second source in slot 2*i + 1.  The result
        # is built from zero, so the old destination contents are discarded.
313        code = '''
314            assert(srcSize == destSize);
315            int size = destSize;
316            int items = (sizeof(FloatRegBits) / size) / 2;
317            int offset = ext ? items : 0;
318            uint64_t result = 0;
319            for (int i = 0; i < items; i++) {
320                uint64_t pickedLow =
321                    bits(FpSrcReg1.uqw, (i + offset + 1) * 8 * size - 1,
322                                        (i + offset) * 8 * size);
323                result = insertBits(result,
324                                    (2 * i + 1) * 8 * size - 1,
325                                    (2 * i + 0) * 8 * size,
326                                    pickedLow);
327                uint64_t pickedHigh =
328                    bits(FpSrcReg2.uqw, (i + offset + 1) * 8 * size - 1,
329                                        (i + offset) * 8 * size);
330                result = insertBits(result,
331                                    (2 * i + 2) * 8 * size - 1,
332                                    (2 * i + 1) * 8 * size,
333                                    pickedHigh);
334            }
335            FpDestReg.uqw = result;
336        '''
337
338    class Pack(MediaOp):
339        code = '''
340            assert(srcSize == destSize * 2);
341            int items = (sizeof(FloatRegBits) / destSize);
342            int destBits = destSize * 8;
343            int srcBits = srcSize * 8;
344            uint64_t result = 0;
345            int i;
346            for (i = 0; i < items / 2; i++) {
347                uint64_t picked =
348                    bits(FpSrcReg1.uqw, (i + 1) * srcBits - 1,
349                                        (i + 0) * srcBits);
350                unsigned signBit = bits(picked, srcBits - 1);
351                uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
352
353                // Handle saturation.
354                if (signBit) {
355                    if (overflow != mask(destBits - srcBits + 1)) {
356                        if (ext & 0x1)
357                            picked = (1 << (destBits - 1));
358                        else
359                            picked = 0;
360                    }
361                } else {
362                    if (overflow != 0) {
363                        if (ext & 0x1)
364                            picked = mask(destBits - 1);
365                        else
366                            picked = mask(destBits);
367                    }
368                }
369                result = insertBits(result,
370                                    (i + 1) * destBits - 1,
371                                    (i + 0) * destBits,
372                                    picked);
373            }
374            for (;i < items; i++) {
375                uint64_t picked =
376                    bits(FpSrcReg2.uqw, (i - items + 1) * srcBits - 1,
377                                        (i - items + 0) * srcBits);
378                unsigned signBit = bits(picked, srcBits - 1);
379                uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
380
381                // Handle saturation.
382                if (signBit) {
383                    if (overflow != mask(destBits - srcBits + 1)) {
384                        if (ext & 0x1)
385                            picked = (1 << (destBits - 1));
386                        else
387                            picked = 0;
388                    }
389                } else {
390                    if (overflow != 0) {
391                        if (ext & 0x1)
392                            picked = mask(destBits - 1);
393                        else
394                            picked = mask(destBits);
395                    }
396                }
397                result = insertBits(result,
398                                    (i + 1) * destBits - 1,
399                                    (i + 0) * destBits,
400                                    picked);
401            }
402            FpDestReg.uqw = result;
403        '''
404
405    class Mxor(MediaOp):
406        def __init__(self, dest, src1, src2):
407            super(Mxor, self).__init__(dest, src1, src2, 1)
408        code = '''
409            FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw;
410        '''
411
412    class Mor(MediaOp):
413        def __init__(self, dest, src1, src2):
414            super(Mor, self).__init__(dest, src1, src2, 1)
415        code = '''
416            FpDestReg.uqw = FpSrcReg1.uqw | FpSrcReg2.uqw;
417        '''
418
419    class Mand(MediaOp):
420        def __init__(self, dest, src1, src2):
421            super(Mand, self).__init__(dest, src1, src2, 1)
422        code = '''
423            FpDestReg.uqw = FpSrcReg1.uqw & FpSrcReg2.uqw;
424        '''
425
426    class Mandn(MediaOp):
427        def __init__(self, dest, src1, src2):
428            super(Mandn, self).__init__(dest, src1, src2, 1)
429        code = '''
430            FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw;
431        '''
432
433    class Mminf(MediaOp):
        # Element-wise floating point minimum of the two media sources.
        #
        # Elements are 4 bytes (float) or 8 bytes (double) and source and
        # destination sizes must match.  Bit 0 of ext restricts the operation
        # to the lowest element; otherwise every element that fits in a
        # FloatRegBits value is processed.  The result starts out as the
        # current destination, so elements that are not processed keep their
        # old value.  The first source's bits are chosen only when
        # arg1 < arg2 holds; in every other case, including comparisons that
        # are false because an operand is a NaN, the second source's bits
        # are used.
434        code = '''
435            union floatInt
436            {
437                float f;
438                uint32_t i;
439            };
440            union doubleInt
441            {
442                double d;
443                uint64_t i;
444            };
445
446            assert(srcSize == destSize);
447            int size = srcSize;
448            int sizeBits = size * 8;
449            assert(srcSize == 4 || srcSize == 8);
450            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
451            uint64_t result = FpDestReg.uqw;
452
453            for (int i = 0; i < items; i++) {
454                double arg1, arg2;
455                int hiIndex = (i + 1) * sizeBits - 1;
456                int loIndex = (i + 0) * sizeBits;
457                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
458                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
459
460                if (size == 4) {
461                    floatInt fi;
462                    fi.i = arg1Bits;
463                    arg1 = fi.f;
464                    fi.i = arg2Bits;
465                    arg2 = fi.f;
466                } else {
467                    doubleInt di;
468                    di.i = arg1Bits;
469                    arg1 = di.d;
470                    di.i = arg2Bits;
471                    arg2 = di.d;
472                }
473
474                if (arg1 < arg2) {
475                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
476                } else {
477                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
478                }
479            }
480            FpDestReg.uqw = result;
481        '''
482
483    class Mmaxf(MediaOp):
        # Element-wise floating point maximum of the two media sources.
        #
        # Elements are 4 bytes (float) or 8 bytes (double) and source and
        # destination sizes must match.  Bit 0 of ext restricts the operation
        # to the lowest element; otherwise every element that fits in a
        # FloatRegBits value is processed.  The result starts out as the
        # current destination, so elements that are not processed keep their
        # old value.  The first source's bits are chosen only when
        # arg1 > arg2 holds; in every other case, including comparisons that
        # are false because an operand is a NaN, the second source's bits
        # are used.
484        code = '''
485            union floatInt
486            {
487                float f;
488                uint32_t i;
489            };
490            union doubleInt
491            {
492                double d;
493                uint64_t i;
494            };
495
496            assert(srcSize == destSize);
497            int size = srcSize;
498            int sizeBits = size * 8;
499            assert(srcSize == 4 || srcSize == 8);
500            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
501            uint64_t result = FpDestReg.uqw;
502
503            for (int i = 0; i < items; i++) {
504                double arg1, arg2;
505                int hiIndex = (i + 1) * sizeBits - 1;
506                int loIndex = (i + 0) * sizeBits;
507                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
508                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
509
510                if (size == 4) {
511                    floatInt fi;
512                    fi.i = arg1Bits;
513                    arg1 = fi.f;
514                    fi.i = arg2Bits;
515                    arg2 = fi.f;
516                } else {
517                    doubleInt di;
518                    di.i = arg1Bits;
519                    arg1 = di.d;
520                    di.i = arg2Bits;
521                    arg2 = di.d;
522                }
523
524                if (arg1 > arg2) {
525                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
526                } else {
527                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
528                }
529            }
530            FpDestReg.uqw = result;
531        '''
532
533    class Mmini(MediaOp):
534        code = '''
535
536            assert(srcSize == destSize);
537            int size = srcSize;
538            int sizeBits = size * 8;
539            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
540            uint64_t result = FpDestReg.uqw;
541
542            for (int i = 0; i < items; i++) {
543                int hiIndex = (i + 1) * sizeBits - 1;
544                int loIndex = (i + 0) * sizeBits;
545                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
546                int64_t arg1 = arg1Bits |
547                    (0 - (arg1Bits & (1 << (sizeBits - 1))));
548                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
549                int64_t arg2 = arg2Bits |
550                    (0 - (arg2Bits & (1 << (sizeBits - 1))));
551                uint64_t resBits;
552
553                if (ext & 0x2) {
554                    if (arg1 < arg2) {
555                        resBits = arg1Bits;
556                    } else {
557                        resBits = arg2Bits;
558                    }
559                } else {
560                    if (arg1Bits < arg2Bits) {
561                        resBits = arg1Bits;
562                    } else {
563                        resBits = arg2Bits;
564                    }
565                }
566                result = insertBits(result, hiIndex, loIndex, resBits);
567            }
568            FpDestReg.uqw = result;
569        '''
570
571    class Msqrt(MediaOp):
572        def __init__(self, dest, src, \
573                size = None, destSize = None, srcSize = None, ext = None):
574            super(Msqrt, self).__init__(dest, src,\
575                    "InstRegIndex(0)", size, destSize, srcSize, ext)
576        code = '''
577            union floatInt
578            {
579                float f;
580                uint32_t i;
581            };
582            union doubleInt
583            {
584                double d;
585                uint64_t i;
586            };
587
588            assert(srcSize == destSize);
589            int size = srcSize;
590            int sizeBits = size * 8;
591            assert(srcSize == 4 || srcSize == 8);
592            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
593            uint64_t result = FpDestReg.uqw;
594
595            for (int i = 0; i < items; i++) {
596                int hiIndex = (i + 1) * sizeBits - 1;
597                int loIndex = (i + 0) * sizeBits;
598                uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
599
600                if (size == 4) {
601                    floatInt fi;
602                    fi.i = argBits;
603                    fi.f = sqrt(fi.f);
604                    argBits = fi.i;
605                } else {
606                    doubleInt di;
607                    di.i = argBits;
608                    di.d = sqrt(di.d);
609                    argBits = di.i;
610                }
611                result = insertBits(result, hiIndex, loIndex, argBits);
612            }
613            FpDestReg.uqw = result;
614        '''
615
616    class Maddf(MediaOp):
        # Element-wise floating point addition of the two media sources.
        #
        # Elements are 4 bytes (float) or 8 bytes (double) and source and
        # destination sizes must match.  Bit 0 of ext restricts the operation
        # to the lowest element; otherwise every element that fits in a
        # FloatRegBits value is processed.  The result starts out as the
        # current destination, so elements that are not processed keep their
        # old value.  The unions reinterpret the element bits as float or
        # double and back.
617        code = '''
618            union floatInt
619            {
620                float f;
621                uint32_t i;
622            };
623            union doubleInt
624            {
625                double d;
626                uint64_t i;
627            };
628
629            assert(srcSize == destSize);
630            int size = srcSize;
631            int sizeBits = size * 8;
632            assert(srcSize == 4 || srcSize == 8);
633            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
634            uint64_t result = FpDestReg.uqw;
635
636            for (int i = 0; i < items; i++) {
637                int hiIndex = (i + 1) * sizeBits - 1;
638                int loIndex = (i + 0) * sizeBits;
639                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
640                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
641                uint64_t resBits;
642
643                if (size == 4) {
644                    floatInt arg1, arg2, res;
645                    arg1.i = arg1Bits;
646                    arg2.i = arg2Bits;
647                    res.f = arg1.f + arg2.f;
648                    resBits = res.i;
649                } else {
650                    doubleInt arg1, arg2, res;
651                    arg1.i = arg1Bits;
652                    arg2.i = arg2Bits;
653                    res.d = arg1.d + arg2.d;
654                    resBits = res.i;
655                }
656
657                result = insertBits(result, hiIndex, loIndex, resBits);
658            }
659            FpDestReg.uqw = result;
660        '''
661
662    class Msubf(MediaOp):
        # Element-wise floating point subtraction: first media source minus
        # the second.
        #
        # Elements are 4 bytes (float) or 8 bytes (double) and source and
        # destination sizes must match.  Bit 0 of ext restricts the operation
        # to the lowest element; otherwise every element that fits in a
        # FloatRegBits value is processed.  The result starts out as the
        # current destination, so elements that are not processed keep their
        # old value.  The unions reinterpret the element bits as float or
        # double and back.
663        code = '''
664            union floatInt
665            {
666                float f;
667                uint32_t i;
668            };
669            union doubleInt
670            {
671                double d;
672                uint64_t i;
673            };
674
675            assert(srcSize == destSize);
676            int size = srcSize;
677            int sizeBits = size * 8;
678            assert(srcSize == 4 || srcSize == 8);
679            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
680            uint64_t result = FpDestReg.uqw;
681
682            for (int i = 0; i < items; i++) {
683                int hiIndex = (i + 1) * sizeBits - 1;
684                int loIndex = (i + 0) * sizeBits;
685                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
686                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
687                uint64_t resBits;
688
689                if (size == 4) {
690                    floatInt arg1, arg2, res;
691                    arg1.i = arg1Bits;
692                    arg2.i = arg2Bits;
693                    res.f = arg1.f - arg2.f;
694                    resBits = res.i;
695                } else {
696                    doubleInt arg1, arg2, res;
697                    arg1.i = arg1Bits;
698                    arg2.i = arg2Bits;
699                    res.d = arg1.d - arg2.d;
700                    resBits = res.i;
701                }
702
703                result = insertBits(result, hiIndex, loIndex, resBits);
704            }
705            FpDestReg.uqw = result;
706        '''
707
708    class Mmulf(MediaOp):
        # Element-wise floating point multiplication of the two media
        # sources.
        #
        # Elements are 4 bytes (float) or 8 bytes (double) and source and
        # destination sizes must match.  Bit 0 of ext restricts the operation
        # to the lowest element; otherwise every element that fits in a
        # FloatRegBits value is processed.  The result starts out as the
        # current destination, so elements that are not processed keep their
        # old value.  The unions reinterpret the element bits as float or
        # double and back.
709        code = '''
710            union floatInt
711            {
712                float f;
713                uint32_t i;
714            };
715            union doubleInt
716            {
717                double d;
718                uint64_t i;
719            };
720
721            assert(srcSize == destSize);
722            int size = srcSize;
723            int sizeBits = size * 8;
724            assert(srcSize == 4 || srcSize == 8);
725            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
726            uint64_t result = FpDestReg.uqw;
727
728            for (int i = 0; i < items; i++) {
729                int hiIndex = (i + 1) * sizeBits - 1;
730                int loIndex = (i + 0) * sizeBits;
731                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
732                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
733                uint64_t resBits;
734
735                if (size == 4) {
736                    floatInt arg1, arg2, res;
737                    arg1.i = arg1Bits;
738                    arg2.i = arg2Bits;
739                    res.f = arg1.f * arg2.f;
740                    resBits = res.i;
741                } else {
742                    doubleInt arg1, arg2, res;
743                    arg1.i = arg1Bits;
744                    arg2.i = arg2Bits;
745                    res.d = arg1.d * arg2.d;
746                    resBits = res.i;
747                }
748
749                result = insertBits(result, hiIndex, loIndex, resBits);
750            }
751            FpDestReg.uqw = result;
752        '''
753
754    class Mdivf(MediaOp):
        # Element-wise floating point division: first media source divided
        # by the second.
        #
        # Elements are 4 bytes (float) or 8 bytes (double) and source and
        # destination sizes must match.  Bit 0 of ext restricts the operation
        # to the lowest element; otherwise every element that fits in a
        # FloatRegBits value is processed.  The result starts out as the
        # current destination, so elements that are not processed keep their
        # old value.  The unions reinterpret the element bits as float or
        # double and back.
755        code = '''
756            union floatInt
757            {
758                float f;
759                uint32_t i;
760            };
761            union doubleInt
762            {
763                double d;
764                uint64_t i;
765            };
766
767            assert(srcSize == destSize);
768            int size = srcSize;
769            int sizeBits = size * 8;
770            assert(srcSize == 4 || srcSize == 8);
771            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
772            uint64_t result = FpDestReg.uqw;
773
774            for (int i = 0; i < items; i++) {
775                int hiIndex = (i + 1) * sizeBits - 1;
776                int loIndex = (i + 0) * sizeBits;
777                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
778                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
779                uint64_t resBits;
780
781                if (size == 4) {
782                    floatInt arg1, arg2, res;
783                    arg1.i = arg1Bits;
784                    arg2.i = arg2Bits;
785                    res.f = arg1.f / arg2.f;
786                    resBits = res.i;
787                } else {
788                    doubleInt arg1, arg2, res;
789                    arg1.i = arg1Bits;
790                    arg2.i = arg2Bits;
791                    res.d = arg1.d / arg2.d;
792                    resBits = res.i;
793                }
794
795                result = insertBits(result, hiIndex, loIndex, resBits);
796            }
797            FpDestReg.uqw = result;
798        '''
799
800    class Maddi(MediaOp):
801        code = '''
802            assert(srcSize == destSize);
803            int size = srcSize;
804            int sizeBits = size * 8;
805            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
806            uint64_t result = FpDestReg.uqw;
807
808            for (int i = 0; i < items; i++) {
809                int hiIndex = (i + 1) * sizeBits - 1;
810                int loIndex = (i + 0) * sizeBits;
811                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
812                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
813                uint64_t resBits = arg1Bits + arg2Bits;
814                
815                if (ext & 0x2) {
816                    if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
817                        resBits = mask(sizeBits);
818                } else if (ext & 0x4) {
819                    int arg1Sign = bits(arg1Bits, sizeBits - 1);
820                    int arg2Sign = bits(arg2Bits, sizeBits - 1);
821                    int resSign = bits(resBits, sizeBits - 1);
822                    if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
823                        if (resSign == 0)
824                            resBits = (1 << (sizeBits - 1));
825                        else
826                            resBits = mask(sizeBits - 1);
827                    }
828                }
829
830                result = insertBits(result, hiIndex, loIndex, resBits);
831            }
832            FpDestReg.uqw = result;
833        '''
834
835    class Cvti2f(MediaOp):
836        def __init__(self, dest, src, \
837                size = None, destSize = None, srcSize = None, ext = None):
838            super(Cvti2f, self).__init__(dest, src,\
839                    "InstRegIndex(0)", size, destSize, srcSize, ext)
840        code = '''
841            union floatInt
842            {
843                float f;
844                uint32_t i;
845            };
846            union doubleInt
847            {
848                double d;
849                uint64_t i;
850            };
851
852            assert(destSize == 4 || destSize == 8);
853            assert(srcSize == 4 || srcSize == 8);
854            int srcSizeBits = srcSize * 8;
855            int destSizeBits = destSize * 8;
856            int items;
857            int srcStart = 0;
858            int destStart = 0;
859            if (srcSize == 2 * destSize) {
860                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
861                if (ext & 0x2)
862                    destStart = destSizeBits * items;
863            } else if (destSize == 2 * srcSize) {
864                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
865                if (ext & 0x2)
866                    srcStart = srcSizeBits * items;
867            } else {
868                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
869            }
870            uint64_t result = FpDestReg.uqw;
871
872            for (int i = 0; i < items; i++) {
873                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
874                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
875                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
876                int64_t sArg = argBits | (0 - (argBits & (1 << srcHiIndex)));
877                double arg = sArg;
878
879                if (destSize == 4) {
880                    floatInt fi;
881                    fi.f = arg;
882                    argBits = fi.i;
883                } else {
884                    doubleInt di;
885                    di.d = arg;
886                    argBits = di.i;
887                }
888                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
889                int destLoIndex = destStart + (i + 0) * destSizeBits;
890                result = insertBits(result, destHiIndex, destLoIndex, argBits);
891            }
892            FpDestReg.uqw = result;
893        '''
894
895    class Cvtf2f(MediaOp):
896        def __init__(self, dest, src, \
897                size = None, destSize = None, srcSize = None, ext = None):
898            super(Cvtf2f, self).__init__(dest, src,\
899                    "InstRegIndex(0)", size, destSize, srcSize, ext)
900        code = '''
901            union floatInt
902            {
903                float f;
904                uint32_t i;
905            };
906            union doubleInt
907            {
908                double d;
909                uint64_t i;
910            };
911
912            assert(destSize == 4 || destSize == 8);
913            assert(srcSize == 4 || srcSize == 8);
914            int srcSizeBits = srcSize * 8;
915            int destSizeBits = destSize * 8;
916            int items;
917            int srcStart = 0;
918            int destStart = 0;
919            if (srcSize == 2 * destSize) {
920                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
921                if (ext & 0x2)
922                    destStart = destSizeBits * items;
923            } else if (destSize == 2 * srcSize) {
924                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
925                if (ext & 0x2)
926                    srcStart = srcSizeBits * items;
927            } else {
928                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
929            }
930            uint64_t result = FpDestReg.uqw;
931
932            for (int i = 0; i < items; i++) {
933                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
934                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
935                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
936                double arg;
937
938                if (srcSize == 4) {
939                    floatInt fi;
940                    fi.i = argBits;
941                    arg = fi.f;
942                } else {
943                    doubleInt di;
944                    di.i = argBits;
945                    arg = di.d;
946                }
947                if (destSize == 4) {
948                    floatInt fi;
949                    fi.f = arg;
950                    argBits = fi.i;
951                } else {
952                    doubleInt di;
953                    di.d = arg;
954                    argBits = di.i;
955                }
956                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
957                int destLoIndex = destStart + (i + 0) * destSizeBits;
958                result = insertBits(result, destHiIndex, destLoIndex, argBits);
959            }
960            FpDestReg.uqw = result;
961        '''
962
963    class Mcmpi2r(MediaOp):
964        code = '''
965            union floatInt
966            {
967                float f;
968                uint32_t i;
969            };
970            union doubleInt
971            {
972                double d;
973                uint64_t i;
974            };
975
976            assert(srcSize == destSize);
977            int size = srcSize;
978            int sizeBits = size * 8;
979            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
980            uint64_t result = FpDestReg.uqw;
981
982            for (int i = 0; i < items; i++) {
983                int hiIndex = (i + 1) * sizeBits - 1;
984                int loIndex = (i + 0) * sizeBits;
985                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
986                int64_t arg1 = arg1Bits |
987                    (0 - (arg1Bits & (1 << (sizeBits - 1))));
988                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
989                int64_t arg2 = arg2Bits |
990                    (0 - (arg2Bits & (1 << (sizeBits - 1))));
991
992                uint64_t resBits = 0;
993                if ((ext & 0x2) == 0 && arg1 == arg2 ||
994                        (ext & 0x2) == 0x2 && arg1 > arg2)
995                    resBits = mask(sizeBits);
996
997                result = insertBits(result, hiIndex, loIndex, resBits);
998            }
999            FpDestReg.uqw = result;
1000        '''
1001}};
1002