mediaop.isa revision 6589:7b0f907855d5
1/// Copyright (c) 2009 The Regents of The University of Michigan
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met: redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer;
8// redistributions in binary form must reproduce the above copyright
9// notice, this list of conditions and the following disclaimer in the
10// documentation and/or other materials provided with the distribution;
11// neither the name of the copyright holders nor the names of its
12// contributors may be used to endorse or promote products derived from
13// this software without specific prior written permission.
14//
15// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26//
27// Authors: Gabe Black
28
// Template for the execute() method of a media microop. After substitution
// it expands the op_decl and op_rd snippets (by their names, operand
// declaration and operand read code -- generated outside this section),
// runs the microop's own code snippet, and expands the op_wb snippet only
// while `fault` is still NoFault. The fault value is always returned.
29def template MediaOpExecute {{
30        Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
31                Trace::InstRecord *traceData) const
32        {
33            Fault fault = NoFault;
34
35            %(op_decl)s;
36            %(op_rd)s;
37
38            %(code)s;
39
40            //Write the resulting state to the execution context
41            if(fault == NoFault)
42            {
43                %(op_wb)s;
44            }
45            return fault;
46        }
47}};
48
// Class declaration template for the register form of a media microop.
// The generated class derives from %(base_class)s and declares:
//  - buildMe(), a protected helper defined by the constructor template;
//  - a constructor taking explicit isMicro/isDelayed/isFirst/isLast flags;
//  - a constructor without those flags;
//  - whatever %(BasicExecDeclare)s expands to.
// Both constructors take two source register indices and one destination
// register index, plus the source/destination sizes and the `ext` value.
49def template MediaOpRegDeclare {{
50    class %(class_name)s : public %(base_class)s
51    {
52      protected:
53        void buildMe();
54
55      public:
56        %(class_name)s(ExtMachInst _machInst,
57                const char * instMnem,
58                bool isMicro, bool isDelayed, bool isFirst, bool isLast,
59                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
60                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
61
62        %(class_name)s(ExtMachInst _machInst,
63                const char * instMnem,
64                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
65                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
66
67        %(BasicExecDeclare)s
68    };
69}};
70
// Class declaration template for the immediate form of a media microop.
// Identical in shape to the register form, except that the second operand
// of both constructors is an immediate (`uint16_t _imm8`) instead of a
// second source register index.
71def template MediaOpImmDeclare {{
72
73    class %(class_name)s : public %(base_class)s
74    {
75      protected:
76        void buildMe();
77
78      public:
79        %(class_name)s(ExtMachInst _machInst,
80                const char * instMnem,
81                bool isMicro, bool isDelayed, bool isFirst, bool isLast,
82                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
83                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
84
85        %(class_name)s(ExtMachInst _machInst,
86                const char * instMnem,
87                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
88                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
89
90        %(BasicExecDeclare)s
91    };
92}};
93
// Constructor definitions for the register form of a media microop.
// buildMe() holds the %(constructor)s expansion and is called from both
// constructors. The constructor without micro-op flags forwards `false`
// for all four flags to the base class; the other forwards the flags it
// was given. Both pass "%(mnemonic)s", the operands, sizes, `ext` and
// %(op_class)s through to %(base_class)s.
94def template MediaOpRegConstructor {{
95
96    inline void %(class_name)s::buildMe()
97    {
98        %(constructor)s;
99    }
100
101    inline %(class_name)s::%(class_name)s(
102            ExtMachInst machInst, const char * instMnem,
103            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
104            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
105        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
106                false, false, false, false,
107                _src1, _src2, _dest, _srcSize, _destSize, _ext,
108                %(op_class)s)
109    {
110        buildMe();
111    }
112
113    inline %(class_name)s::%(class_name)s(
114            ExtMachInst machInst, const char * instMnem,
115            bool isMicro, bool isDelayed, bool isFirst, bool isLast,
116            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
117            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
118        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
119                isMicro, isDelayed, isFirst, isLast,
120                _src1, _src2, _dest, _srcSize, _destSize, _ext,
121                %(op_class)s)
122    {
123        buildMe();
124    }
125}};
126
// Constructor definitions for the immediate form of a media microop.
// Same structure as the register form: buildMe() holds the
// %(constructor)s expansion, the flag-less constructor forwards `false`
// for all four micro-op flags, and both constructors pass `_imm8` to
// %(base_class)s in the position of the second operand.
127def template MediaOpImmConstructor {{
128
129    inline void %(class_name)s::buildMe()
130    {
131        %(constructor)s;
132    }
133
134    inline %(class_name)s::%(class_name)s(
135            ExtMachInst machInst, const char * instMnem,
136            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
137            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
138        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
139                false, false, false, false,
140                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
141                %(op_class)s)
142    {
143        buildMe();
144    }
145
146    inline %(class_name)s::%(class_name)s(
147            ExtMachInst machInst, const char * instMnem,
148            bool isMicro, bool isDelayed, bool isFirst, bool isLast,
149            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
150            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
151        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
152                isMicro, isDelayed, isFirst, isLast,
153                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
154                %(op_class)s)
155    {
156        buildMe();
157    }
158}};
159
160let {{
161    # Make these empty strings so that concatenating onto
162    # them will always work.
163    header_output = ""
164    decoder_output = ""
165    exec_output = ""
166
    # Template triples used by MediaOpMeta.buildCppClasses. Position matters:
    # element 0 is appended to header_output, element 1 to decoder_output
    # and element 2 to exec_output.
167    immTemplates = (
168            MediaOpImmDeclare,
169            MediaOpImmConstructor,
170            MediaOpExecute)
171
    # Same layout as immTemplates, for the register form of a microop.
172    regTemplates = (
173            MediaOpRegDeclare,
174            MediaOpRegConstructor,
175            MediaOpExecute)
176
177    class MediaOpMeta(type):
178        def buildCppClasses(self, name, Name, suffix, code):
179
180            # Globals to stick the output in
181            global header_output
182            global decoder_output
183            global exec_output
184
185            # If op2 is used anywhere, make register and immediate versions
186            # of this code.
187            matcher = re.compile("(?<!\\w)(?P<prefix>s?)op2(?P<typeQual>\\.\\w+)?")
188            match = matcher.search(code)
189            if match:
190                typeQual = ""
191                if match.group("typeQual"):
192                    typeQual = match.group("typeQual")
193                src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual)
194                self.buildCppClasses(name, Name, suffix,
195                        matcher.sub(src2_name, code))
196                self.buildCppClasses(name + "i", Name, suffix + "Imm",
197                        matcher.sub("imm8", code))
198                return
199
200            base = "X86ISA::MediaOp"
201
202            # If imm8 shows up in the code, use the immediate templates, if
203            # not, hopefully the register ones will be correct.
204            matcher = re.compile("(?<!\w)imm8(?!\w)")
205            if matcher.search(code):
206                base += "Imm"
207                templates = immTemplates
208            else:
209                base += "Reg"
210                templates = regTemplates
211
212            # Get everything ready for the substitution
213            iop = InstObjParams(name, Name + suffix, base, {"code" : code})
214
215            # Generate the actual code (finally!)
216            header_output += templates[0].subst(iop)
217            decoder_output += templates[1].subst(iop)
218            exec_output += templates[2].subst(iop)
219
220
221        def __new__(mcls, Name, bases, dict):
222            abstract = False
223            name = Name.lower()
224            if "abstract" in dict:
225                abstract = dict['abstract']
226                del dict['abstract']
227
228            cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
229            if not abstract:
230                cls.className = Name
231                cls.base_mnemonic = name
232                code = cls.code
233
234                # Set up the C++ classes
235                mcls.buildCppClasses(cls, name, Name, "", code)
236
237                # Hook into the microassembler dict
238                global microopClasses
239                microopClasses[name] = cls
240
241                # If op2 is used anywhere, make register and immediate versions
242                # of this code.
243                matcher = re.compile("op2(?P<typeQual>\\.\\w+)?")
244                if matcher.search(code):
245                    microopClasses[name + 'i'] = cls
246            return cls
247
248
249    class MediaOp(X86Microop):
250        __metaclass__ = MediaOpMeta
251        # This class itself doesn't act as a microop
252        abstract = True
253
254        def __init__(self, dest, src1, op2,
255                size = None, destSize = None, srcSize = None, ext = None):
256            self.dest = dest
257            self.src1 = src1
258            self.op2 = op2
259            if size is not None:
260                self.srcSize = size
261                self.destSize = size
262            if srcSize is not None:
263                self.srcSize = srcSize
264            if destSize is not None:
265                self.destSize = destSize
266            if self.srcSize is None:
267                raise Exception, "Source size not set."
268            if self.destSize is None:
269                raise Exception, "Dest size not set."
270            if ext is None:
271                self.ext = 0
272            else:
273                self.ext = ext 
274
275        def getAllocator(self, *microFlags):
276            className = self.className
277            if self.mnemonic == self.base_mnemonic + 'i':
278                className += "Imm"
279            allocator = '''new %(class_name)s(machInst, macrocodeBlock
280                    %(flags)s, %(src1)s, %(op2)s, %(dest)s,
281                    %(srcSize)s, %(destSize)s, %(ext)s)''' % {
282                "class_name" : className,
283                "flags" : self.microFlagsText(microFlags),
284                "src1" : self.src1, "op2" : self.op2,
285                "dest" : self.dest,
286                "srcSize" : self.srcSize,
287                "destSize" : self.destSize,
288                "ext" : self.ext}
289            return allocator
290
291    class Mov2int(MediaOp):
        # Copies one srcSize-byte item out of FpSrcReg1 into DestReg.
        # The snippet reads imm8, so the immediate templates are used; the
        # third microcode operand (default 0) is the item index.
        #
        # items  = number of srcSize-byte items in a FloatRegBits
        # offset = imm8, reduced by `items` when bit 0 of src1 and bit 0 of
        #          ext are both set
        # If offset is within [0, items) the selected item is merged into
        # DestReg using destSize; otherwise DestReg is assigned to itself.
292        def __init__(self, dest, src1, src2 = 0, \
293                size = None, destSize = None, srcSize = None, ext = None):
294            super(Mov2int, self).__init__(dest, src1,\
295                    src2, size, destSize, srcSize, ext)
296        code = '''
297            int items = sizeof(FloatRegBits) / srcSize;
298            int offset = imm8;
299            if (bits(src1, 0) && (ext & 0x1))
300                offset -= items;
301            if (offset >= 0 && offset < items) {
302                uint64_t fpSrcReg1 =
303                    bits(FpSrcReg1.uqw,
304                            (offset + 1) * srcSize * 8 - 1,
305                            (offset + 0) * srcSize * 8);
306                DestReg = merge(0, fpSrcReg1, destSize);
307            } else {
308                DestReg = DestReg;
309            }
310        '''
311
312    class Mov2fp(MediaOp):
        # Inserts a value picked from SrcReg1 (using srcSize) into one
        # destSize-byte item of FpDestReg. The snippet reads imm8, so the
        # immediate templates are used; the third microcode operand
        # (default 0) is the item index.
        #
        # items  = number of destSize-byte items in a FloatRegBits
        # offset = imm8, reduced by `items` when bit 0 of dest and bit 0 of
        #          ext are both set
        # If offset is outside [0, items) FpDestReg is assigned to itself.
313        def __init__(self, dest, src1, src2 = 0, \
314                size = None, destSize = None, srcSize = None, ext = None):
315            super(Mov2fp, self).__init__(dest, src1,\
316                    src2, size, destSize, srcSize, ext)
317        code = '''
318            int items = sizeof(FloatRegBits) / destSize;
319            int offset = imm8;
320            if (bits(dest, 0) && (ext & 0x1))
321                offset -= items;
322            if (offset >= 0 && offset < items) {
323                uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
324                FpDestReg.uqw =
325                    insertBits(FpDestReg.uqw,
326                            (offset + 1) * destSize * 8 - 1,
327                            (offset + 0) * destSize * 8, srcReg1);
328            } else {
329                FpDestReg.uqw = FpDestReg.uqw;
330            }
331        '''
332
333    class Unpack(MediaOp):
        # Interleaves items from FpSrcReg1 and FpSrcReg2 into FpDestReg.
        # Requires srcSize == destSize. `items` is half the number of
        # size-byte items in a FloatRegBits; a nonzero ext selects the
        # upper half of each source (offset = items), zero the lower half.
        # Result item 2*i comes from FpSrcReg1 and item 2*i + 1 from
        # FpSrcReg2.
334        code = '''
335            assert(srcSize == destSize);
336            int size = destSize;
337            int items = (sizeof(FloatRegBits) / size) / 2;
338            int offset = ext ? items : 0;
339            uint64_t result = 0;
340            for (int i = 0; i < items; i++) {
341                uint64_t pickedLow =
342                    bits(FpSrcReg1.uqw, (i + offset + 1) * 8 * size - 1,
343                                        (i + offset) * 8 * size);
344                result = insertBits(result,
345                                    (2 * i + 1) * 8 * size - 1,
346                                    (2 * i + 0) * 8 * size,
347                                    pickedLow);
348                uint64_t pickedHigh =
349                    bits(FpSrcReg2.uqw, (i + offset + 1) * 8 * size - 1,
350                                        (i + offset) * 8 * size);
351                result = insertBits(result,
352                                    (2 * i + 2) * 8 * size - 1,
353                                    (2 * i + 1) * 8 * size,
354                                    pickedHigh);
355            }
356            FpDestReg.uqw = result;
357        '''
358
359    class Pack(MediaOp):
360        code = '''
361            assert(srcSize == destSize * 2);
362            int items = (sizeof(FloatRegBits) / destSize);
363            int destBits = destSize * 8;
364            int srcBits = srcSize * 8;
365            uint64_t result = 0;
366            int i;
367            for (i = 0; i < items / 2; i++) {
368                uint64_t picked =
369                    bits(FpSrcReg1.uqw, (i + 1) * srcBits - 1,
370                                        (i + 0) * srcBits);
371                unsigned signBit = bits(picked, srcBits - 1);
372                uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
373
374                // Handle saturation.
375                if (signBit) {
376                    if (overflow != mask(destBits - srcBits + 1)) {
377                        if (ext & 0x1)
378                            picked = (1 << (destBits - 1));
379                        else
380                            picked = 0;
381                    }
382                } else {
383                    if (overflow != 0) {
384                        if (ext & 0x1)
385                            picked = mask(destBits - 1);
386                        else
387                            picked = mask(destBits);
388                    }
389                }
390                result = insertBits(result,
391                                    (i + 1) * destBits - 1,
392                                    (i + 0) * destBits,
393                                    picked);
394            }
395            for (;i < items; i++) {
396                uint64_t picked =
397                    bits(FpSrcReg2.uqw, (i - items + 1) * srcBits - 1,
398                                        (i - items + 0) * srcBits);
399                unsigned signBit = bits(picked, srcBits - 1);
400                uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
401
402                // Handle saturation.
403                if (signBit) {
404                    if (overflow != mask(destBits - srcBits + 1)) {
405                        if (ext & 0x1)
406                            picked = (1 << (destBits - 1));
407                        else
408                            picked = 0;
409                    }
410                } else {
411                    if (overflow != 0) {
412                        if (ext & 0x1)
413                            picked = mask(destBits - 1);
414                        else
415                            picked = mask(destBits);
416                    }
417                }
418                result = insertBits(result,
419                                    (i + 1) * destBits - 1,
420                                    (i + 0) * destBits,
421                                    picked);
422            }
423            FpDestReg.uqw = result;
424        '''
425
426    class Mxor(MediaOp):
        # Bitwise XOR of the two source registers into FpDestReg. The
        # operation is size-independent, so a fixed size of 1 is passed on.
427        def __init__(self, dest, src1, src2):
428            super(Mxor, self).__init__(dest, src1, src2, 1)
429        code = '''
430            FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw;
431        '''
432
433    class Mor(MediaOp):
        # Bitwise OR of the two source registers into FpDestReg. The
        # operation is size-independent, so a fixed size of 1 is passed on.
434        def __init__(self, dest, src1, src2):
435            super(Mor, self).__init__(dest, src1, src2, 1)
436        code = '''
437            FpDestReg.uqw = FpSrcReg1.uqw | FpSrcReg2.uqw;
438        '''
439
440    class Mand(MediaOp):
        # Bitwise AND of the two source registers into FpDestReg. The
        # operation is size-independent, so a fixed size of 1 is passed on.
441        def __init__(self, dest, src1, src2):
442            super(Mand, self).__init__(dest, src1, src2, 1)
443        code = '''
444            FpDestReg.uqw = FpSrcReg1.uqw & FpSrcReg2.uqw;
445        '''
446
447    class Mandn(MediaOp):
        # AND-NOT: the complement of the first source ANDed with the second
        # source, written to FpDestReg. A fixed size of 1 is passed on.
448        def __init__(self, dest, src1, src2):
449            super(Mandn, self).__init__(dest, src1, src2, 1)
450        code = '''
451            FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw;
452        '''
453
454    class Mminf(MediaOp):
        # Floating point minimum, item by item. Requires srcSize ==
        # destSize and a size of 4 (float) or 8 (double). When bit 0 of ext
        # is set only item 0 is processed; the remaining bits of the result
        # keep the previous FpDestReg value. Each item's bit pattern is
        # reinterpreted through a union and the bits of the first source
        # are kept when arg1 < arg2; in every other case (including an
        # unordered comparison) the bits of the second source are kept.
455        code = '''
456            union floatInt
457            {
458                float f;
459                uint32_t i;
460            };
461            union doubleInt
462            {
463                double d;
464                uint64_t i;
465            };
466
467            assert(srcSize == destSize);
468            int size = srcSize;
469            int sizeBits = size * 8;
470            assert(srcSize == 4 || srcSize == 8);
471            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
472            uint64_t result = FpDestReg.uqw;
473
474            for (int i = 0; i < items; i++) {
475                double arg1, arg2;
476                int hiIndex = (i + 1) * sizeBits - 1;
477                int loIndex = (i + 0) * sizeBits;
478                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
479                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
480
481                if (size == 4) {
482                    floatInt fi;
483                    fi.i = arg1Bits;
484                    arg1 = fi.f;
485                    fi.i = arg2Bits;
486                    arg2 = fi.f;
487                } else {
488                    doubleInt di;
489                    di.i = arg1Bits;
490                    arg1 = di.d;
491                    di.i = arg2Bits;
492                    arg2 = di.d;
493                }
494
495                if (arg1 < arg2) {
496                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
497                } else {
498                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
499                }
500            }
501            FpDestReg.uqw = result;
502        '''
503
504    class Mmaxf(MediaOp):
        # Floating point maximum, item by item. Requires srcSize ==
        # destSize and a size of 4 (float) or 8 (double). When bit 0 of ext
        # is set only item 0 is processed; the remaining bits of the result
        # keep the previous FpDestReg value. Each item's bit pattern is
        # reinterpreted through a union and the bits of the first source
        # are kept when arg1 > arg2; in every other case (including an
        # unordered comparison) the bits of the second source are kept.
505        code = '''
506            union floatInt
507            {
508                float f;
509                uint32_t i;
510            };
511            union doubleInt
512            {
513                double d;
514                uint64_t i;
515            };
516
517            assert(srcSize == destSize);
518            int size = srcSize;
519            int sizeBits = size * 8;
520            assert(srcSize == 4 || srcSize == 8);
521            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
522            uint64_t result = FpDestReg.uqw;
523
524            for (int i = 0; i < items; i++) {
525                double arg1, arg2;
526                int hiIndex = (i + 1) * sizeBits - 1;
527                int loIndex = (i + 0) * sizeBits;
528                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
529                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
530
531                if (size == 4) {
532                    floatInt fi;
533                    fi.i = arg1Bits;
534                    arg1 = fi.f;
535                    fi.i = arg2Bits;
536                    arg2 = fi.f;
537                } else {
538                    doubleInt di;
539                    di.i = arg1Bits;
540                    arg1 = di.d;
541                    di.i = arg2Bits;
542                    arg2 = di.d;
543                }
544
545                if (arg1 > arg2) {
546                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
547                } else {
548                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
549                }
550            }
551            FpDestReg.uqw = result;
552        '''
553
554    class Mmini(MediaOp):
555        code = '''
556
557            assert(srcSize == destSize);
558            int size = srcSize;
559            int sizeBits = size * 8;
560            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
561            uint64_t result = FpDestReg.uqw;
562
563            for (int i = 0; i < items; i++) {
564                int hiIndex = (i + 1) * sizeBits - 1;
565                int loIndex = (i + 0) * sizeBits;
566                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
567                int64_t arg1 = arg1Bits |
568                    (0 - (arg1Bits & (1 << (sizeBits - 1))));
569                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
570                int64_t arg2 = arg2Bits |
571                    (0 - (arg2Bits & (1 << (sizeBits - 1))));
572                uint64_t resBits;
573
574                if (ext & 0x2) {
575                    if (arg1 < arg2) {
576                        resBits = arg1Bits;
577                    } else {
578                        resBits = arg2Bits;
579                    }
580                } else {
581                    if (arg1Bits < arg2Bits) {
582                        resBits = arg1Bits;
583                    } else {
584                        resBits = arg2Bits;
585                    }
586                }
587                result = insertBits(result, hiIndex, loIndex, resBits);
588            }
589            FpDestReg.uqw = result;
590        '''
591
592    class Mmaxi(MediaOp):
593        code = '''
594
595            assert(srcSize == destSize);
596            int size = srcSize;
597            int sizeBits = size * 8;
598            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
599            uint64_t result = FpDestReg.uqw;
600
601            for (int i = 0; i < items; i++) {
602                int hiIndex = (i + 1) * sizeBits - 1;
603                int loIndex = (i + 0) * sizeBits;
604                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
605                int64_t arg1 = arg1Bits |
606                    (0 - (arg1Bits & (1 << (sizeBits - 1))));
607                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
608                int64_t arg2 = arg2Bits |
609                    (0 - (arg2Bits & (1 << (sizeBits - 1))));
610                uint64_t resBits;
611
612                if (ext & 0x2) {
613                    if (arg1 > arg2) {
614                        resBits = arg1Bits;
615                    } else {
616                        resBits = arg2Bits;
617                    }
618                } else {
619                    if (arg1Bits > arg2Bits) {
620                        resBits = arg1Bits;
621                    } else {
622                        resBits = arg2Bits;
623                    }
624                }
625                result = insertBits(result, hiIndex, loIndex, resBits);
626            }
627            FpDestReg.uqw = result;
628        '''
629
630    class Msqrt(MediaOp):
        # Floating point square root of each item of FpSrcReg1. Takes a
        # single source operand; the literal "InstRegIndex(0)" is passed as
        # the unused second operand. Requires srcSize == destSize and a
        # size of 4 (float) or 8 (double). When bit 0 of ext is set only
        # item 0 is processed; the remaining bits of the result keep the
        # previous FpDestReg value.
631        def __init__(self, dest, src, \
632                size = None, destSize = None, srcSize = None, ext = None):
633            super(Msqrt, self).__init__(dest, src,\
634                    "InstRegIndex(0)", size, destSize, srcSize, ext)
635        code = '''
636            union floatInt
637            {
638                float f;
639                uint32_t i;
640            };
641            union doubleInt
642            {
643                double d;
644                uint64_t i;
645            };
646
647            assert(srcSize == destSize);
648            int size = srcSize;
649            int sizeBits = size * 8;
650            assert(srcSize == 4 || srcSize == 8);
651            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
652            uint64_t result = FpDestReg.uqw;
653
654            for (int i = 0; i < items; i++) {
655                int hiIndex = (i + 1) * sizeBits - 1;
656                int loIndex = (i + 0) * sizeBits;
657                uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
658
659                if (size == 4) {
660                    floatInt fi;
661                    fi.i = argBits;
662                    fi.f = sqrt(fi.f);
663                    argBits = fi.i;
664                } else {
665                    doubleInt di;
666                    di.i = argBits;
667                    di.d = sqrt(di.d);
668                    argBits = di.i;
669                }
670                result = insertBits(result, hiIndex, loIndex, argBits);
671            }
672            FpDestReg.uqw = result;
673        '''
674
675    class Maddf(MediaOp):
        # Floating point addition, item by item (FpSrcReg1 + FpSrcReg2).
        # Requires srcSize == destSize and a size of 4 (float) or 8
        # (double). When bit 0 of ext is set only item 0 is processed; the
        # remaining bits of the result keep the previous FpDestReg value.
        # Bit patterns are converted to and from floating point through
        # unions.
676        code = '''
677            union floatInt
678            {
679                float f;
680                uint32_t i;
681            };
682            union doubleInt
683            {
684                double d;
685                uint64_t i;
686            };
687
688            assert(srcSize == destSize);
689            int size = srcSize;
690            int sizeBits = size * 8;
691            assert(srcSize == 4 || srcSize == 8);
692            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
693            uint64_t result = FpDestReg.uqw;
694
695            for (int i = 0; i < items; i++) {
696                int hiIndex = (i + 1) * sizeBits - 1;
697                int loIndex = (i + 0) * sizeBits;
698                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
699                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
700                uint64_t resBits;
701
702                if (size == 4) {
703                    floatInt arg1, arg2, res;
704                    arg1.i = arg1Bits;
705                    arg2.i = arg2Bits;
706                    res.f = arg1.f + arg2.f;
707                    resBits = res.i;
708                } else {
709                    doubleInt arg1, arg2, res;
710                    arg1.i = arg1Bits;
711                    arg2.i = arg2Bits;
712                    res.d = arg1.d + arg2.d;
713                    resBits = res.i;
714                }
715
716                result = insertBits(result, hiIndex, loIndex, resBits);
717            }
718            FpDestReg.uqw = result;
719        '''
720
721    class Msubf(MediaOp):
        # Floating point subtraction, item by item (FpSrcReg1 - FpSrcReg2).
        # Requires srcSize == destSize and a size of 4 (float) or 8
        # (double). When bit 0 of ext is set only item 0 is processed; the
        # remaining bits of the result keep the previous FpDestReg value.
        # Bit patterns are converted to and from floating point through
        # unions.
722        code = '''
723            union floatInt
724            {
725                float f;
726                uint32_t i;
727            };
728            union doubleInt
729            {
730                double d;
731                uint64_t i;
732            };
733
734            assert(srcSize == destSize);
735            int size = srcSize;
736            int sizeBits = size * 8;
737            assert(srcSize == 4 || srcSize == 8);
738            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
739            uint64_t result = FpDestReg.uqw;
740
741            for (int i = 0; i < items; i++) {
742                int hiIndex = (i + 1) * sizeBits - 1;
743                int loIndex = (i + 0) * sizeBits;
744                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
745                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
746                uint64_t resBits;
747
748                if (size == 4) {
749                    floatInt arg1, arg2, res;
750                    arg1.i = arg1Bits;
751                    arg2.i = arg2Bits;
752                    res.f = arg1.f - arg2.f;
753                    resBits = res.i;
754                } else {
755                    doubleInt arg1, arg2, res;
756                    arg1.i = arg1Bits;
757                    arg2.i = arg2Bits;
758                    res.d = arg1.d - arg2.d;
759                    resBits = res.i;
760                }
761
762                result = insertBits(result, hiIndex, loIndex, resBits);
763            }
764            FpDestReg.uqw = result;
765        '''
766
767    class Mmulf(MediaOp):
        # Floating point multiplication, item by item
        # (FpSrcReg1 * FpSrcReg2). Requires srcSize == destSize and a size
        # of 4 (float) or 8 (double). When bit 0 of ext is set only item 0
        # is processed; the remaining bits of the result keep the previous
        # FpDestReg value. Bit patterns are converted to and from floating
        # point through unions.
768        code = '''
769            union floatInt
770            {
771                float f;
772                uint32_t i;
773            };
774            union doubleInt
775            {
776                double d;
777                uint64_t i;
778            };
779
780            assert(srcSize == destSize);
781            int size = srcSize;
782            int sizeBits = size * 8;
783            assert(srcSize == 4 || srcSize == 8);
784            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
785            uint64_t result = FpDestReg.uqw;
786
787            for (int i = 0; i < items; i++) {
788                int hiIndex = (i + 1) * sizeBits - 1;
789                int loIndex = (i + 0) * sizeBits;
790                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
791                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
792                uint64_t resBits;
793
794                if (size == 4) {
795                    floatInt arg1, arg2, res;
796                    arg1.i = arg1Bits;
797                    arg2.i = arg2Bits;
798                    res.f = arg1.f * arg2.f;
799                    resBits = res.i;
800                } else {
801                    doubleInt arg1, arg2, res;
802                    arg1.i = arg1Bits;
803                    arg2.i = arg2Bits;
804                    res.d = arg1.d * arg2.d;
805                    resBits = res.i;
806                }
807
808                result = insertBits(result, hiIndex, loIndex, resBits);
809            }
810            FpDestReg.uqw = result;
811        '''
812
813    class Mdivf(MediaOp):
        # Floating point division, item by item (FpSrcReg1 / FpSrcReg2).
        # Requires srcSize == destSize and a size of 4 (float) or 8
        # (double). When bit 0 of ext is set only item 0 is processed; the
        # remaining bits of the result keep the previous FpDestReg value.
        # Bit patterns are converted to and from floating point through
        # unions.
814        code = '''
815            union floatInt
816            {
817                float f;
818                uint32_t i;
819            };
820            union doubleInt
821            {
822                double d;
823                uint64_t i;
824            };
825
826            assert(srcSize == destSize);
827            int size = srcSize;
828            int sizeBits = size * 8;
829            assert(srcSize == 4 || srcSize == 8);
830            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
831            uint64_t result = FpDestReg.uqw;
832
833            for (int i = 0; i < items; i++) {
834                int hiIndex = (i + 1) * sizeBits - 1;
835                int loIndex = (i + 0) * sizeBits;
836                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
837                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
838                uint64_t resBits;
839
840                if (size == 4) {
841                    floatInt arg1, arg2, res;
842                    arg1.i = arg1Bits;
843                    arg2.i = arg2Bits;
844                    res.f = arg1.f / arg2.f;
845                    resBits = res.i;
846                } else {
847                    doubleInt arg1, arg2, res;
848                    arg1.i = arg1Bits;
849                    arg2.i = arg2Bits;
850                    res.d = arg1.d / arg2.d;
851                    resBits = res.i;
852                }
853
854                result = insertBits(result, hiIndex, loIndex, resBits);
855            }
856            FpDestReg.uqw = result;
857        '''
858
859    class Maddi(MediaOp):
860        code = '''
861            assert(srcSize == destSize);
862            int size = srcSize;
863            int sizeBits = size * 8;
864            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
865            uint64_t result = FpDestReg.uqw;
866
867            for (int i = 0; i < items; i++) {
868                int hiIndex = (i + 1) * sizeBits - 1;
869                int loIndex = (i + 0) * sizeBits;
870                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
871                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
872                uint64_t resBits = arg1Bits + arg2Bits;
873                
874                if (ext & 0x2) {
875                    if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
876                        resBits = mask(sizeBits);
877                } else if (ext & 0x4) {
878                    int arg1Sign = bits(arg1Bits, sizeBits - 1);
879                    int arg2Sign = bits(arg2Bits, sizeBits - 1);
880                    int resSign = bits(resBits, sizeBits - 1);
881                    if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
882                        if (resSign == 0)
883                            resBits = (1 << (sizeBits - 1));
884                        else
885                            resBits = mask(sizeBits - 1);
886                    }
887                }
888
889                result = insertBits(result, hiIndex, loIndex, resBits);
890            }
891            FpDestReg.uqw = result;
892        '''
893
894    class Msubi(MediaOp):
895        code = '''
896            assert(srcSize == destSize);
897            int size = srcSize;
898            int sizeBits = size * 8;
899            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
900            uint64_t result = FpDestReg.uqw;
901
902            for (int i = 0; i < items; i++) {
903                int hiIndex = (i + 1) * sizeBits - 1;
904                int loIndex = (i + 0) * sizeBits;
905                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
906                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
907                uint64_t resBits = arg1Bits - arg2Bits;
908                
909                if (ext & 0x2) {
910                    if (arg2Bits > arg1Bits) {
911                        resBits = 0;
912                    } else if (!findCarry(sizeBits, resBits,
913                                         arg1Bits, ~arg2Bits)) {
914                        resBits = mask(sizeBits);
915                    }
916                } else if (ext & 0x4) {
917                    int arg1Sign = bits(arg1Bits, sizeBits - 1);
918                    int arg2Sign = !bits(arg2Bits, sizeBits - 1);
919                    int resSign = bits(resBits, sizeBits - 1);
920                    if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
921                        if (resSign == 0)
922                            resBits = (1 << (sizeBits - 1));
923                        else
924                            resBits = mask(sizeBits - 1);
925                    }
926                }
927
928                result = insertBits(result, hiIndex, loIndex, resBits);
929            }
930            FpDestReg.uqw = result;
931        '''
932
933    class Mmuli(MediaOp):
934        code = '''
935            int srcBits = srcSize * 8;
936            int destBits = destSize * 8;
937            assert(destBits <= 64);
938            assert(destSize >= srcSize);
939            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / destSize);
940            uint64_t result = FpDestReg.uqw;
941
942            for (int i = 0; i < items; i++) {
943                int offset = 0;
944                if (ext & 16) {
945                    if (ext & 32)
946                        offset = i * (destBits - srcBits);
947                    else
948                        offset = i * (destBits - srcBits) + srcBits;
949                }
950                int srcHiIndex = (i + 1) * srcBits - 1 + offset;
951                int srcLoIndex = (i + 0) * srcBits + offset;
952                uint64_t arg1Bits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
953                uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex);
954                uint64_t resBits;
955
956                if (ext & 0x2) {
957                    int64_t arg1 = arg1Bits |
958                        (0 - (arg1Bits & (1 << (srcBits - 1))));
959                    int64_t arg2 = arg2Bits |
960                        (0 - (arg2Bits & (1 << (srcBits - 1))));
961                    resBits = (uint64_t)(arg1 * arg2);
962                } else {
963                    resBits = arg1Bits * arg2Bits;
964                }
965
966                if (ext & 0x4)
967                    resBits += (1 << (destBits - 1));
968                
969                if (ext & 0x8)
970                    resBits >>= destBits;
971
972                int destHiIndex = (i + 1) * destBits - 1;
973                int destLoIndex = (i + 0) * destBits;
974                result = insertBits(result, destHiIndex, destLoIndex, resBits);
975            }
976            FpDestReg.uqw = result;
977        '''
978
979    class Mavg(MediaOp):
980        code = '''
981            assert(srcSize == destSize);
982            int size = srcSize;
983            int sizeBits = size * 8;
984            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
985            uint64_t result = FpDestReg.uqw;
986
987            for (int i = 0; i < items; i++) {
988                int hiIndex = (i + 1) * sizeBits - 1;
989                int loIndex = (i + 0) * sizeBits;
990                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
991                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
992                uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;
993                
994                result = insertBits(result, hiIndex, loIndex, resBits);
995            }
996            FpDestReg.uqw = result;
997        '''
998
999    class Msad(MediaOp):
1000        code = '''
1001            int srcBits = srcSize * 8;
1002            int items = sizeof(FloatRegBits) / srcSize;
1003
1004            uint64_t sum = 0;
1005            for (int i = 0; i < items; i++) {
1006                int hiIndex = (i + 1) * srcBits - 1;
1007                int loIndex = (i + 0) * srcBits;
1008                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1009                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1010                int64_t resBits = arg1Bits - arg2Bits;
1011                if (resBits < 0)
1012                    resBits = -resBits;
1013                sum += resBits;
1014            }
1015            FpDestReg.uqw = sum & mask(destSize * 8);
1016        '''
1017
1018    class Msrl(MediaOp):
1019        code = '''
1020
1021            assert(srcSize == destSize);
1022            int size = srcSize;
1023            int sizeBits = size * 8;
1024            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
1025            uint64_t shiftAmt = op2.uqw;
1026            uint64_t result = FpDestReg.uqw;
1027
1028            for (int i = 0; i < items; i++) {
1029                int hiIndex = (i + 1) * sizeBits - 1;
1030                int loIndex = (i + 0) * sizeBits;
1031                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1032                uint64_t resBits;
1033                if (shiftAmt >= sizeBits) {
1034                    resBits = 0;
1035                } else {
1036                    resBits = (arg1Bits >> shiftAmt) &
1037                        mask(sizeBits - shiftAmt);
1038                }
1039
1040                result = insertBits(result, hiIndex, loIndex, resBits);
1041            }
1042            FpDestReg.uqw = result;
1043        '''
1044
1045    class Msra(MediaOp):
1046        code = '''
1047
1048            assert(srcSize == destSize);
1049            int size = srcSize;
1050            int sizeBits = size * 8;
1051            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
1052            uint64_t shiftAmt = op2.uqw;
1053            uint64_t result = FpDestReg.uqw;
1054
1055            for (int i = 0; i < items; i++) {
1056                int hiIndex = (i + 1) * sizeBits - 1;
1057                int loIndex = (i + 0) * sizeBits;
1058                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1059                uint64_t resBits;
1060                if (shiftAmt >= sizeBits) {
1061                    if (bits(arg1Bits, sizeBits - 1))
1062                        resBits = mask(sizeBits);
1063                    else
1064                        resBits = 0;
1065                } else {
1066                    resBits = (arg1Bits >> shiftAmt);
1067                    resBits = resBits |
1068                        (0 - (resBits & (1 << (sizeBits - 1 - shiftAmt))));
1069                }
1070
1071                result = insertBits(result, hiIndex, loIndex, resBits);
1072            }
1073            FpDestReg.uqw = result;
1074        '''
1075
1076    class Msll(MediaOp):
1077        code = '''
1078
1079            assert(srcSize == destSize);
1080            int size = srcSize;
1081            int sizeBits = size * 8;
1082            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
1083            uint64_t shiftAmt = op2.uqw;
1084            uint64_t result = FpDestReg.uqw;
1085
1086            for (int i = 0; i < items; i++) {
1087                int hiIndex = (i + 1) * sizeBits - 1;
1088                int loIndex = (i + 0) * sizeBits;
1089                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1090                uint64_t resBits;
1091                if (shiftAmt >= sizeBits) {
1092                    resBits = 0;
1093                } else {
1094                    resBits = (arg1Bits << shiftAmt);
1095                }
1096
1097                result = insertBits(result, hiIndex, loIndex, resBits);
1098            }
1099            FpDestReg.uqw = result;
1100        '''
1101
1102    class Cvti2f(MediaOp):
1103        def __init__(self, dest, src, \
1104                size = None, destSize = None, srcSize = None, ext = None):
1105            super(Cvti2f, self).__init__(dest, src,\
1106                    "InstRegIndex(0)", size, destSize, srcSize, ext)
1107        code = '''
1108            union floatInt
1109            {
1110                float f;
1111                uint32_t i;
1112            };
1113            union doubleInt
1114            {
1115                double d;
1116                uint64_t i;
1117            };
1118
1119            assert(destSize == 4 || destSize == 8);
1120            assert(srcSize == 4 || srcSize == 8);
1121            int srcSizeBits = srcSize * 8;
1122            int destSizeBits = destSize * 8;
1123            int items;
1124            int srcStart = 0;
1125            int destStart = 0;
1126            if (srcSize == 2 * destSize) {
1127                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
1128                if (ext & 0x2)
1129                    destStart = destSizeBits * items;
1130            } else if (destSize == 2 * srcSize) {
1131                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
1132                if (ext & 0x2)
1133                    srcStart = srcSizeBits * items;
1134            } else {
1135                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
1136            }
1137            uint64_t result = FpDestReg.uqw;
1138
1139            for (int i = 0; i < items; i++) {
1140                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1141                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1142                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1143                int64_t sArg = argBits | (0 - (argBits & (1 << srcHiIndex)));
1144                double arg = sArg;
1145
1146                if (destSize == 4) {
1147                    floatInt fi;
1148                    fi.f = arg;
1149                    argBits = fi.i;
1150                } else {
1151                    doubleInt di;
1152                    di.d = arg;
1153                    argBits = di.i;
1154                }
1155                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1156                int destLoIndex = destStart + (i + 0) * destSizeBits;
1157                result = insertBits(result, destHiIndex, destLoIndex, argBits);
1158            }
1159            FpDestReg.uqw = result;
1160        '''
1161
1162    class Cvtf2f(MediaOp):
1163        def __init__(self, dest, src, \
1164                size = None, destSize = None, srcSize = None, ext = None):
1165            super(Cvtf2f, self).__init__(dest, src,\
1166                    "InstRegIndex(0)", size, destSize, srcSize, ext)
1167        code = '''
1168            union floatInt
1169            {
1170                float f;
1171                uint32_t i;
1172            };
1173            union doubleInt
1174            {
1175                double d;
1176                uint64_t i;
1177            };
1178
1179            assert(destSize == 4 || destSize == 8);
1180            assert(srcSize == 4 || srcSize == 8);
1181            int srcSizeBits = srcSize * 8;
1182            int destSizeBits = destSize * 8;
1183            int items;
1184            int srcStart = 0;
1185            int destStart = 0;
1186            if (srcSize == 2 * destSize) {
1187                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
1188                if (ext & 0x2)
1189                    destStart = destSizeBits * items;
1190            } else if (destSize == 2 * srcSize) {
1191                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
1192                if (ext & 0x2)
1193                    srcStart = srcSizeBits * items;
1194            } else {
1195                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
1196            }
1197            uint64_t result = FpDestReg.uqw;
1198
1199            for (int i = 0; i < items; i++) {
1200                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1201                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1202                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1203                double arg;
1204
1205                if (srcSize == 4) {
1206                    floatInt fi;
1207                    fi.i = argBits;
1208                    arg = fi.f;
1209                } else {
1210                    doubleInt di;
1211                    di.i = argBits;
1212                    arg = di.d;
1213                }
1214                if (destSize == 4) {
1215                    floatInt fi;
1216                    fi.f = arg;
1217                    argBits = fi.i;
1218                } else {
1219                    doubleInt di;
1220                    di.d = arg;
1221                    argBits = di.i;
1222                }
1223                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1224                int destLoIndex = destStart + (i + 0) * destSizeBits;
1225                result = insertBits(result, destHiIndex, destLoIndex, argBits);
1226            }
1227            FpDestReg.uqw = result;
1228        '''
1229
1230    class Mcmpi2r(MediaOp):
1231        code = '''
1232            union floatInt
1233            {
1234                float f;
1235                uint32_t i;
1236            };
1237            union doubleInt
1238            {
1239                double d;
1240                uint64_t i;
1241            };
1242
1243            assert(srcSize == destSize);
1244            int size = srcSize;
1245            int sizeBits = size * 8;
1246            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
1247            uint64_t result = FpDestReg.uqw;
1248
1249            for (int i = 0; i < items; i++) {
1250                int hiIndex = (i + 1) * sizeBits - 1;
1251                int loIndex = (i + 0) * sizeBits;
1252                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1253                int64_t arg1 = arg1Bits |
1254                    (0 - (arg1Bits & (1 << (sizeBits - 1))));
1255                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1256                int64_t arg2 = arg2Bits |
1257                    (0 - (arg2Bits & (1 << (sizeBits - 1))));
1258
1259                uint64_t resBits = 0;
1260                if ((ext & 0x2) == 0 && arg1 == arg2 ||
1261                        (ext & 0x2) == 0x2 && arg1 > arg2)
1262                    resBits = mask(sizeBits);
1263
1264                result = insertBits(result, hiIndex, loIndex, resBits);
1265            }
1266            FpDestReg.uqw = result;
1267        '''
1268}};
1269