mediaop.isa revision 6594:a5dbea7ba3f9
1/// Copyright (c) 2009 The Regents of The University of Michigan
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met: redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer;
8// redistributions in binary form must reproduce the above copyright
9// notice, this list of conditions and the following disclaimer in the
10// documentation and/or other materials provided with the distribution;
11// neither the name of the copyright holders nor the names of its
12// contributors may be used to endorse or promote products derived from
13// this software without specific prior written permission.
14//
15// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26//
27// Authors: Gabe Black
28
29def template MediaOpExecute {{
30        Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
31                Trace::InstRecord *traceData) const
32        {
33            Fault fault = NoFault;
34
35            %(op_decl)s;
36            %(op_rd)s;
37
38            %(code)s;
39
40            //Write the resulting state to the execution context
41            if(fault == NoFault)
42            {
43                %(op_wb)s;
44            }
45            return fault;
46        }
47}};
48
49def template MediaOpRegDeclare {{
50    class %(class_name)s : public %(base_class)s
51    {
52      protected:
53        void buildMe();
54
55      public:
56        %(class_name)s(ExtMachInst _machInst,
57                const char * instMnem,
58                bool isMicro, bool isDelayed, bool isFirst, bool isLast,
59                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
60                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
61
62        %(class_name)s(ExtMachInst _machInst,
63                const char * instMnem,
64                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
65                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
66
67        %(BasicExecDeclare)s
68    };
69}};
70
71def template MediaOpImmDeclare {{
72
73    class %(class_name)s : public %(base_class)s
74    {
75      protected:
76        void buildMe();
77
78      public:
79        %(class_name)s(ExtMachInst _machInst,
80                const char * instMnem,
81                bool isMicro, bool isDelayed, bool isFirst, bool isLast,
82                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
83                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
84
85        %(class_name)s(ExtMachInst _machInst,
86                const char * instMnem,
87                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
88                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
89
90        %(BasicExecDeclare)s
91    };
92}};
93
94def template MediaOpRegConstructor {{
95
96    inline void %(class_name)s::buildMe()
97    {
98        %(constructor)s;
99    }
100
101    inline %(class_name)s::%(class_name)s(
102            ExtMachInst machInst, const char * instMnem,
103            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
104            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
105        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
106                false, false, false, false,
107                _src1, _src2, _dest, _srcSize, _destSize, _ext,
108                %(op_class)s)
109    {
110        buildMe();
111    }
112
113    inline %(class_name)s::%(class_name)s(
114            ExtMachInst machInst, const char * instMnem,
115            bool isMicro, bool isDelayed, bool isFirst, bool isLast,
116            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
117            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
118        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
119                isMicro, isDelayed, isFirst, isLast,
120                _src1, _src2, _dest, _srcSize, _destSize, _ext,
121                %(op_class)s)
122    {
123        buildMe();
124    }
125}};
126
127def template MediaOpImmConstructor {{
128
129    inline void %(class_name)s::buildMe()
130    {
131        %(constructor)s;
132    }
133
134    inline %(class_name)s::%(class_name)s(
135            ExtMachInst machInst, const char * instMnem,
136            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
137            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
138        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
139                false, false, false, false,
140                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
141                %(op_class)s)
142    {
143        buildMe();
144    }
145
146    inline %(class_name)s::%(class_name)s(
147            ExtMachInst machInst, const char * instMnem,
148            bool isMicro, bool isDelayed, bool isFirst, bool isLast,
149            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
150            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
151        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
152                isMicro, isDelayed, isFirst, isLast,
153                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
154                %(op_class)s)
155    {
156        buildMe();
157    }
158}};
159
160let {{
161    # Make these empty strings so that concatenating onto
162    # them will always work.
163    header_output = ""
164    decoder_output = ""
165    exec_output = ""
166
167    immTemplates = (
168            MediaOpImmDeclare,
169            MediaOpImmConstructor,
170            MediaOpExecute)
171
172    regTemplates = (
173            MediaOpRegDeclare,
174            MediaOpRegConstructor,
175            MediaOpExecute)
176
177    class MediaOpMeta(type):
178        def buildCppClasses(self, name, Name, suffix, code):
179
180            # Globals to stick the output in
181            global header_output
182            global decoder_output
183            global exec_output
184
185            # If op2 is used anywhere, make register and immediate versions
186            # of this code.
187            matcher = re.compile("(?<!\\w)(?P<prefix>s?)op2(?P<typeQual>\\.\\w+)?")
188            match = matcher.search(code)
189            if match:
190                typeQual = ""
191                if match.group("typeQual"):
192                    typeQual = match.group("typeQual")
193                src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual)
194                self.buildCppClasses(name, Name, suffix,
195                        matcher.sub(src2_name, code))
196                self.buildCppClasses(name + "i", Name, suffix + "Imm",
197                        matcher.sub("imm8", code))
198                return
199
200            base = "X86ISA::MediaOp"
201
202            # If imm8 shows up in the code, use the immediate templates, if
203            # not, hopefully the register ones will be correct.
204            matcher = re.compile("(?<!\w)imm8(?!\w)")
205            if matcher.search(code):
206                base += "Imm"
207                templates = immTemplates
208            else:
209                base += "Reg"
210                templates = regTemplates
211
212            # Get everything ready for the substitution
213            iop = InstObjParams(name, Name + suffix, base, {"code" : code})
214
215            # Generate the actual code (finally!)
216            header_output += templates[0].subst(iop)
217            decoder_output += templates[1].subst(iop)
218            exec_output += templates[2].subst(iop)
219
220
221        def __new__(mcls, Name, bases, dict):
222            abstract = False
223            name = Name.lower()
224            if "abstract" in dict:
225                abstract = dict['abstract']
226                del dict['abstract']
227
228            cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
229            if not abstract:
230                cls.className = Name
231                cls.base_mnemonic = name
232                code = cls.code
233
234                # Set up the C++ classes
235                mcls.buildCppClasses(cls, name, Name, "", code)
236
237                # Hook into the microassembler dict
238                global microopClasses
239                microopClasses[name] = cls
240
241                # If op2 is used anywhere, make register and immediate versions
242                # of this code.
243                matcher = re.compile("op2(?P<typeQual>\\.\\w+)?")
244                if matcher.search(code):
245                    microopClasses[name + 'i'] = cls
246            return cls
247
248
249    class MediaOp(X86Microop):
250        __metaclass__ = MediaOpMeta
251        # This class itself doesn't act as a microop
252        abstract = True
253
254        def __init__(self, dest, src1, op2,
255                size = None, destSize = None, srcSize = None, ext = None):
256            self.dest = dest
257            self.src1 = src1
258            self.op2 = op2
259            if size is not None:
260                self.srcSize = size
261                self.destSize = size
262            if srcSize is not None:
263                self.srcSize = srcSize
264            if destSize is not None:
265                self.destSize = destSize
266            if self.srcSize is None:
267                raise Exception, "Source size not set."
268            if self.destSize is None:
269                raise Exception, "Dest size not set."
270            if ext is None:
271                self.ext = 0
272            else:
273                self.ext = ext 
274
275        def getAllocator(self, *microFlags):
276            className = self.className
277            if self.mnemonic == self.base_mnemonic + 'i':
278                className += "Imm"
279            allocator = '''new %(class_name)s(machInst, macrocodeBlock
280                    %(flags)s, %(src1)s, %(op2)s, %(dest)s,
281                    %(srcSize)s, %(destSize)s, %(ext)s)''' % {
282                "class_name" : className,
283                "flags" : self.microFlagsText(microFlags),
284                "src1" : self.src1, "op2" : self.op2,
285                "dest" : self.dest,
286                "srcSize" : self.srcSize,
287                "destSize" : self.destSize,
288                "ext" : self.ext}
289            return allocator
290
291    class Mov2int(MediaOp):
292        def __init__(self, dest, src1, src2 = 0, \
293                size = None, destSize = None, srcSize = None, ext = None):
294            super(Mov2int, self).__init__(dest, src1,\
295                    src2, size, destSize, srcSize, ext)
296        code = '''
297            int items = sizeof(FloatRegBits) / srcSize;
298            int offset = imm8;
299            if (bits(src1, 0) && (ext & 0x1))
300                offset -= items;
301            if (offset >= 0 && offset < items) {
302                uint64_t fpSrcReg1 =
303                    bits(FpSrcReg1.uqw,
304                            (offset + 1) * srcSize * 8 - 1,
305                            (offset + 0) * srcSize * 8);
306                DestReg = merge(0, fpSrcReg1, destSize);
307            } else {
308                DestReg = DestReg;
309            }
310        '''
311
312    class Mov2fp(MediaOp):
313        def __init__(self, dest, src1, src2 = 0, \
314                size = None, destSize = None, srcSize = None, ext = None):
315            super(Mov2fp, self).__init__(dest, src1,\
316                    src2, size, destSize, srcSize, ext)
317        code = '''
318            int items = sizeof(FloatRegBits) / destSize;
319            int offset = imm8;
320            if (bits(dest, 0) && (ext & 0x1))
321                offset -= items;
322            if (offset >= 0 && offset < items) {
323                uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
324                FpDestReg.uqw =
325                    insertBits(FpDestReg.uqw,
326                            (offset + 1) * destSize * 8 - 1,
327                            (offset + 0) * destSize * 8, srcReg1);
328            } else {
329                FpDestReg.uqw = FpDestReg.uqw;
330            }
331        '''
332
333    class Movsign(MediaOp):
334        def __init__(self, dest, src, \
335                size = None, destSize = None, srcSize = None, ext = None):
336            super(Movsign, self).__init__(dest, src,\
337                    "InstRegIndex(0)", size, destSize, srcSize, ext)
338        code = '''
339            int items = sizeof(FloatRegBits) / srcSize;
340            uint64_t result = 0;
341            int offset = (ext & 0x1) ? items : 0;
342            for (int i = 0; i < items; i++) {
343                uint64_t picked =
344                    bits(FpSrcReg1.uqw, (i + 1) * 8 * srcSize - 1);
345                result = insertBits(result, i + offset, i + offset, picked);
346            }
347            DestReg = DestReg | result;
348        '''
349
    class Maskmov(MediaOp):
        # Masked move. For every item, the item of the first source replaces
        # the destination item when the top bit of the matching item in the
        # second source is set; all other destination items keep their
        # previous value. Source and destination sizes must be equal. When
        # bit 0 of ext is set only the first item is processed.
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                if (bits(FpSrcReg2.uqw, hiIndex))
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
            }
            FpDestReg.uqw = result;
        '''
367
    class Unpack(MediaOp):
        # Interleaves items of the two sources. Half a register's worth of
        # items is taken from each source: result item 2*i comes from the
        # first source and result item 2*i + 1 from the second, both read
        # at item index i + offset. The offset is zero, or "items" (the
        # upper half of the sources) when ext is non-zero. Source and
        # destination sizes must be equal.
        code = '''
            assert(srcSize == destSize);
            int size = destSize;
            int items = (sizeof(FloatRegBits) / size) / 2;
            int offset = ext ? items : 0;
            uint64_t result = 0;
            for (int i = 0; i < items; i++) {
                uint64_t pickedLow =
                    bits(FpSrcReg1.uqw, (i + offset + 1) * 8 * size - 1,
                                        (i + offset) * 8 * size);
                result = insertBits(result,
                                    (2 * i + 1) * 8 * size - 1,
                                    (2 * i + 0) * 8 * size,
                                    pickedLow);
                uint64_t pickedHigh =
                    bits(FpSrcReg2.uqw, (i + offset + 1) * 8 * size - 1,
                                        (i + offset) * 8 * size);
                result = insertBits(result,
                                    (2 * i + 2) * 8 * size - 1,
                                    (2 * i + 1) * 8 * size,
                                    pickedHigh);
            }
            FpDestReg.uqw = result;
        '''
393
394    class Pack(MediaOp):
395        code = '''
396            assert(srcSize == destSize * 2);
397            int items = (sizeof(FloatRegBits) / destSize);
398            int destBits = destSize * 8;
399            int srcBits = srcSize * 8;
400            uint64_t result = 0;
401            int i;
402            for (i = 0; i < items / 2; i++) {
403                uint64_t picked =
404                    bits(FpSrcReg1.uqw, (i + 1) * srcBits - 1,
405                                        (i + 0) * srcBits);
406                unsigned signBit = bits(picked, srcBits - 1);
407                uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
408
409                // Handle saturation.
410                if (signBit) {
411                    if (overflow != mask(destBits - srcBits + 1)) {
412                        if (ext & 0x1)
413                            picked = (1 << (destBits - 1));
414                        else
415                            picked = 0;
416                    }
417                } else {
418                    if (overflow != 0) {
419                        if (ext & 0x1)
420                            picked = mask(destBits - 1);
421                        else
422                            picked = mask(destBits);
423                    }
424                }
425                result = insertBits(result,
426                                    (i + 1) * destBits - 1,
427                                    (i + 0) * destBits,
428                                    picked);
429            }
430            for (;i < items; i++) {
431                uint64_t picked =
432                    bits(FpSrcReg2.uqw, (i - items + 1) * srcBits - 1,
433                                        (i - items + 0) * srcBits);
434                unsigned signBit = bits(picked, srcBits - 1);
435                uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
436
437                // Handle saturation.
438                if (signBit) {
439                    if (overflow != mask(destBits - srcBits + 1)) {
440                        if (ext & 0x1)
441                            picked = (1 << (destBits - 1));
442                        else
443                            picked = 0;
444                    }
445                } else {
446                    if (overflow != 0) {
447                        if (ext & 0x1)
448                            picked = mask(destBits - 1);
449                        else
450                            picked = mask(destBits);
451                    }
452                }
453                result = insertBits(result,
454                                    (i + 1) * destBits - 1,
455                                    (i + 0) * destBits,
456                                    picked);
457            }
458            FpDestReg.uqw = result;
459        '''
460
461    class Mxor(MediaOp):
462        def __init__(self, dest, src1, src2):
463            super(Mxor, self).__init__(dest, src1, src2, 1)
464        code = '''
465            FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw;
466        '''
467
468    class Mor(MediaOp):
469        def __init__(self, dest, src1, src2):
470            super(Mor, self).__init__(dest, src1, src2, 1)
471        code = '''
472            FpDestReg.uqw = FpSrcReg1.uqw | FpSrcReg2.uqw;
473        '''
474
475    class Mand(MediaOp):
476        def __init__(self, dest, src1, src2):
477            super(Mand, self).__init__(dest, src1, src2, 1)
478        code = '''
479            FpDestReg.uqw = FpSrcReg1.uqw & FpSrcReg2.uqw;
480        '''
481
482    class Mandn(MediaOp):
483        def __init__(self, dest, src1, src2):
484            super(Mandn, self).__init__(dest, src1, src2, 1)
485        code = '''
486            FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw;
487        '''
488
    class Mminf(MediaOp):
        # Item-wise floating point minimum. Items are 4 byte floats or
        # 8 byte doubles (asserted) and source and destination sizes must
        # match. When bit 0 of ext is set only the first item is processed
        # and the remaining destination bits keep their previous value.
        # The first source is chosen only when it compares strictly less
        # than the second; in every other case, including when the
        # comparison is false because of a NaN, the second source's bits
        # are used.
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                double arg1, arg2;
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);

                if (size == 4) {
                    floatInt fi;
                    fi.i = arg1Bits;
                    arg1 = fi.f;
                    fi.i = arg2Bits;
                    arg2 = fi.f;
                } else {
                    doubleInt di;
                    di.i = arg1Bits;
                    arg1 = di.d;
                    di.i = arg2Bits;
                    arg2 = di.d;
                }

                if (arg1 < arg2) {
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
                } else {
                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
                }
            }
            FpDestReg.uqw = result;
        '''
538
    class Mmaxf(MediaOp):
        # Item-wise floating point maximum. Items are 4 byte floats or
        # 8 byte doubles (asserted) and source and destination sizes must
        # match. When bit 0 of ext is set only the first item is processed
        # and the remaining destination bits keep their previous value.
        # The first source is chosen only when it compares strictly greater
        # than the second; in every other case, including when the
        # comparison is false because of a NaN, the second source's bits
        # are used.
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                double arg1, arg2;
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);

                if (size == 4) {
                    floatInt fi;
                    fi.i = arg1Bits;
                    arg1 = fi.f;
                    fi.i = arg2Bits;
                    arg2 = fi.f;
                } else {
                    doubleInt di;
                    di.i = arg1Bits;
                    arg1 = di.d;
                    di.i = arg2Bits;
                    arg2 = di.d;
                }

                if (arg1 > arg2) {
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
                } else {
                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
                }
            }
            FpDestReg.uqw = result;
        '''
588
589    class Mmini(MediaOp):
590        code = '''
591
592            assert(srcSize == destSize);
593            int size = srcSize;
594            int sizeBits = size * 8;
595            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
596            uint64_t result = FpDestReg.uqw;
597
598            for (int i = 0; i < items; i++) {
599                int hiIndex = (i + 1) * sizeBits - 1;
600                int loIndex = (i + 0) * sizeBits;
601                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
602                int64_t arg1 = arg1Bits |
603                    (0 - (arg1Bits & (1 << (sizeBits - 1))));
604                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
605                int64_t arg2 = arg2Bits |
606                    (0 - (arg2Bits & (1 << (sizeBits - 1))));
607                uint64_t resBits;
608
609                if (ext & 0x2) {
610                    if (arg1 < arg2) {
611                        resBits = arg1Bits;
612                    } else {
613                        resBits = arg2Bits;
614                    }
615                } else {
616                    if (arg1Bits < arg2Bits) {
617                        resBits = arg1Bits;
618                    } else {
619                        resBits = arg2Bits;
620                    }
621                }
622                result = insertBits(result, hiIndex, loIndex, resBits);
623            }
624            FpDestReg.uqw = result;
625        '''
626
627    class Mmaxi(MediaOp):
628        code = '''
629
630            assert(srcSize == destSize);
631            int size = srcSize;
632            int sizeBits = size * 8;
633            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
634            uint64_t result = FpDestReg.uqw;
635
636            for (int i = 0; i < items; i++) {
637                int hiIndex = (i + 1) * sizeBits - 1;
638                int loIndex = (i + 0) * sizeBits;
639                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
640                int64_t arg1 = arg1Bits |
641                    (0 - (arg1Bits & (1 << (sizeBits - 1))));
642                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
643                int64_t arg2 = arg2Bits |
644                    (0 - (arg2Bits & (1 << (sizeBits - 1))));
645                uint64_t resBits;
646
647                if (ext & 0x2) {
648                    if (arg1 > arg2) {
649                        resBits = arg1Bits;
650                    } else {
651                        resBits = arg2Bits;
652                    }
653                } else {
654                    if (arg1Bits > arg2Bits) {
655                        resBits = arg1Bits;
656                    } else {
657                        resBits = arg2Bits;
658                    }
659                }
660                result = insertBits(result, hiIndex, loIndex, resBits);
661            }
662            FpDestReg.uqw = result;
663        '''
664
665    class Msqrt(MediaOp):
666        def __init__(self, dest, src, \
667                size = None, destSize = None, srcSize = None, ext = None):
668            super(Msqrt, self).__init__(dest, src,\
669                    "InstRegIndex(0)", size, destSize, srcSize, ext)
670        code = '''
671            union floatInt
672            {
673                float f;
674                uint32_t i;
675            };
676            union doubleInt
677            {
678                double d;
679                uint64_t i;
680            };
681
682            assert(srcSize == destSize);
683            int size = srcSize;
684            int sizeBits = size * 8;
685            assert(srcSize == 4 || srcSize == 8);
686            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
687            uint64_t result = FpDestReg.uqw;
688
689            for (int i = 0; i < items; i++) {
690                int hiIndex = (i + 1) * sizeBits - 1;
691                int loIndex = (i + 0) * sizeBits;
692                uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
693
694                if (size == 4) {
695                    floatInt fi;
696                    fi.i = argBits;
697                    fi.f = sqrt(fi.f);
698                    argBits = fi.i;
699                } else {
700                    doubleInt di;
701                    di.i = argBits;
702                    di.d = sqrt(di.d);
703                    argBits = di.i;
704                }
705                result = insertBits(result, hiIndex, loIndex, argBits);
706            }
707            FpDestReg.uqw = result;
708        '''
709
    class Maddf(MediaOp):
        # Item-wise floating point addition of the two sources. Items are
        # 4 byte floats or 8 byte doubles (asserted) and source and
        # destination sizes must match. When bit 0 of ext is set only the
        # first item is processed and the remaining destination bits keep
        # their previous value. The unions reinterpret the raw item bits as
        # floating point values and back.
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f + arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d + arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''
755
    class Msubf(MediaOp):
        # Item-wise floating point subtraction: the second source is
        # subtracted from the first. Items are 4 byte floats or 8 byte
        # doubles (asserted) and source and destination sizes must match.
        # When bit 0 of ext is set only the first item is processed and the
        # remaining destination bits keep their previous value. The unions
        # reinterpret the raw item bits as floating point values and back.
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f - arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d - arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''
801
    class Mmulf(MediaOp):
        # Item-wise floating point multiplication of the two sources. Items
        # are 4 byte floats or 8 byte doubles (asserted) and source and
        # destination sizes must match. When bit 0 of ext is set only the
        # first item is processed and the remaining destination bits keep
        # their previous value. The unions reinterpret the raw item bits as
        # floating point values and back.
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
                uint64_t resBits;

                if (size == 4) {
                    floatInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.f = arg1.f * arg2.f;
                    resBits = res.i;
                } else {
                    doubleInt arg1, arg2, res;
                    arg1.i = arg1Bits;
                    arg2.i = arg2Bits;
                    res.d = arg1.d * arg2.d;
                    resBits = res.i;
                }

                result = insertBits(result, hiIndex, loIndex, resBits);
            }
            FpDestReg.uqw = result;
        '''
847
848    class Mdivf(MediaOp):
849        code = '''
850            union floatInt
851            {
852                float f;
853                uint32_t i;
854            };
855            union doubleInt
856            {
857                double d;
858                uint64_t i;
859            };
860
861            assert(srcSize == destSize);
862            int size = srcSize;
863            int sizeBits = size * 8;
864            assert(srcSize == 4 || srcSize == 8);
865            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
866            uint64_t result = FpDestReg.uqw;
867
868            for (int i = 0; i < items; i++) {
869                int hiIndex = (i + 1) * sizeBits - 1;
870                int loIndex = (i + 0) * sizeBits;
871                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
872                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
873                uint64_t resBits;
874
875                if (size == 4) {
876                    floatInt arg1, arg2, res;
877                    arg1.i = arg1Bits;
878                    arg2.i = arg2Bits;
879                    res.f = arg1.f / arg2.f;
880                    resBits = res.i;
881                } else {
882                    doubleInt arg1, arg2, res;
883                    arg1.i = arg1Bits;
884                    arg2.i = arg2Bits;
885                    res.d = arg1.d / arg2.d;
886                    resBits = res.i;
887                }
888
889                result = insertBits(result, hiIndex, loIndex, resBits);
890            }
891            FpDestReg.uqw = result;
892        '''
893
894    class Maddi(MediaOp):
895        code = '''
896            assert(srcSize == destSize);
897            int size = srcSize;
898            int sizeBits = size * 8;
899            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
900            uint64_t result = FpDestReg.uqw;
901
902            for (int i = 0; i < items; i++) {
903                int hiIndex = (i + 1) * sizeBits - 1;
904                int loIndex = (i + 0) * sizeBits;
905                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
906                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
907                uint64_t resBits = arg1Bits + arg2Bits;
908                
909                if (ext & 0x2) {
910                    if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
911                        resBits = mask(sizeBits);
912                } else if (ext & 0x4) {
913                    int arg1Sign = bits(arg1Bits, sizeBits - 1);
914                    int arg2Sign = bits(arg2Bits, sizeBits - 1);
915                    int resSign = bits(resBits, sizeBits - 1);
916                    if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
917                        if (resSign == 0)
918                            resBits = (1 << (sizeBits - 1));
919                        else
920                            resBits = mask(sizeBits - 1);
921                    }
922                }
923
924                result = insertBits(result, hiIndex, loIndex, resBits);
925            }
926            FpDestReg.uqw = result;
927        '''
928
929    class Msubi(MediaOp):
930        code = '''
931            assert(srcSize == destSize);
932            int size = srcSize;
933            int sizeBits = size * 8;
934            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
935            uint64_t result = FpDestReg.uqw;
936
937            for (int i = 0; i < items; i++) {
938                int hiIndex = (i + 1) * sizeBits - 1;
939                int loIndex = (i + 0) * sizeBits;
940                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
941                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
942                uint64_t resBits = arg1Bits - arg2Bits;
943                
944                if (ext & 0x2) {
945                    if (arg2Bits > arg1Bits) {
946                        resBits = 0;
947                    } else if (!findCarry(sizeBits, resBits,
948                                         arg1Bits, ~arg2Bits)) {
949                        resBits = mask(sizeBits);
950                    }
951                } else if (ext & 0x4) {
952                    int arg1Sign = bits(arg1Bits, sizeBits - 1);
953                    int arg2Sign = !bits(arg2Bits, sizeBits - 1);
954                    int resSign = bits(resBits, sizeBits - 1);
955                    if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
956                        if (resSign == 0)
957                            resBits = (1 << (sizeBits - 1));
958                        else
959                            resBits = mask(sizeBits - 1);
960                    }
961                }
962
963                result = insertBits(result, hiIndex, loIndex, resBits);
964            }
965            FpDestReg.uqw = result;
966        '''
967
968    class Mmuli(MediaOp):
969        code = '''
970            int srcBits = srcSize * 8;
971            int destBits = destSize * 8;
972            assert(destBits <= 64);
973            assert(destSize >= srcSize);
974            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / destSize);
975            uint64_t result = FpDestReg.uqw;
976
977            for (int i = 0; i < items; i++) {
978                int offset = 0;
979                if (ext & 16) {
980                    if (ext & 32)
981                        offset = i * (destBits - srcBits);
982                    else
983                        offset = i * (destBits - srcBits) + srcBits;
984                }
985                int srcHiIndex = (i + 1) * srcBits - 1 + offset;
986                int srcLoIndex = (i + 0) * srcBits + offset;
987                uint64_t arg1Bits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
988                uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex);
989                uint64_t resBits;
990
991                if (ext & 0x2) {
992                    int64_t arg1 = arg1Bits |
993                        (0 - (arg1Bits & (1 << (srcBits - 1))));
994                    int64_t arg2 = arg2Bits |
995                        (0 - (arg2Bits & (1 << (srcBits - 1))));
996                    resBits = (uint64_t)(arg1 * arg2);
997                } else {
998                    resBits = arg1Bits * arg2Bits;
999                }
1000
1001                if (ext & 0x4)
1002                    resBits += (1 << (destBits - 1));
1003                
1004                if (ext & 0x8)
1005                    resBits >>= destBits;
1006
1007                int destHiIndex = (i + 1) * destBits - 1;
1008                int destLoIndex = (i + 0) * destBits;
1009                result = insertBits(result, destHiIndex, destLoIndex, resBits);
1010            }
1011            FpDestReg.uqw = result;
1012        '''
1013
1014    class Mavg(MediaOp):
1015        code = '''
1016            assert(srcSize == destSize);
1017            int size = srcSize;
1018            int sizeBits = size * 8;
1019            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
1020            uint64_t result = FpDestReg.uqw;
1021
1022            for (int i = 0; i < items; i++) {
1023                int hiIndex = (i + 1) * sizeBits - 1;
1024                int loIndex = (i + 0) * sizeBits;
1025                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1026                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1027                uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;
1028                
1029                result = insertBits(result, hiIndex, loIndex, resBits);
1030            }
1031            FpDestReg.uqw = result;
1032        '''
1033
1034    class Msad(MediaOp):
1035        code = '''
1036            int srcBits = srcSize * 8;
1037            int items = sizeof(FloatRegBits) / srcSize;
1038
1039            uint64_t sum = 0;
1040            for (int i = 0; i < items; i++) {
1041                int hiIndex = (i + 1) * srcBits - 1;
1042                int loIndex = (i + 0) * srcBits;
1043                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1044                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1045                int64_t resBits = arg1Bits - arg2Bits;
1046                if (resBits < 0)
1047                    resBits = -resBits;
1048                sum += resBits;
1049            }
1050            FpDestReg.uqw = sum & mask(destSize * 8);
1051        '''
1052
1053    class Msrl(MediaOp):
1054        code = '''
1055
1056            assert(srcSize == destSize);
1057            int size = srcSize;
1058            int sizeBits = size * 8;
1059            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
1060            uint64_t shiftAmt = op2.uqw;
1061            uint64_t result = FpDestReg.uqw;
1062
1063            for (int i = 0; i < items; i++) {
1064                int hiIndex = (i + 1) * sizeBits - 1;
1065                int loIndex = (i + 0) * sizeBits;
1066                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1067                uint64_t resBits;
1068                if (shiftAmt >= sizeBits) {
1069                    resBits = 0;
1070                } else {
1071                    resBits = (arg1Bits >> shiftAmt) &
1072                        mask(sizeBits - shiftAmt);
1073                }
1074
1075                result = insertBits(result, hiIndex, loIndex, resBits);
1076            }
1077            FpDestReg.uqw = result;
1078        '''
1079
1080    class Msra(MediaOp):
1081        code = '''
1082
1083            assert(srcSize == destSize);
1084            int size = srcSize;
1085            int sizeBits = size * 8;
1086            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
1087            uint64_t shiftAmt = op2.uqw;
1088            uint64_t result = FpDestReg.uqw;
1089
1090            for (int i = 0; i < items; i++) {
1091                int hiIndex = (i + 1) * sizeBits - 1;
1092                int loIndex = (i + 0) * sizeBits;
1093                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1094                uint64_t resBits;
1095                if (shiftAmt >= sizeBits) {
1096                    if (bits(arg1Bits, sizeBits - 1))
1097                        resBits = mask(sizeBits);
1098                    else
1099                        resBits = 0;
1100                } else {
1101                    resBits = (arg1Bits >> shiftAmt);
1102                    resBits = resBits |
1103                        (0 - (resBits & (1 << (sizeBits - 1 - shiftAmt))));
1104                }
1105
1106                result = insertBits(result, hiIndex, loIndex, resBits);
1107            }
1108            FpDestReg.uqw = result;
1109        '''
1110
1111    class Msll(MediaOp):
1112        code = '''
1113
1114            assert(srcSize == destSize);
1115            int size = srcSize;
1116            int sizeBits = size * 8;
1117            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
1118            uint64_t shiftAmt = op2.uqw;
1119            uint64_t result = FpDestReg.uqw;
1120
1121            for (int i = 0; i < items; i++) {
1122                int hiIndex = (i + 1) * sizeBits - 1;
1123                int loIndex = (i + 0) * sizeBits;
1124                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1125                uint64_t resBits;
1126                if (shiftAmt >= sizeBits) {
1127                    resBits = 0;
1128                } else {
1129                    resBits = (arg1Bits << shiftAmt);
1130                }
1131
1132                result = insertBits(result, hiIndex, loIndex, resBits);
1133            }
1134            FpDestReg.uqw = result;
1135        '''
1136
1137    class Cvti2f(MediaOp):
1138        def __init__(self, dest, src, \
1139                size = None, destSize = None, srcSize = None, ext = None):
1140            super(Cvti2f, self).__init__(dest, src,\
1141                    "InstRegIndex(0)", size, destSize, srcSize, ext)
1142        code = '''
1143            union floatInt
1144            {
1145                float f;
1146                uint32_t i;
1147            };
1148            union doubleInt
1149            {
1150                double d;
1151                uint64_t i;
1152            };
1153
1154            assert(destSize == 4 || destSize == 8);
1155            assert(srcSize == 4 || srcSize == 8);
1156            int srcSizeBits = srcSize * 8;
1157            int destSizeBits = destSize * 8;
1158            int items;
1159            int srcStart = 0;
1160            int destStart = 0;
1161            if (srcSize == 2 * destSize) {
1162                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
1163                if (ext & 0x2)
1164                    destStart = destSizeBits * items;
1165            } else if (destSize == 2 * srcSize) {
1166                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
1167                if (ext & 0x2)
1168                    srcStart = srcSizeBits * items;
1169            } else {
1170                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
1171            }
1172            uint64_t result = FpDestReg.uqw;
1173
1174            for (int i = 0; i < items; i++) {
1175                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1176                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1177                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1178                int64_t sArg = argBits | (0 - (argBits & (1 << srcHiIndex)));
1179                double arg = sArg;
1180
1181                if (destSize == 4) {
1182                    floatInt fi;
1183                    fi.f = arg;
1184                    argBits = fi.i;
1185                } else {
1186                    doubleInt di;
1187                    di.d = arg;
1188                    argBits = di.i;
1189                }
1190                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1191                int destLoIndex = destStart + (i + 0) * destSizeBits;
1192                result = insertBits(result, destHiIndex, destLoIndex, argBits);
1193            }
1194            FpDestReg.uqw = result;
1195        '''
1196
1197    class Cvtf2f(MediaOp):
1198        def __init__(self, dest, src, \
1199                size = None, destSize = None, srcSize = None, ext = None):
1200            super(Cvtf2f, self).__init__(dest, src,\
1201                    "InstRegIndex(0)", size, destSize, srcSize, ext)
1202        code = '''
1203            union floatInt
1204            {
1205                float f;
1206                uint32_t i;
1207            };
1208            union doubleInt
1209            {
1210                double d;
1211                uint64_t i;
1212            };
1213
1214            assert(destSize == 4 || destSize == 8);
1215            assert(srcSize == 4 || srcSize == 8);
1216            int srcSizeBits = srcSize * 8;
1217            int destSizeBits = destSize * 8;
1218            int items;
1219            int srcStart = 0;
1220            int destStart = 0;
1221            if (srcSize == 2 * destSize) {
1222                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
1223                if (ext & 0x2)
1224                    destStart = destSizeBits * items;
1225            } else if (destSize == 2 * srcSize) {
1226                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
1227                if (ext & 0x2)
1228                    srcStart = srcSizeBits * items;
1229            } else {
1230                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
1231            }
1232            uint64_t result = FpDestReg.uqw;
1233
1234            for (int i = 0; i < items; i++) {
1235                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1236                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1237                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1238                double arg;
1239
1240                if (srcSize == 4) {
1241                    floatInt fi;
1242                    fi.i = argBits;
1243                    arg = fi.f;
1244                } else {
1245                    doubleInt di;
1246                    di.i = argBits;
1247                    arg = di.d;
1248                }
1249                if (destSize == 4) {
1250                    floatInt fi;
1251                    fi.f = arg;
1252                    argBits = fi.i;
1253                } else {
1254                    doubleInt di;
1255                    di.d = arg;
1256                    argBits = di.i;
1257                }
1258                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1259                int destLoIndex = destStart + (i + 0) * destSizeBits;
1260                result = insertBits(result, destHiIndex, destLoIndex, argBits);
1261            }
1262            FpDestReg.uqw = result;
1263        '''
1264
1265    class Mcmpi2r(MediaOp):
1266        code = '''
1267            union floatInt
1268            {
1269                float f;
1270                uint32_t i;
1271            };
1272            union doubleInt
1273            {
1274                double d;
1275                uint64_t i;
1276            };
1277
1278            assert(srcSize == destSize);
1279            int size = srcSize;
1280            int sizeBits = size * 8;
1281            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
1282            uint64_t result = FpDestReg.uqw;
1283
1284            for (int i = 0; i < items; i++) {
1285                int hiIndex = (i + 1) * sizeBits - 1;
1286                int loIndex = (i + 0) * sizeBits;
1287                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1288                int64_t arg1 = arg1Bits |
1289                    (0 - (arg1Bits & (1 << (sizeBits - 1))));
1290                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1291                int64_t arg2 = arg2Bits |
1292                    (0 - (arg2Bits & (1 << (sizeBits - 1))));
1293
1294                uint64_t resBits = 0;
1295                if ((ext & 0x2) == 0 && arg1 == arg2 ||
1296                        (ext & 0x2) == 0x2 && arg1 > arg2)
1297                    resBits = mask(sizeBits);
1298
1299                result = insertBits(result, hiIndex, loIndex, resBits);
1300            }
1301            FpDestReg.uqw = result;
1302        '''
1303}};
1304