mediaop.isa revision 6594:a5dbea7ba3f9
14519Sgblack@eecs.umich.edu/// Copyright (c) 2009 The Regents of The University of Michigan
24519Sgblack@eecs.umich.edu// All rights reserved.
34519Sgblack@eecs.umich.edu//
44519Sgblack@eecs.umich.edu// Redistribution and use in source and binary forms, with or without
54519Sgblack@eecs.umich.edu// modification, are permitted provided that the following conditions are
64519Sgblack@eecs.umich.edu// met: redistributions of source code must retain the above copyright
74519Sgblack@eecs.umich.edu// notice, this list of conditions and the following disclaimer;
84519Sgblack@eecs.umich.edu// redistributions in binary form must reproduce the above copyright
94519Sgblack@eecs.umich.edu// notice, this list of conditions and the following disclaimer in the
104519Sgblack@eecs.umich.edu// documentation and/or other materials provided with the distribution;
114519Sgblack@eecs.umich.edu// neither the name of the copyright holders nor the names of its
124519Sgblack@eecs.umich.edu// contributors may be used to endorse or promote products derived from
134519Sgblack@eecs.umich.edu// this software without specific prior written permission.
144519Sgblack@eecs.umich.edu//
154519Sgblack@eecs.umich.edu// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
164519Sgblack@eecs.umich.edu// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
174519Sgblack@eecs.umich.edu// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
184519Sgblack@eecs.umich.edu// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
194519Sgblack@eecs.umich.edu// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
204519Sgblack@eecs.umich.edu// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
214519Sgblack@eecs.umich.edu// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
224519Sgblack@eecs.umich.edu// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
234519Sgblack@eecs.umich.edu// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
244519Sgblack@eecs.umich.edu// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
254519Sgblack@eecs.umich.edu// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
264519Sgblack@eecs.umich.edu//
274519Sgblack@eecs.umich.edu// Authors: Gabe Black
284519Sgblack@eecs.umich.edu
// Shared execute() definition for the media microops generated in this file.
// The op_decl and op_rd substitutions are expanded first, then the microop's
// own code, and the op_wb substitution is only reached when the code left
// "fault" equal to NoFault.
def template MediaOpExecute {{
        Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
                Trace::InstRecord *traceData) const
        {
            Fault fault = NoFault;

            %(op_decl)s;
            %(op_rd)s;

            %(code)s;

            //Write the resulting state to the execution context
            if(fault == NoFault)
            {
                %(op_wb)s;
            }
            return fault;
        }
}};
484519Sgblack@eecs.umich.edu
// Class declaration for the register form of a media microop (first entry of
// regTemplates). Two constructors are declared: one that takes explicit
// isMicro/isDelayed/isFirst/isLast flags and one that omits them. Both take
// two source register indices, a destination index, the source and
// destination sizes and an "ext" value.
def template MediaOpRegDeclare {{
    class %(class_name)s : public %(base_class)s
    {
      protected:
        void buildMe();

      public:
        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem,
                bool isMicro, bool isDelayed, bool isFirst, bool isLast,
                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem,
                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        %(BasicExecDeclare)s
    };
}};
704519Sgblack@eecs.umich.edu
// Class declaration for the immediate form of a media microop (first entry
// of immTemplates). It mirrors the register declaration except that the
// second source is a uint16_t parameter named _imm8 instead of a register
// index.
def template MediaOpImmDeclare {{

    class %(class_name)s : public %(base_class)s
    {
      protected:
        void buildMe();

      public:
        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem,
                bool isMicro, bool isDelayed, bool isFirst, bool isLast,
                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem,
                InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
                uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);

        %(BasicExecDeclare)s
    };
}};
934519Sgblack@eecs.umich.edu
// Constructor definitions for the register form of a media microop.
// buildMe() receives the generated "constructor" substitution and is called
// from both constructors. The constructor without flag parameters forwards
// false for all four flags to the base class; the other one forwards the
// flags it was given.
def template MediaOpRegConstructor {{

    inline void %(class_name)s::buildMe()
    {
        %(constructor)s;
    }

    inline %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem,
            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
                false, false, false, false,
                _src1, _src2, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        buildMe();
    }

    inline %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem,
            bool isMicro, bool isDelayed, bool isFirst, bool isLast,
            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
                isMicro, isDelayed, isFirst, isLast,
                _src1, _src2, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        buildMe();
    }
}};
1264519Sgblack@eecs.umich.edu
// Constructor definitions for the immediate form of a media microop.
// Same structure as the register constructors, except that _imm8 is passed
// to the base class in the position of the second source register.
def template MediaOpImmConstructor {{

    inline void %(class_name)s::buildMe()
    {
        %(constructor)s;
    }

    inline %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem,
            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
                false, false, false, false,
                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        buildMe();
    }

    inline %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem,
            bool isMicro, bool isDelayed, bool isFirst, bool isLast,
            InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
            uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem,
                isMicro, isDelayed, isFirst, isLast,
                _src1, _imm8, _dest, _srcSize, _destSize, _ext,
                %(op_class)s)
    {
        buildMe();
    }
}};
1594519Sgblack@eecs.umich.edu
1604519Sgblack@eecs.umich.edulet {{
1614519Sgblack@eecs.umich.edu    # Make these empty strings so that concatenating onto
1624519Sgblack@eecs.umich.edu    # them will always work.
1634519Sgblack@eecs.umich.edu    header_output = ""
1644519Sgblack@eecs.umich.edu    decoder_output = ""
1654519Sgblack@eecs.umich.edu    exec_output = ""
1664519Sgblack@eecs.umich.edu
1674519Sgblack@eecs.umich.edu    immTemplates = (
1684519Sgblack@eecs.umich.edu            MediaOpImmDeclare,
1694519Sgblack@eecs.umich.edu            MediaOpImmConstructor,
1704519Sgblack@eecs.umich.edu            MediaOpExecute)
1714519Sgblack@eecs.umich.edu
1724519Sgblack@eecs.umich.edu    regTemplates = (
1734519Sgblack@eecs.umich.edu            MediaOpRegDeclare,
1744519Sgblack@eecs.umich.edu            MediaOpRegConstructor,
1754519Sgblack@eecs.umich.edu            MediaOpExecute)
1764519Sgblack@eecs.umich.edu
1774519Sgblack@eecs.umich.edu    class MediaOpMeta(type):
1784519Sgblack@eecs.umich.edu        def buildCppClasses(self, name, Name, suffix, code):
1794519Sgblack@eecs.umich.edu
1804519Sgblack@eecs.umich.edu            # Globals to stick the output in
1814519Sgblack@eecs.umich.edu            global header_output
1824519Sgblack@eecs.umich.edu            global decoder_output
1834519Sgblack@eecs.umich.edu            global exec_output
1844519Sgblack@eecs.umich.edu
1854519Sgblack@eecs.umich.edu            # If op2 is used anywhere, make register and immediate versions
1864519Sgblack@eecs.umich.edu            # of this code.
1874519Sgblack@eecs.umich.edu            matcher = re.compile("(?<!\\w)(?P<prefix>s?)op2(?P<typeQual>\\.\\w+)?")
1884519Sgblack@eecs.umich.edu            match = matcher.search(code)
1894519Sgblack@eecs.umich.edu            if match:
1904519Sgblack@eecs.umich.edu                typeQual = ""
1914519Sgblack@eecs.umich.edu                if match.group("typeQual"):
1924519Sgblack@eecs.umich.edu                    typeQual = match.group("typeQual")
1934519Sgblack@eecs.umich.edu                src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual)
1944519Sgblack@eecs.umich.edu                self.buildCppClasses(name, Name, suffix,
1954519Sgblack@eecs.umich.edu                        matcher.sub(src2_name, code))
1964519Sgblack@eecs.umich.edu                self.buildCppClasses(name + "i", Name, suffix + "Imm",
1974519Sgblack@eecs.umich.edu                        matcher.sub("imm8", code))
1984519Sgblack@eecs.umich.edu                return
1994519Sgblack@eecs.umich.edu
2004519Sgblack@eecs.umich.edu            base = "X86ISA::MediaOp"
2014519Sgblack@eecs.umich.edu
2024519Sgblack@eecs.umich.edu            # If imm8 shows up in the code, use the immediate templates, if
2034519Sgblack@eecs.umich.edu            # not, hopefully the register ones will be correct.
2044519Sgblack@eecs.umich.edu            matcher = re.compile("(?<!\w)imm8(?!\w)")
2054519Sgblack@eecs.umich.edu            if matcher.search(code):
2064519Sgblack@eecs.umich.edu                base += "Imm"
2074519Sgblack@eecs.umich.edu                templates = immTemplates
2084519Sgblack@eecs.umich.edu            else:
2094519Sgblack@eecs.umich.edu                base += "Reg"
2104519Sgblack@eecs.umich.edu                templates = regTemplates
2114519Sgblack@eecs.umich.edu
2124519Sgblack@eecs.umich.edu            # Get everything ready for the substitution
2134519Sgblack@eecs.umich.edu            iop = InstObjParams(name, Name + suffix, base, {"code" : code})
2144519Sgblack@eecs.umich.edu
2154519Sgblack@eecs.umich.edu            # Generate the actual code (finally!)
2164519Sgblack@eecs.umich.edu            header_output += templates[0].subst(iop)
2174519Sgblack@eecs.umich.edu            decoder_output += templates[1].subst(iop)
2184519Sgblack@eecs.umich.edu            exec_output += templates[2].subst(iop)
2194519Sgblack@eecs.umich.edu
2204519Sgblack@eecs.umich.edu
2214519Sgblack@eecs.umich.edu        def __new__(mcls, Name, bases, dict):
2224519Sgblack@eecs.umich.edu            abstract = False
2234519Sgblack@eecs.umich.edu            name = Name.lower()
2244528Sgblack@eecs.umich.edu            if "abstract" in dict:
2254519Sgblack@eecs.umich.edu                abstract = dict['abstract']
2264519Sgblack@eecs.umich.edu                del dict['abstract']
2274519Sgblack@eecs.umich.edu
2284519Sgblack@eecs.umich.edu            cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
2294519Sgblack@eecs.umich.edu            if not abstract:
2304519Sgblack@eecs.umich.edu                cls.className = Name
2314519Sgblack@eecs.umich.edu                cls.base_mnemonic = name
2324519Sgblack@eecs.umich.edu                code = cls.code
2334519Sgblack@eecs.umich.edu
2344560Sgblack@eecs.umich.edu                # Set up the C++ classes
2354539Sgblack@eecs.umich.edu                mcls.buildCppClasses(cls, name, Name, "", code)
2364519Sgblack@eecs.umich.edu
2374519Sgblack@eecs.umich.edu                # Hook into the microassembler dict
2384519Sgblack@eecs.umich.edu                global microopClasses
2394519Sgblack@eecs.umich.edu                microopClasses[name] = cls
2404519Sgblack@eecs.umich.edu
2414539Sgblack@eecs.umich.edu                # If op2 is used anywhere, make register and immediate versions
2424519Sgblack@eecs.umich.edu                # of this code.
2434519Sgblack@eecs.umich.edu                matcher = re.compile("op2(?P<typeQual>\\.\\w+)?")
2444539Sgblack@eecs.umich.edu                if matcher.search(code):
2454519Sgblack@eecs.umich.edu                    microopClasses[name + 'i'] = cls
2464528Sgblack@eecs.umich.edu            return cls
2474560Sgblack@eecs.umich.edu
2484519Sgblack@eecs.umich.edu
2494519Sgblack@eecs.umich.edu    class MediaOp(X86Microop):
2504560Sgblack@eecs.umich.edu        __metaclass__ = MediaOpMeta
2514519Sgblack@eecs.umich.edu        # This class itself doesn't act as a microop
2524519Sgblack@eecs.umich.edu        abstract = True
2534519Sgblack@eecs.umich.edu
2544519Sgblack@eecs.umich.edu        def __init__(self, dest, src1, op2,
2554519Sgblack@eecs.umich.edu                size = None, destSize = None, srcSize = None, ext = None):
2564560Sgblack@eecs.umich.edu            self.dest = dest
2574539Sgblack@eecs.umich.edu            self.src1 = src1
2584519Sgblack@eecs.umich.edu            self.op2 = op2
2594519Sgblack@eecs.umich.edu            if size is not None:
2604519Sgblack@eecs.umich.edu                self.srcSize = size
2614519Sgblack@eecs.umich.edu                self.destSize = size
2624519Sgblack@eecs.umich.edu            if srcSize is not None:
2634539Sgblack@eecs.umich.edu                self.srcSize = srcSize
2644519Sgblack@eecs.umich.edu            if destSize is not None:
2654519Sgblack@eecs.umich.edu                self.destSize = destSize
2664539Sgblack@eecs.umich.edu            if self.srcSize is None:
2674519Sgblack@eecs.umich.edu                raise Exception, "Source size not set."
2684519Sgblack@eecs.umich.edu            if self.destSize is None:
2694519Sgblack@eecs.umich.edu                raise Exception, "Dest size not set."
2704519Sgblack@eecs.umich.edu            if ext is None:
2714519Sgblack@eecs.umich.edu                self.ext = 0
2724519Sgblack@eecs.umich.edu            else:
2734519Sgblack@eecs.umich.edu                self.ext = ext 
2744519Sgblack@eecs.umich.edu
2754519Sgblack@eecs.umich.edu        def getAllocator(self, *microFlags):
2764519Sgblack@eecs.umich.edu            className = self.className
2774528Sgblack@eecs.umich.edu            if self.mnemonic == self.base_mnemonic + 'i':
2784519Sgblack@eecs.umich.edu                className += "Imm"
2794519Sgblack@eecs.umich.edu            allocator = '''new %(class_name)s(machInst, macrocodeBlock
2804519Sgblack@eecs.umich.edu                    %(flags)s, %(src1)s, %(op2)s, %(dest)s,
2814528Sgblack@eecs.umich.edu                    %(srcSize)s, %(destSize)s, %(ext)s)''' % {
2824519Sgblack@eecs.umich.edu                "class_name" : className,
2834519Sgblack@eecs.umich.edu                "flags" : self.microFlagsText(microFlags),
2844519Sgblack@eecs.umich.edu                "src1" : self.src1, "op2" : self.op2,
2854519Sgblack@eecs.umich.edu                "dest" : self.dest,
2864519Sgblack@eecs.umich.edu                "srcSize" : self.srcSize,
2874519Sgblack@eecs.umich.edu                "destSize" : self.destSize,
2884519Sgblack@eecs.umich.edu                "ext" : self.ext}
2894519Sgblack@eecs.umich.edu            return allocator
2904519Sgblack@eecs.umich.edu
2914519Sgblack@eecs.umich.edu    class Mov2int(MediaOp):
2924519Sgblack@eecs.umich.edu        def __init__(self, dest, src1, src2 = 0, \
2934539Sgblack@eecs.umich.edu                size = None, destSize = None, srcSize = None, ext = None):
2944519Sgblack@eecs.umich.edu            super(Mov2int, self).__init__(dest, src1,\
2954519Sgblack@eecs.umich.edu                    src2, size, destSize, srcSize, ext)
2964519Sgblack@eecs.umich.edu        code = '''
2974519Sgblack@eecs.umich.edu            int items = sizeof(FloatRegBits) / srcSize;
2984519Sgblack@eecs.umich.edu            int offset = imm8;
2994519Sgblack@eecs.umich.edu            if (bits(src1, 0) && (ext & 0x1))
3004528Sgblack@eecs.umich.edu                offset -= items;
3014528Sgblack@eecs.umich.edu            if (offset >= 0 && offset < items) {
3024519Sgblack@eecs.umich.edu                uint64_t fpSrcReg1 =
3034519Sgblack@eecs.umich.edu                    bits(FpSrcReg1.uqw,
3044519Sgblack@eecs.umich.edu                            (offset + 1) * srcSize * 8 - 1,
3054519Sgblack@eecs.umich.edu                            (offset + 0) * srcSize * 8);
3064519Sgblack@eecs.umich.edu                DestReg = merge(0, fpSrcReg1, destSize);
3074519Sgblack@eecs.umich.edu            } else {
3084539Sgblack@eecs.umich.edu                DestReg = DestReg;
3094519Sgblack@eecs.umich.edu            }
3104519Sgblack@eecs.umich.edu        '''
3114519Sgblack@eecs.umich.edu
3124519Sgblack@eecs.umich.edu    class Mov2fp(MediaOp):
3134519Sgblack@eecs.umich.edu        def __init__(self, dest, src1, src2 = 0, \
3144519Sgblack@eecs.umich.edu                size = None, destSize = None, srcSize = None, ext = None):
3154528Sgblack@eecs.umich.edu            super(Mov2fp, self).__init__(dest, src1,\
3164528Sgblack@eecs.umich.edu                    src2, size, destSize, srcSize, ext)
3174519Sgblack@eecs.umich.edu        code = '''
3184519Sgblack@eecs.umich.edu            int items = sizeof(FloatRegBits) / destSize;
3194560Sgblack@eecs.umich.edu            int offset = imm8;
3204519Sgblack@eecs.umich.edu            if (bits(dest, 0) && (ext & 0x1))
3214528Sgblack@eecs.umich.edu                offset -= items;
3224528Sgblack@eecs.umich.edu            if (offset >= 0 && offset < items) {
3234528Sgblack@eecs.umich.edu                uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
3244528Sgblack@eecs.umich.edu                FpDestReg.uqw =
3254528Sgblack@eecs.umich.edu                    insertBits(FpDestReg.uqw,
3264528Sgblack@eecs.umich.edu                            (offset + 1) * destSize * 8 - 1,
3274528Sgblack@eecs.umich.edu                            (offset + 0) * destSize * 8, srcReg1);
3284528Sgblack@eecs.umich.edu            } else {
3294528Sgblack@eecs.umich.edu                FpDestReg.uqw = FpDestReg.uqw;
3304519Sgblack@eecs.umich.edu            }
3314519Sgblack@eecs.umich.edu        '''
332
333    class Movsign(MediaOp):
334        def __init__(self, dest, src, \
335                size = None, destSize = None, srcSize = None, ext = None):
336            super(Movsign, self).__init__(dest, src,\
337                    "InstRegIndex(0)", size, destSize, srcSize, ext)
338        code = '''
339            int items = sizeof(FloatRegBits) / srcSize;
340            uint64_t result = 0;
341            int offset = (ext & 0x1) ? items : 0;
342            for (int i = 0; i < items; i++) {
343                uint64_t picked =
344                    bits(FpSrcReg1.uqw, (i + 1) * 8 * srcSize - 1);
345                result = insertBits(result, i + offset, i + offset, picked);
346            }
347            DestReg = DestReg | result;
348        '''
349
    # Masked move. For each element (only the lowest one when ext bit 0 is
    # set), the element of FpSrcReg1 replaces the destination's element if
    # the top bit of the matching FpSrcReg2 element is set. All other bits
    # keep the destination's previous value. Source and destination sizes
    # must be equal.
    class Maskmov(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                if (bits(FpSrcReg2.uqw, hiIndex))
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
            }
            FpDestReg.uqw = result;
        '''
367
    # Interleaves elements of the two sources. Half a register's worth of
    # elements is taken from each source, starting at element 0 or, when
    # ext is non-zero, at the upper half. The result alternates
    # FpSrcReg1 element, FpSrcReg2 element, starting at bit 0. Source and
    # destination sizes must be equal.
    class Unpack(MediaOp):
        code = '''
            assert(srcSize == destSize);
            int size = destSize;
            int items = (sizeof(FloatRegBits) / size) / 2;
            int offset = ext ? items : 0;
            uint64_t result = 0;
            for (int i = 0; i < items; i++) {
                uint64_t pickedLow =
                    bits(FpSrcReg1.uqw, (i + offset + 1) * 8 * size - 1,
                                        (i + offset) * 8 * size);
                result = insertBits(result,
                                    (2 * i + 1) * 8 * size - 1,
                                    (2 * i + 0) * 8 * size,
                                    pickedLow);
                uint64_t pickedHigh =
                    bits(FpSrcReg2.uqw, (i + offset + 1) * 8 * size - 1,
                                        (i + offset) * 8 * size);
                result = insertBits(result,
                                    (2 * i + 2) * 8 * size - 1,
                                    (2 * i + 1) * 8 * size,
                                    pickedHigh);
            }
            FpDestReg.uqw = result;
        '''
393
394    class Pack(MediaOp):
395        code = '''
396            assert(srcSize == destSize * 2);
397            int items = (sizeof(FloatRegBits) / destSize);
398            int destBits = destSize * 8;
399            int srcBits = srcSize * 8;
400            uint64_t result = 0;
401            int i;
402            for (i = 0; i < items / 2; i++) {
403                uint64_t picked =
404                    bits(FpSrcReg1.uqw, (i + 1) * srcBits - 1,
405                                        (i + 0) * srcBits);
406                unsigned signBit = bits(picked, srcBits - 1);
407                uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
408
409                // Handle saturation.
410                if (signBit) {
411                    if (overflow != mask(destBits - srcBits + 1)) {
412                        if (ext & 0x1)
413                            picked = (1 << (destBits - 1));
414                        else
415                            picked = 0;
416                    }
417                } else {
418                    if (overflow != 0) {
419                        if (ext & 0x1)
420                            picked = mask(destBits - 1);
421                        else
422                            picked = mask(destBits);
423                    }
424                }
425                result = insertBits(result,
426                                    (i + 1) * destBits - 1,
427                                    (i + 0) * destBits,
428                                    picked);
429            }
430            for (;i < items; i++) {
431                uint64_t picked =
432                    bits(FpSrcReg2.uqw, (i - items + 1) * srcBits - 1,
433                                        (i - items + 0) * srcBits);
434                unsigned signBit = bits(picked, srcBits - 1);
435                uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
436
437                // Handle saturation.
438                if (signBit) {
439                    if (overflow != mask(destBits - srcBits + 1)) {
440                        if (ext & 0x1)
441                            picked = (1 << (destBits - 1));
442                        else
443                            picked = 0;
444                    }
445                } else {
446                    if (overflow != 0) {
447                        if (ext & 0x1)
448                            picked = mask(destBits - 1);
449                        else
450                            picked = mask(destBits);
451                    }
452                }
453                result = insertBits(result,
454                                    (i + 1) * destBits - 1,
455                                    (i + 0) * destBits,
456                                    picked);
457            }
458            FpDestReg.uqw = result;
459        '''
460
461    class Mxor(MediaOp):
462        def __init__(self, dest, src1, src2):
463            super(Mxor, self).__init__(dest, src1, src2, 1)
464        code = '''
465            FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw;
466        '''
467
468    class Mor(MediaOp):
469        def __init__(self, dest, src1, src2):
470            super(Mor, self).__init__(dest, src1, src2, 1)
471        code = '''
472            FpDestReg.uqw = FpSrcReg1.uqw | FpSrcReg2.uqw;
473        '''
474
475    class Mand(MediaOp):
476        def __init__(self, dest, src1, src2):
477            super(Mand, self).__init__(dest, src1, src2, 1)
478        code = '''
479            FpDestReg.uqw = FpSrcReg1.uqw & FpSrcReg2.uqw;
480        '''
481
482    class Mandn(MediaOp):
483        def __init__(self, dest, src1, src2):
484            super(Mandn, self).__init__(dest, src1, src2, 1)
485        code = '''
486            FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw;
487        '''
488
    # Element-wise floating point minimum. Elements are 4 or 8 bytes
    # (asserted) and source and destination sizes must match. For each
    # element the bits of FpSrcReg1 are chosen when its value compares less
    # than FpSrcReg2's; otherwise FpSrcReg2's bits are chosen, so the second
    # source also wins whenever the comparison is false. When ext bit 0 is
    # set only the lowest element is processed and the remaining bits keep
    # the destination's previous value.
    class Mminf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                double arg1, arg2;
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);

                if (size == 4) {
                    floatInt fi;
                    fi.i = arg1Bits;
                    arg1 = fi.f;
                    fi.i = arg2Bits;
                    arg2 = fi.f;
                } else {
                    doubleInt di;
                    di.i = arg1Bits;
                    arg1 = di.d;
                    di.i = arg2Bits;
                    arg2 = di.d;
                }

                if (arg1 < arg2) {
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
                } else {
                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
                }
            }
            FpDestReg.uqw = result;
        '''
538
    # Element-wise floating point maximum. Elements are 4 or 8 bytes
    # (asserted) and source and destination sizes must match. For each
    # element the bits of FpSrcReg1 are chosen when its value compares
    # greater than FpSrcReg2's; otherwise FpSrcReg2's bits are chosen, so
    # the second source also wins whenever the comparison is false. When
    # ext bit 0 is set only the lowest element is processed and the
    # remaining bits keep the destination's previous value.
    class Mmaxf(MediaOp):
        code = '''
            union floatInt
            {
                float f;
                uint32_t i;
            };
            union doubleInt
            {
                double d;
                uint64_t i;
            };

            assert(srcSize == destSize);
            int size = srcSize;
            int sizeBits = size * 8;
            assert(srcSize == 4 || srcSize == 8);
            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
            uint64_t result = FpDestReg.uqw;

            for (int i = 0; i < items; i++) {
                double arg1, arg2;
                int hiIndex = (i + 1) * sizeBits - 1;
                int loIndex = (i + 0) * sizeBits;
                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);

                if (size == 4) {
                    floatInt fi;
                    fi.i = arg1Bits;
                    arg1 = fi.f;
                    fi.i = arg2Bits;
                    arg2 = fi.f;
                } else {
                    doubleInt di;
                    di.i = arg1Bits;
                    arg1 = di.d;
                    di.i = arg2Bits;
                    arg2 = di.d;
                }

                if (arg1 > arg2) {
                    result = insertBits(result, hiIndex, loIndex, arg1Bits);
                } else {
                    result = insertBits(result, hiIndex, loIndex, arg2Bits);
                }
            }
            FpDestReg.uqw = result;
        '''
588
589    class Mmini(MediaOp):
590        code = '''
591
592            assert(srcSize == destSize);
593            int size = srcSize;
594            int sizeBits = size * 8;
595            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
596            uint64_t result = FpDestReg.uqw;
597
598            for (int i = 0; i < items; i++) {
599                int hiIndex = (i + 1) * sizeBits - 1;
600                int loIndex = (i + 0) * sizeBits;
601                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
602                int64_t arg1 = arg1Bits |
603                    (0 - (arg1Bits & (1 << (sizeBits - 1))));
604                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
605                int64_t arg2 = arg2Bits |
606                    (0 - (arg2Bits & (1 << (sizeBits - 1))));
607                uint64_t resBits;
608
609                if (ext & 0x2) {
610                    if (arg1 < arg2) {
611                        resBits = arg1Bits;
612                    } else {
613                        resBits = arg2Bits;
614                    }
615                } else {
616                    if (arg1Bits < arg2Bits) {
617                        resBits = arg1Bits;
618                    } else {
619                        resBits = arg2Bits;
620                    }
621                }
622                result = insertBits(result, hiIndex, loIndex, resBits);
623            }
624            FpDestReg.uqw = result;
625        '''
626
627    class Mmaxi(MediaOp):
628        code = '''
629
630            assert(srcSize == destSize);
631            int size = srcSize;
632            int sizeBits = size * 8;
633            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
634            uint64_t result = FpDestReg.uqw;
635
636            for (int i = 0; i < items; i++) {
637                int hiIndex = (i + 1) * sizeBits - 1;
638                int loIndex = (i + 0) * sizeBits;
639                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
640                int64_t arg1 = arg1Bits |
641                    (0 - (arg1Bits & (1 << (sizeBits - 1))));
642                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
643                int64_t arg2 = arg2Bits |
644                    (0 - (arg2Bits & (1 << (sizeBits - 1))));
645                uint64_t resBits;
646
647                if (ext & 0x2) {
648                    if (arg1 > arg2) {
649                        resBits = arg1Bits;
650                    } else {
651                        resBits = arg2Bits;
652                    }
653                } else {
654                    if (arg1Bits > arg2Bits) {
655                        resBits = arg1Bits;
656                    } else {
657                        resBits = arg2Bits;
658                    }
659                }
660                result = insertBits(result, hiIndex, loIndex, resBits);
661            }
662            FpDestReg.uqw = result;
663        '''
664
665    class Msqrt(MediaOp):
666        def __init__(self, dest, src, \
667                size = None, destSize = None, srcSize = None, ext = None):
668            super(Msqrt, self).__init__(dest, src,\
669                    "InstRegIndex(0)", size, destSize, srcSize, ext)
670        code = '''
671            union floatInt
672            {
673                float f;
674                uint32_t i;
675            };
676            union doubleInt
677            {
678                double d;
679                uint64_t i;
680            };
681
682            assert(srcSize == destSize);
683            int size = srcSize;
684            int sizeBits = size * 8;
685            assert(srcSize == 4 || srcSize == 8);
686            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
687            uint64_t result = FpDestReg.uqw;
688
689            for (int i = 0; i < items; i++) {
690                int hiIndex = (i + 1) * sizeBits - 1;
691                int loIndex = (i + 0) * sizeBits;
692                uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
693
694                if (size == 4) {
695                    floatInt fi;
696                    fi.i = argBits;
697                    fi.f = sqrt(fi.f);
698                    argBits = fi.i;
699                } else {
700                    doubleInt di;
701                    di.i = argBits;
702                    di.d = sqrt(di.d);
703                    argBits = di.i;
704                }
705                result = insertBits(result, hiIndex, loIndex, argBits);
706            }
707            FpDestReg.uqw = result;
708        '''
709
710    class Maddf(MediaOp):
711        code = '''
712            union floatInt
713            {
714                float f;
715                uint32_t i;
716            };
717            union doubleInt
718            {
719                double d;
720                uint64_t i;
721            };
722
723            assert(srcSize == destSize);
724            int size = srcSize;
725            int sizeBits = size * 8;
726            assert(srcSize == 4 || srcSize == 8);
727            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
728            uint64_t result = FpDestReg.uqw;
729
730            for (int i = 0; i < items; i++) {
731                int hiIndex = (i + 1) * sizeBits - 1;
732                int loIndex = (i + 0) * sizeBits;
733                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
734                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
735                uint64_t resBits;
736
737                if (size == 4) {
738                    floatInt arg1, arg2, res;
739                    arg1.i = arg1Bits;
740                    arg2.i = arg2Bits;
741                    res.f = arg1.f + arg2.f;
742                    resBits = res.i;
743                } else {
744                    doubleInt arg1, arg2, res;
745                    arg1.i = arg1Bits;
746                    arg2.i = arg2Bits;
747                    res.d = arg1.d + arg2.d;
748                    resBits = res.i;
749                }
750
751                result = insertBits(result, hiIndex, loIndex, resBits);
752            }
753            FpDestReg.uqw = result;
754        '''
755
756    class Msubf(MediaOp):
757        code = '''
758            union floatInt
759            {
760                float f;
761                uint32_t i;
762            };
763            union doubleInt
764            {
765                double d;
766                uint64_t i;
767            };
768
769            assert(srcSize == destSize);
770            int size = srcSize;
771            int sizeBits = size * 8;
772            assert(srcSize == 4 || srcSize == 8);
773            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
774            uint64_t result = FpDestReg.uqw;
775
776            for (int i = 0; i < items; i++) {
777                int hiIndex = (i + 1) * sizeBits - 1;
778                int loIndex = (i + 0) * sizeBits;
779                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
780                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
781                uint64_t resBits;
782
783                if (size == 4) {
784                    floatInt arg1, arg2, res;
785                    arg1.i = arg1Bits;
786                    arg2.i = arg2Bits;
787                    res.f = arg1.f - arg2.f;
788                    resBits = res.i;
789                } else {
790                    doubleInt arg1, arg2, res;
791                    arg1.i = arg1Bits;
792                    arg2.i = arg2Bits;
793                    res.d = arg1.d - arg2.d;
794                    resBits = res.i;
795                }
796
797                result = insertBits(result, hiIndex, loIndex, resBits);
798            }
799            FpDestReg.uqw = result;
800        '''
801
802    class Mmulf(MediaOp):
803        code = '''
804            union floatInt
805            {
806                float f;
807                uint32_t i;
808            };
809            union doubleInt
810            {
811                double d;
812                uint64_t i;
813            };
814
815            assert(srcSize == destSize);
816            int size = srcSize;
817            int sizeBits = size * 8;
818            assert(srcSize == 4 || srcSize == 8);
819            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
820            uint64_t result = FpDestReg.uqw;
821
822            for (int i = 0; i < items; i++) {
823                int hiIndex = (i + 1) * sizeBits - 1;
824                int loIndex = (i + 0) * sizeBits;
825                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
826                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
827                uint64_t resBits;
828
829                if (size == 4) {
830                    floatInt arg1, arg2, res;
831                    arg1.i = arg1Bits;
832                    arg2.i = arg2Bits;
833                    res.f = arg1.f * arg2.f;
834                    resBits = res.i;
835                } else {
836                    doubleInt arg1, arg2, res;
837                    arg1.i = arg1Bits;
838                    arg2.i = arg2Bits;
839                    res.d = arg1.d * arg2.d;
840                    resBits = res.i;
841                }
842
843                result = insertBits(result, hiIndex, loIndex, resBits);
844            }
845            FpDestReg.uqw = result;
846        '''
847
848    class Mdivf(MediaOp):
849        code = '''
850            union floatInt
851            {
852                float f;
853                uint32_t i;
854            };
855            union doubleInt
856            {
857                double d;
858                uint64_t i;
859            };
860
861            assert(srcSize == destSize);
862            int size = srcSize;
863            int sizeBits = size * 8;
864            assert(srcSize == 4 || srcSize == 8);
865            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
866            uint64_t result = FpDestReg.uqw;
867
868            for (int i = 0; i < items; i++) {
869                int hiIndex = (i + 1) * sizeBits - 1;
870                int loIndex = (i + 0) * sizeBits;
871                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
872                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
873                uint64_t resBits;
874
875                if (size == 4) {
876                    floatInt arg1, arg2, res;
877                    arg1.i = arg1Bits;
878                    arg2.i = arg2Bits;
879                    res.f = arg1.f / arg2.f;
880                    resBits = res.i;
881                } else {
882                    doubleInt arg1, arg2, res;
883                    arg1.i = arg1Bits;
884                    arg2.i = arg2Bits;
885                    res.d = arg1.d / arg2.d;
886                    resBits = res.i;
887                }
888
889                result = insertBits(result, hiIndex, loIndex, resBits);
890            }
891            FpDestReg.uqw = result;
892        '''
893
894    class Maddi(MediaOp):
895        code = '''
896            assert(srcSize == destSize);
897            int size = srcSize;
898            int sizeBits = size * 8;
899            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
900            uint64_t result = FpDestReg.uqw;
901
902            for (int i = 0; i < items; i++) {
903                int hiIndex = (i + 1) * sizeBits - 1;
904                int loIndex = (i + 0) * sizeBits;
905                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
906                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
907                uint64_t resBits = arg1Bits + arg2Bits;
908                
909                if (ext & 0x2) {
910                    if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
911                        resBits = mask(sizeBits);
912                } else if (ext & 0x4) {
913                    int arg1Sign = bits(arg1Bits, sizeBits - 1);
914                    int arg2Sign = bits(arg2Bits, sizeBits - 1);
915                    int resSign = bits(resBits, sizeBits - 1);
916                    if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
917                        if (resSign == 0)
918                            resBits = (1 << (sizeBits - 1));
919                        else
920                            resBits = mask(sizeBits - 1);
921                    }
922                }
923
924                result = insertBits(result, hiIndex, loIndex, resBits);
925            }
926            FpDestReg.uqw = result;
927        '''
928
929    class Msubi(MediaOp):
930        code = '''
931            assert(srcSize == destSize);
932            int size = srcSize;
933            int sizeBits = size * 8;
934            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
935            uint64_t result = FpDestReg.uqw;
936
937            for (int i = 0; i < items; i++) {
938                int hiIndex = (i + 1) * sizeBits - 1;
939                int loIndex = (i + 0) * sizeBits;
940                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
941                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
942                uint64_t resBits = arg1Bits - arg2Bits;
943                
944                if (ext & 0x2) {
945                    if (arg2Bits > arg1Bits) {
946                        resBits = 0;
947                    } else if (!findCarry(sizeBits, resBits,
948                                         arg1Bits, ~arg2Bits)) {
949                        resBits = mask(sizeBits);
950                    }
951                } else if (ext & 0x4) {
952                    int arg1Sign = bits(arg1Bits, sizeBits - 1);
953                    int arg2Sign = !bits(arg2Bits, sizeBits - 1);
954                    int resSign = bits(resBits, sizeBits - 1);
955                    if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
956                        if (resSign == 0)
957                            resBits = (1 << (sizeBits - 1));
958                        else
959                            resBits = mask(sizeBits - 1);
960                    }
961                }
962
963                result = insertBits(result, hiIndex, loIndex, resBits);
964            }
965            FpDestReg.uqw = result;
966        '''
967
968    class Mmuli(MediaOp):
969        code = '''
970            int srcBits = srcSize * 8;
971            int destBits = destSize * 8;
972            assert(destBits <= 64);
973            assert(destSize >= srcSize);
974            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / destSize);
975            uint64_t result = FpDestReg.uqw;
976
977            for (int i = 0; i < items; i++) {
978                int offset = 0;
979                if (ext & 16) {
980                    if (ext & 32)
981                        offset = i * (destBits - srcBits);
982                    else
983                        offset = i * (destBits - srcBits) + srcBits;
984                }
985                int srcHiIndex = (i + 1) * srcBits - 1 + offset;
986                int srcLoIndex = (i + 0) * srcBits + offset;
987                uint64_t arg1Bits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
988                uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex);
989                uint64_t resBits;
990
991                if (ext & 0x2) {
992                    int64_t arg1 = arg1Bits |
993                        (0 - (arg1Bits & (1 << (srcBits - 1))));
994                    int64_t arg2 = arg2Bits |
995                        (0 - (arg2Bits & (1 << (srcBits - 1))));
996                    resBits = (uint64_t)(arg1 * arg2);
997                } else {
998                    resBits = arg1Bits * arg2Bits;
999                }
1000
1001                if (ext & 0x4)
1002                    resBits += (1 << (destBits - 1));
1003                
1004                if (ext & 0x8)
1005                    resBits >>= destBits;
1006
1007                int destHiIndex = (i + 1) * destBits - 1;
1008                int destLoIndex = (i + 0) * destBits;
1009                result = insertBits(result, destHiIndex, destLoIndex, resBits);
1010            }
1011            FpDestReg.uqw = result;
1012        '''
1013
1014    class Mavg(MediaOp):
1015        code = '''
1016            assert(srcSize == destSize);
1017            int size = srcSize;
1018            int sizeBits = size * 8;
1019            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
1020            uint64_t result = FpDestReg.uqw;
1021
1022            for (int i = 0; i < items; i++) {
1023                int hiIndex = (i + 1) * sizeBits - 1;
1024                int loIndex = (i + 0) * sizeBits;
1025                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1026                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1027                uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;
1028                
1029                result = insertBits(result, hiIndex, loIndex, resBits);
1030            }
1031            FpDestReg.uqw = result;
1032        '''
1033
1034    class Msad(MediaOp):
1035        code = '''
1036            int srcBits = srcSize * 8;
1037            int items = sizeof(FloatRegBits) / srcSize;
1038
1039            uint64_t sum = 0;
1040            for (int i = 0; i < items; i++) {
1041                int hiIndex = (i + 1) * srcBits - 1;
1042                int loIndex = (i + 0) * srcBits;
1043                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1044                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1045                int64_t resBits = arg1Bits - arg2Bits;
1046                if (resBits < 0)
1047                    resBits = -resBits;
1048                sum += resBits;
1049            }
1050            FpDestReg.uqw = sum & mask(destSize * 8);
1051        '''
1052
1053    class Msrl(MediaOp):
1054        code = '''
1055
1056            assert(srcSize == destSize);
1057            int size = srcSize;
1058            int sizeBits = size * 8;
1059            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
1060            uint64_t shiftAmt = op2.uqw;
1061            uint64_t result = FpDestReg.uqw;
1062
1063            for (int i = 0; i < items; i++) {
1064                int hiIndex = (i + 1) * sizeBits - 1;
1065                int loIndex = (i + 0) * sizeBits;
1066                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1067                uint64_t resBits;
1068                if (shiftAmt >= sizeBits) {
1069                    resBits = 0;
1070                } else {
1071                    resBits = (arg1Bits >> shiftAmt) &
1072                        mask(sizeBits - shiftAmt);
1073                }
1074
1075                result = insertBits(result, hiIndex, loIndex, resBits);
1076            }
1077            FpDestReg.uqw = result;
1078        '''
1079
1080    class Msra(MediaOp):
1081        code = '''
1082
1083            assert(srcSize == destSize);
1084            int size = srcSize;
1085            int sizeBits = size * 8;
1086            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
1087            uint64_t shiftAmt = op2.uqw;
1088            uint64_t result = FpDestReg.uqw;
1089
1090            for (int i = 0; i < items; i++) {
1091                int hiIndex = (i + 1) * sizeBits - 1;
1092                int loIndex = (i + 0) * sizeBits;
1093                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1094                uint64_t resBits;
1095                if (shiftAmt >= sizeBits) {
1096                    if (bits(arg1Bits, sizeBits - 1))
1097                        resBits = mask(sizeBits);
1098                    else
1099                        resBits = 0;
1100                } else {
1101                    resBits = (arg1Bits >> shiftAmt);
1102                    resBits = resBits |
1103                        (0 - (resBits & (1 << (sizeBits - 1 - shiftAmt))));
1104                }
1105
1106                result = insertBits(result, hiIndex, loIndex, resBits);
1107            }
1108            FpDestReg.uqw = result;
1109        '''
1110
1111    class Msll(MediaOp):
1112        code = '''
1113
1114            assert(srcSize == destSize);
1115            int size = srcSize;
1116            int sizeBits = size * 8;
1117            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
1118            uint64_t shiftAmt = op2.uqw;
1119            uint64_t result = FpDestReg.uqw;
1120
1121            for (int i = 0; i < items; i++) {
1122                int hiIndex = (i + 1) * sizeBits - 1;
1123                int loIndex = (i + 0) * sizeBits;
1124                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1125                uint64_t resBits;
1126                if (shiftAmt >= sizeBits) {
1127                    resBits = 0;
1128                } else {
1129                    resBits = (arg1Bits << shiftAmt);
1130                }
1131
1132                result = insertBits(result, hiIndex, loIndex, resBits);
1133            }
1134            FpDestReg.uqw = result;
1135        '''
1136
1137    class Cvti2f(MediaOp):
1138        def __init__(self, dest, src, \
1139                size = None, destSize = None, srcSize = None, ext = None):
1140            super(Cvti2f, self).__init__(dest, src,\
1141                    "InstRegIndex(0)", size, destSize, srcSize, ext)
1142        code = '''
1143            union floatInt
1144            {
1145                float f;
1146                uint32_t i;
1147            };
1148            union doubleInt
1149            {
1150                double d;
1151                uint64_t i;
1152            };
1153
1154            assert(destSize == 4 || destSize == 8);
1155            assert(srcSize == 4 || srcSize == 8);
1156            int srcSizeBits = srcSize * 8;
1157            int destSizeBits = destSize * 8;
1158            int items;
1159            int srcStart = 0;
1160            int destStart = 0;
1161            if (srcSize == 2 * destSize) {
1162                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
1163                if (ext & 0x2)
1164                    destStart = destSizeBits * items;
1165            } else if (destSize == 2 * srcSize) {
1166                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
1167                if (ext & 0x2)
1168                    srcStart = srcSizeBits * items;
1169            } else {
1170                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
1171            }
1172            uint64_t result = FpDestReg.uqw;
1173
1174            for (int i = 0; i < items; i++) {
1175                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1176                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1177                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1178                int64_t sArg = argBits | (0 - (argBits & (1 << srcHiIndex)));
1179                double arg = sArg;
1180
1181                if (destSize == 4) {
1182                    floatInt fi;
1183                    fi.f = arg;
1184                    argBits = fi.i;
1185                } else {
1186                    doubleInt di;
1187                    di.d = arg;
1188                    argBits = di.i;
1189                }
1190                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1191                int destLoIndex = destStart + (i + 0) * destSizeBits;
1192                result = insertBits(result, destHiIndex, destLoIndex, argBits);
1193            }
1194            FpDestReg.uqw = result;
1195        '''
1196
1197    class Cvtf2f(MediaOp):
1198        def __init__(self, dest, src, \
1199                size = None, destSize = None, srcSize = None, ext = None):
1200            super(Cvtf2f, self).__init__(dest, src,\
1201                    "InstRegIndex(0)", size, destSize, srcSize, ext)
1202        code = '''
1203            union floatInt
1204            {
1205                float f;
1206                uint32_t i;
1207            };
1208            union doubleInt
1209            {
1210                double d;
1211                uint64_t i;
1212            };
1213
1214            assert(destSize == 4 || destSize == 8);
1215            assert(srcSize == 4 || srcSize == 8);
1216            int srcSizeBits = srcSize * 8;
1217            int destSizeBits = destSize * 8;
1218            int items;
1219            int srcStart = 0;
1220            int destStart = 0;
1221            if (srcSize == 2 * destSize) {
1222                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / srcSize;
1223                if (ext & 0x2)
1224                    destStart = destSizeBits * items;
1225            } else if (destSize == 2 * srcSize) {
1226                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
1227                if (ext & 0x2)
1228                    srcStart = srcSizeBits * items;
1229            } else {
1230                items = (ext & 0x1) ? 1: sizeof(FloatRegBits) / destSize;
1231            }
1232            uint64_t result = FpDestReg.uqw;
1233
1234            for (int i = 0; i < items; i++) {
1235                int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1236                int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1237                uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1238                double arg;
1239
1240                if (srcSize == 4) {
1241                    floatInt fi;
1242                    fi.i = argBits;
1243                    arg = fi.f;
1244                } else {
1245                    doubleInt di;
1246                    di.i = argBits;
1247                    arg = di.d;
1248                }
1249                if (destSize == 4) {
1250                    floatInt fi;
1251                    fi.f = arg;
1252                    argBits = fi.i;
1253                } else {
1254                    doubleInt di;
1255                    di.d = arg;
1256                    argBits = di.i;
1257                }
1258                int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1259                int destLoIndex = destStart + (i + 0) * destSizeBits;
1260                result = insertBits(result, destHiIndex, destLoIndex, argBits);
1261            }
1262            FpDestReg.uqw = result;
1263        '''
1264
1265    class Mcmpi2r(MediaOp):
1266        code = '''
1267            union floatInt
1268            {
1269                float f;
1270                uint32_t i;
1271            };
1272            union doubleInt
1273            {
1274                double d;
1275                uint64_t i;
1276            };
1277
1278            assert(srcSize == destSize);
1279            int size = srcSize;
1280            int sizeBits = size * 8;
1281            int items = (ext & 0x1) ? 1: (sizeof(FloatRegBits) / size);
1282            uint64_t result = FpDestReg.uqw;
1283
1284            for (int i = 0; i < items; i++) {
1285                int hiIndex = (i + 1) * sizeBits - 1;
1286                int loIndex = (i + 0) * sizeBits;
1287                uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1288                int64_t arg1 = arg1Bits |
1289                    (0 - (arg1Bits & (1 << (sizeBits - 1))));
1290                uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1291                int64_t arg2 = arg2Bits |
1292                    (0 - (arg2Bits & (1 << (sizeBits - 1))));
1293
1294                uint64_t resBits = 0;
1295                if ((ext & 0x2) == 0 && arg1 == arg2 ||
1296                        (ext & 0x2) == 0x2 && arg1 > arg2)
1297                    resBits = mask(sizeBits);
1298
1299                result = insertBits(result, hiIndex, loIndex, resBits);
1300            }
1301            FpDestReg.uqw = result;
1302        '''
1303}};
1304