microasm.isa revision 4336:bd6ab22f8e11
1// -*- mode:c++ -*-
2
3// Copyright (c) 2007 The Hewlett-Packard Development Company
4// All rights reserved.
5//
6// Redistribution and use of this software in source and binary forms,
7// with or without modification, are permitted provided that the
8// following conditions are met:
9//
10// The software must be used only for Non-Commercial Use which means any
11// use which is NOT directed to receiving any direct monetary
12// compensation for, or commercial advantage from such use.  Illustrative
13// examples of non-commercial use are academic research, personal study,
14// teaching, education and corporate research & development.
15// Illustrative examples of commercial use are distributing products for
16// commercial advantage and providing services using the software for
17// commercial advantage.
18//
19// If you wish to use this software or functionality therein that may be
20// covered by patents for commercial use, please contact:
21//     Director of Intellectual Property Licensing
22//     Office of Strategy and Technology
23//     Hewlett-Packard Company
24//     1501 Page Mill Road
25//     Palo Alto, California  94304
26//
27// Redistributions of source code must retain the above copyright notice,
28// this list of conditions and the following disclaimer.  Redistributions
29// in binary form must reproduce the above copyright notice, this list of
30// conditions and the following disclaimer in the documentation and/or
31// other materials provided with the distribution.  Neither the name of
32// the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its
33// contributors may be used to endorse or promote products derived from
34// this software without specific prior written permission.  No right of
35// sublicense is granted herewith.  Derivatives of the software and
36// output created using the software may be prepared, but only for
37// Non-Commercial Uses.  Derivatives of the software may be shared with
38// others provided: (i) the others agree to abide by the list of
39// conditions herein which includes the Non-Commercial Use restrictions;
40// and (ii) such Derivatives of the software include the above copyright
41// notice to acknowledge the contribution from this software where
42// applicable, this list of conditions and the disclaimer below.
43//
44// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
45// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
46// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
47// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
48// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
49// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
50// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
51// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
52// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
53// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
54// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
55//
56// Authors: Gabe Black
57
58////////////////////////////////////////////////////////////////////
59//
60//  Code to "specialize" a microcode sequence to use a particular
61//  variety of operands
62//
63
64let {{
65    # This builds either a regular or macro op to implement the sequence of
66    # ops we give it.
67    def genInst(name, Name, ops):
68        # If we can implement this instruction with exactly one microop, just
69        # use that directly.
70        newStmnt = ''
71        if len(ops) == 1:
72            decode_block = "return (X86StaticInst *)(%s);" % \
73                            ops[0].getAllocator()
74            return ('', '', decode_block, '')
75        else:
76            # Build a macroop to contain the sequence of microops we've
77            # been given.
78            return genMacroOp(name, Name, ops)
79}};
80
81let {{
    # This code builds up a decode block which decodes based on switchVal.
    # vals is a dict which maps each case value to the arguments that should
    # be decoded for that case. builder is called with the unpacked contents
    # of each "vals" value (and of "default", if one is given) to generate
    # whatever code should be used.
86    def doSplitDecode(name, Name, builder, switchVal, vals, default = None):
87        header_output = ''
88        decoder_output = ''
89        decode_block = 'switch(%s) {\n' % switchVal
90        exec_output = ''
91        for (val, todo) in vals.items():
92            (new_header_output,
93             new_decoder_output,
94             new_decode_block,
95             new_exec_output) = builder(name, Name, *todo)
96            header_output += new_header_output
97            decoder_output += new_decoder_output
98            decode_block += '\tcase %s: %s\n' % (val, new_decode_block)
99            exec_output += new_exec_output
100        if default:
101            (new_header_output,
102             new_decoder_output,
103             new_decode_block,
104             new_exec_output) = builder(name, Name, *default)
105            header_output += new_header_output
106            decoder_output += new_decoder_output
107            decode_block += '\tdefault: %s\n' % new_decode_block
108            exec_output += new_exec_output
109        decode_block += '}\n'
110        return (header_output, decoder_output, decode_block, exec_output)
111}};
112
113let {{
114    class OpType(object):
115        parser = re.compile(r"(?P<tag>[A-Z][A-Z]*)(?P<size>[a-z][a-z]*)|(r(?P<reg>[A-Za-z0-9][A-Za-z0-9]*))")
116        def __init__(self, opTypeString):
117            match = OpType.parser.search(opTypeString)
118            if match == None:
119                raise Exception, "Problem parsing operand type %s" % opTypeString
120            self.reg = match.group("reg")
121            self.tag = match.group("tag")
122            self.size = match.group("size")
123}};
124
125let {{
126
127    # This function specializes the given piece of code to use a particular
128    # set of argument types described by "opTypes". These are "implemented"
129    # in reverse order.
130    def specializeInst(name, Name, code, opTypes):
131        opNum = len(opTypes) - 1
132        while len(opTypes):
133            # print "Building a composite op with tags", opTypes
134            # print "And code", code
135            opNum = len(opTypes) - 1
136            # A regular expression to find the operand placeholders we're
137            # interested in.
138            opRe = re.compile("%%(?P<operandNum>%d)(?=[^0-9]|$)" % opNum)
139
140            # Parse the operand type strign we're working with
141            print "About to parse tag %s" % opTypes[opNum]
142            opType = OpType(opTypes[opNum])
143
144            if opType.reg:
145                #Figure out what to do with fixed register operands
146                if opType.reg in ("Ax", "Bx", "Cx", "Dx"):
147                    code = opRe.sub("{INTREG_R%s}" % opType.reg.upper(), code)
148                elif opType.reg == "Al":
149                    # We need a way to specify register width
150                    code = opRe.sub("{INTREG_RAX}", code)
151                else:
152                    print "Didn't know how to encode fixed register %s!" % opType.reg
153            elif opType.tag == None or opType.size == None:
154                raise Exception, "Problem parsing operand tag: %s" % opType.tag
155            elif opType.tag in ("C", "D", "G", "P", "S", "T", "V"):
156                # Use the "reg" field of the ModRM byte to select the register
157                code = opRe.sub("{(uint8_t)MODRM_REG}", code)
158            elif opType.tag in ("E", "Q", "W"):
159                # This might refer to memory or to a register. We need to
160                # divide it up farther.
161                regCode = opRe.sub("{(uint8_t)MODRM_RM}", code)
162                regTypes = copy.copy(opTypes)
163                regTypes.pop(-1)
164                # This needs to refer to memory, but we'll fill in the details
165                # later. It needs to take into account unaligned memory
166                # addresses.
167                memCode = opRe.sub("0", code)
168                memTypes = copy.copy(opTypes)
169                memTypes.pop(-1)
170                return doSplitDecode(name, Name, specializeInst, "MODRM_MOD",
171                    {"3" : (regCode, regTypes)}, (memCode, memTypes))
172            elif opType.tag in ("I", "J"):
173                # Immediates are already in the instruction, so don't leave in
174                # those parameters
175                code = opRe.sub("", code)
176            elif opType.tag == "M":
177                # This needs to refer to memory, but we'll fill in the details
178                # later. It needs to take into account unaligned memory
179                # addresses.
180                code = opRe.sub("0", code)
181            elif opType.tag in ("PR", "R", "VR"):
182                # There should probably be a check here to verify that mod
183                # is equal to 11b
184                code = opRe.sub("{(uint8_t)MODRM_RM}", code)
185            else:
186                raise Exception, "Unrecognized tag %s." % opType.tag
187            opTypes.pop(-1)
188
189        # At this point, we've built up "code" to have all the necessary extra
190        # instructions needed to implement whatever types of operands were
191        # specified. Now we'll assemble it it into a microOp sequence.
192        ops = assembleMicro(code)
193
194        # Build a macroop to contain the sequence of microops we've
195        # constructed. The decode block will be used to fill in our
196        # inner decode structure, and the rest will be concatenated and
197        # passed back.
198        return genInst(name, Name, ops)
199}};
200
201////////////////////////////////////////////////////////////////////
202//
203//  The microcode assembler
204//
205
206let {{
207    class MicroOpStatement(object):
208        def __init__(self):
209            self.className = ''
210            self.label = ''
211            self.args = []
212
213        # This converts a list of python bools into
214        # a comma seperated list of C++ bools.
215        def microFlagsText(self, vals):
216            text = ""
217            for val in vals:
218                if val:
219                    text += ", true"
220                else:
221                    text += ", false"
222            return text
223
224        def getAllocator(self, *microFlags):
225            args = ''
226            for arg in self.args:
227                if arg.has_key("operandConst"):
228                    args += ", %s" % arg["operandConst"]
229                elif arg.has_key("operandCode"):
230                    args += ", %s" % arg["operandCode"]
231                elif arg.has_key("operandLabel"):
232                    raise Exception, "Found a label while creating allocator string."
233                else:
234                    raise Exception, "Unrecognized operand type."
235            return 'new %s(machInst%s%s)' % (self.className, self.microFlagsText(microFlags), args)
236}};
237
238let {{
239    def buildLabelDict(ops):
240        labels = {}
241        micropc = 0
242        for op in ops:
243            if op.label:
244                labels[op.label] = count
245            micropc += 1
246        return labels
247}};
248
249let{{
250    def assembleMicro(code):
251        # This function takes in a block of microcode assembly and returns
252        # a python list of objects which describe it.
253
254        # Keep this around in case we need it later
255        orig_code = code
256        # A list of the statements we've found thus far
257        statements = []
258
259        # Regular expressions to pull each piece of the statement out at a
260        # time. Each expression expects the thing it's looking for to be at
261        # the beginning of the line, so the previous component is stripped
262        # before continuing.
263        labelRe = re.compile(r'^[ \t]*(?P<label>[a-zA-Z_]\w*)[ \t]:')
264        lineRe = re.compile(r'^(?P<line>[^\n][^\n]*)$')
265        classRe = re.compile(r'^[ \t]*(?P<className>[a-zA-Z_]\w*)')
266        # This recognizes three different flavors of operands:
267        # 1. Raw decimal numbers composed of digits between 0 and 9
268        # 2. Code beginning with "{" and continuing until the first "}"
269        #         ^ This one might need revising
270        # 3. A label, which starts with a capital or small letter, or
271        #    underscore, which is optionally followed by a sequence of
272        #    capital or small letters, underscores, or digts between 0 and 9
273        opRe = re.compile( \
274            r'^[ \t]*((?P<operandLabel>[a-zA-Z_]\w*)|(?P<operandConst>[0-9][0-9]*)|(\{(?P<operandCode>[^}]*)\}))')
275        lineMatch = lineRe.search(code)
276        while lineMatch != None:
277            statement = MicroOpStatement()
278            # Get a line and seperate it from the rest of the code
279            line = lineMatch.group("line")
280            orig_line = line
281            # print "Parsing line %s" % line
282            code = lineRe.sub('', code, 1)
283
284            # Find the label, if any
285            labelMatch = labelRe.search(line)
286            if labelMatch != None:
287                statement.label = labelMatch.group("label")
288                # print "Found label %s." % statement.label
289            # Clear the label from the statement
290            line = labelRe.sub('', line, 1)
291
292            # Find the class name which is roughly equivalent to the op name
293            classMatch = classRe.search(line)
294            if classMatch == None:
295                raise Exception, "Couldn't find class name in statement: %s" \
296                        % orig_line
297            else:
298                statement.className = classMatch.group("className")
299                # print "Found class name %s." % statement.className
300
301            # Clear the class name from the statement
302            line = classRe.sub('', line, 1)
303
304            #Find as many arguments as you can
305            statement.args = []
306            opMatch = opRe.search(line)
307            while opMatch is not None:
308                statement.args.append({})
309                # args is a list of dicts which collect different
310                # representations of operand values. Different forms might be
311                # needed in different places, for instance to replace a label
312                # with an offset.
313                for opType in ("operandLabel", "operandConst", "operandCode"):
314                    if opMatch.group(opType):
315                        statement.args[-1][opType] = opMatch.group(opType)
316                if len(statement.args[-1]) == 0:
317                    print "Problem parsing operand in statement: %s" \
318                            % orig_line
319                line = opRe.sub('', line, 1)
320                # print "Found operand %s." % statement.args[-1]
321                opMatch = opRe.search(line)
322            # print "Found operands", statement.args
323
324            # Add this statement to our collection
325            statements.append(statement)
326
327            # Get the next line
328            lineMatch = lineRe.search(code)
329
330        # Decode the labels into displacements
331        labels = buildLabelDict(statements)
332        micropc = 0
333        for statement in statements:
334            for arg in statement.args:
335                if arg.has_key("operandLabel"):
336                    if not labels.has_key(arg["operandLabel"]):
337                        raise Exception, "Unrecognized label: %s." % arg["operandLabel"]
338                    # This is assuming that intra microcode branches go to
339                    # the next micropc + displacement, or
340                    # micropc + 1 + displacement.
341                    arg["operandConst"] = labels[arg["operandLabel"]] - micropc - 1
342            micropc += 1
343        return statements
344}};
345