mult.isa revision 7160:3f4333b1d4af
14486Sbinkertn@umich.edu// -*- mode:c++ -*-
24486Sbinkertn@umich.edu
34486Sbinkertn@umich.edu// Copyright (c) 2010 ARM Limited
44486Sbinkertn@umich.edu// All rights reserved
54486Sbinkertn@umich.edu//
64486Sbinkertn@umich.edu// The license below extends only to copyright in the software and shall
74486Sbinkertn@umich.edu// not be construed as granting a license to any other intellectual
84486Sbinkertn@umich.edu// property including but not limited to intellectual property relating
94486Sbinkertn@umich.edu// to a hardware implementation of the functionality of the software
104486Sbinkertn@umich.edu// licensed hereunder.  You may use the software subject to the license
114486Sbinkertn@umich.edu// terms below provided that you ensure that this notice is replicated
124486Sbinkertn@umich.edu// unmodified and in its entirety in all distributions of the software,
134486Sbinkertn@umich.edu// modified or unmodified, in source code or in binary form.
144486Sbinkertn@umich.edu//
154486Sbinkertn@umich.edu// Redistribution and use in source and binary forms, with or without
164486Sbinkertn@umich.edu// modification, are permitted provided that the following conditions are
174486Sbinkertn@umich.edu// met: redistributions of source code must retain the above copyright
184486Sbinkertn@umich.edu// notice, this list of conditions and the following disclaimer;
194486Sbinkertn@umich.edu// redistributions in binary form must reproduce the above copyright
204486Sbinkertn@umich.edu// notice, this list of conditions and the following disclaimer in the
214486Sbinkertn@umich.edu// documentation and/or other materials provided with the distribution;
224486Sbinkertn@umich.edu// neither the name of the copyright holders nor the names of its
234486Sbinkertn@umich.edu// contributors may be used to endorse or promote products derived from
244486Sbinkertn@umich.edu// this software without specific prior written permission.
254486Sbinkertn@umich.edu//
264486Sbinkertn@umich.edu// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
274486Sbinkertn@umich.edu// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
284486Sbinkertn@umich.edu// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
294486Sbinkertn@umich.edu// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
304486Sbinkertn@umich.edu// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
314486Sbinkertn@umich.edu// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
324486Sbinkertn@umich.edu// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
334486Sbinkertn@umich.edu// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
344486Sbinkertn@umich.edu// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
354486Sbinkertn@umich.edu// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
364486Sbinkertn@umich.edu// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
374486Sbinkertn@umich.edu//
387580SAli.Saidi@arm.com// Authors: Gabe Black
394486Sbinkertn@umich.edu
404486Sbinkertn@umich.edulet {{
414486Sbinkertn@umich.edu
424486Sbinkertn@umich.edu    header_output = ""
434486Sbinkertn@umich.edu    decoder_output = ""
444486Sbinkertn@umich.edu    exec_output = ""
454486Sbinkertn@umich.edu
464486Sbinkertn@umich.edu    calcQCode = '''
474486Sbinkertn@umich.edu        cprintf("canOverflow: %%d\\n", Reg0 < resTemp);
484486Sbinkertn@umich.edu        replaceBits(CondCodes, 27, Reg0 < resTemp);
494486Sbinkertn@umich.edu    '''
504486Sbinkertn@umich.edu
514486Sbinkertn@umich.edu    calcCcCode = '''
524486Sbinkertn@umich.edu        uint16_t _iz, _in;
534486Sbinkertn@umich.edu        _in = (resTemp >> %(negBit)d) & 1;
54        _iz = ((%(zType)s)resTemp == 0);
55
56        CondCodes =  _in << 31 | _iz << 30 | (CondCodes & 0x3FFFFFFF);
57
58        DPRINTF(Arm, "(in, iz) = (%%d, %%d)\\n", _in, _iz);
59       '''
60
61    def buildMultInst(mnem, doCc, unCc, regs, code, flagType):
62        global header_output, decoder_output, exec_output
63        cCode = carryCode[flagType]
64        vCode = overflowCode[flagType]
65        zType = "uint32_t"
66        negBit = 31
67        if flagType == "llbit":
68            zType = "uint64_t"
69            negBit = 63
70        if flagType == "overflow":
71            ccCode = calcQCode
72        else:
73            ccCode = calcCcCode % {
74                "negBit": negBit,
75                "zType": zType
76            }
77
78        if not regs in (3, 4):
79            raise Exception, "Multiplication instructions with %d " + \
80                             "registers are not implemented"
81
82        if regs == 3:
83            base = 'Mult3'
84        else:
85            base = 'Mult4'
86
87        Name = "New" + mnem.capitalize()
88
89        if unCc:
90            iop = InstObjParams(mnem, Name, base,
91                                {"code" : code,
92                                 "predicate_test": predicateTest})
93        if doCc:
94            iopCc = InstObjParams(mnem + "s", Name + "Cc", base,
95                                  {"code" : code + ccCode,
96                                   "predicate_test": predicateTest})
97
98        if regs == 3:
99            declare = Mult3Declare
100            constructor = Mult3Constructor
101        else:
102            declare = Mult4Declare
103            constructor = Mult4Constructor
104
105        if unCc:
106            header_output += declare.subst(iop)
107            decoder_output += constructor.subst(iop)
108            exec_output += PredOpExecute.subst(iop)
109        if doCc:
110            header_output += declare.subst(iopCc)
111            decoder_output += constructor.subst(iopCc)
112            exec_output += PredOpExecute.subst(iopCc)
113
114    def buildMult3Inst(mnem, code, flagType = "logic"):
115        buildMultInst(mnem, True, True, 3, code, flagType)
116
117    def buildMult3InstCc(mnem, code, flagType = "logic"):
118        buildMultInst(mnem, True, False, 3, code, flagType)
119
120    def buildMult3InstUnCc(mnem, code, flagType = "logic"):
121        buildMultInst(mnem, False, True, 3, code, flagType)
122
123    def buildMult4Inst(mnem, code, flagType = "logic"):
124        buildMultInst(mnem, True, True, 4, code, flagType)
125
126    def buildMult4InstCc(mnem, code, flagType = "logic"):
127        buildMultInst(mnem, True, False, 4, code, flagType)
128
129    def buildMult4InstUnCc(mnem, code, flagType = "logic"):
130        buildMultInst(mnem, False, True, 4, code, flagType)
131
132    buildMult4Inst    ("mla", "Reg0 = resTemp = Reg1 * Reg2 + Reg3;")
133    buildMult4InstUnCc("mls", "Reg0 = resTemp = Reg3 - Reg1 * Reg2;")
134    buildMult3Inst    ("mul", "Reg0 = resTemp = Reg1 * Reg2;")
135    buildMult4InstCc  ("smlabb", '''Reg0 = resTemp =
136                                        sext<16>(bits(Reg1, 15, 0)) *
137                                        sext<16>(bits(Reg2, 15, 0)) +
138                                        Reg3.sw;
139                                 ''', "overflow")
140    buildMult4InstCc  ("smlabt", '''Reg0 = resTemp =
141                                        sext<16>(bits(Reg1, 15, 0)) *
142                                        sext<16>(bits(Reg2, 31, 16)) +
143                                        Reg3.sw;
144                                 ''', "overflow")
145    buildMult4InstCc  ("smlatb", '''Reg0 = resTemp =
146                                        sext<16>(bits(Reg1, 31, 16)) *
147                                        sext<16>(bits(Reg2, 15, 0)) +
148                                        Reg3.sw;
149                                 ''', "overflow")
150    buildMult4InstCc  ("smlatt", '''Reg0 = resTemp =
151                                        sext<16>(bits(Reg1, 31, 16)) *
152                                        sext<16>(bits(Reg2, 31, 16)) +
153                                        Reg3.sw;
154                                 ''', "overflow")
155    buildMult4InstCc  ("smlad", '''Reg0 = resTemp =
156                                        sext<16>(bits(Reg1, 31, 16)) *
157                                        sext<16>(bits(Reg2, 31, 16)) +
158                                        sext<16>(bits(Reg1, 15, 0)) *
159                                        sext<16>(bits(Reg2, 15, 0)) +
160                                        Reg3.sw;
161                                ''', "overflow")
162    buildMult4InstCc  ("smladx", '''Reg0 = resTemp =
163                                         sext<16>(bits(Reg1, 31, 16)) *
164                                         sext<16>(bits(Reg2, 15, 0)) +
165                                         sext<16>(bits(Reg1, 15, 0)) *
166                                         sext<16>(bits(Reg2, 31, 16)) +
167                                         Reg3.sw;
168                                 ''', "overflow")
169    buildMult4Inst    ("smlal", '''resTemp = sext<32>(Reg2) * sext<32>(Reg3) +
170                                       (int64_t)((Reg1.ud << 32) | Reg0.ud);
171                                   Reg0.ud = (uint32_t)resTemp;
172                                   Reg1.ud = (uint32_t)(resTemp >> 32);
173                                ''', "llbit")
174    buildMult4InstUnCc("smlalbb", '''resTemp = sext<16>(bits(Reg2, 15, 0)) *
175                                               sext<16>(bits(Reg3, 15, 0)) +
176                                               (int64_t)((Reg1.ud << 32) |
177                                                         Reg0.ud);
178                                     Reg0.ud = (uint32_t)resTemp;
179                                     Reg1.ud = (uint32_t)(resTemp >> 32);
180                                  ''')
181    buildMult4InstUnCc("smlalbt", '''resTemp = sext<16>(bits(Reg2, 15, 0)) *
182                                               sext<16>(bits(Reg3, 31, 16)) +
183                                               (int64_t)((Reg1.ud << 32) |
184                                                         Reg0.ud);
185                                     Reg0.ud = (uint32_t)resTemp;
186                                     Reg1.ud = (uint32_t)(resTemp >> 32);
187                                  ''')
188    buildMult4InstUnCc("smlaltb", '''resTemp = sext<16>(bits(Reg2, 31, 16)) *
189                                               sext<16>(bits(Reg3, 15, 0)) +
190                                               (int64_t)((Reg1.ud << 32) |
191                                                         Reg0.ud);
192                                     Reg0.ud = (uint32_t)resTemp;
193                                     Reg1.ud = (uint32_t)(resTemp >> 32);
194                                  ''')
195    buildMult4InstUnCc("smlaltt", '''resTemp = sext<16>(bits(Reg2, 31, 16)) *
196                                               sext<16>(bits(Reg3, 31, 16)) +
197                                               (int64_t)((Reg1.ud << 32) |
198                                                         Reg0.ud);
199                                     Reg0.ud = (uint32_t)resTemp;
200                                     Reg1.ud = (uint32_t)(resTemp >> 32);
201                                  ''')
202    buildMult4InstUnCc("smlald", '''resTemp =
203                                        sext<16>(bits(Reg2, 31, 16)) *
204                                        sext<16>(bits(Reg3, 31, 16)) +
205                                        sext<16>(bits(Reg2, 15, 0)) *
206                                        sext<16>(bits(Reg3, 15, 0)) +
207                                        (int64_t)((Reg1.ud << 32) |
208                                                  Reg0.ud);
209                                    Reg0.ud = (uint32_t)resTemp;
210                                    Reg1.ud = (uint32_t)(resTemp >> 32);
211                                 ''')
212    buildMult4InstUnCc("smlaldx", '''resTemp =
213                                         sext<16>(bits(Reg2, 31, 16)) *
214                                         sext<16>(bits(Reg3, 15, 0)) +
215                                         sext<16>(bits(Reg2, 15, 0)) *
216                                         sext<16>(bits(Reg3, 31, 16)) +
217                                         (int64_t)((Reg1.ud << 32) |
218                                                   Reg0.ud);
219                                     Reg0.ud = (uint32_t)resTemp;
220                                     Reg1.ud = (uint32_t)(resTemp >> 32);
221                                  ''')
222    buildMult4InstCc  ("smlawb", '''Reg0 = resTemp =
223                                        (Reg1.sw *
224                                         sext<16>(bits(Reg2, 15, 0)) +
225                                         (Reg3.sw << 16)) >> 16;
226                                 ''', "overflow")
227    buildMult4InstCc  ("smlawt", '''Reg0 = resTemp =
228                                        (Reg1.sw *
229                                         sext<16>(bits(Reg2, 31, 16)) +
230                                         (Reg3.sw << 16)) >> 16;
231                                 ''', "overflow")
232    buildMult4InstCc  ("smlsd", '''Reg0 = resTemp =
233                                       sext<16>(bits(Reg1, 15, 0)) *
234                                       sext<16>(bits(Reg2, 15, 0)) -
235                                       sext<16>(bits(Reg1, 31, 16)) *
236                                       sext<16>(bits(Reg2, 31, 16)) +
237                                       Reg3.sw;
238                                ''', "overflow")
239    buildMult4InstCc  ("smlsdx", '''Reg0 = resTemp =
240                                        sext<16>(bits(Reg1, 15, 0)) *
241                                        sext<16>(bits(Reg2, 31, 16)) -
242                                        sext<16>(bits(Reg1, 31, 16)) *
243                                        sext<16>(bits(Reg2, 15, 0)) +
244                                        Reg3.sw;
245                                 ''', "overflow")
246    buildMult4InstUnCc("smlsld", '''resTemp =
247                                        sext<16>(bits(Reg2, 15, 0)) *
248                                        sext<16>(bits(Reg3, 15, 0)) -
249                                        sext<16>(bits(Reg2, 31, 16)) *
250                                        sext<16>(bits(Reg3, 31, 16)) +
251                                        (int64_t)((Reg1.ud << 32) |
252                                                  Reg0.ud);
253                                    Reg0.ud = (uint32_t)resTemp;
254                                    Reg1.ud = (uint32_t)(resTemp >> 32);
255                                 ''')
256    buildMult4InstUnCc("smlsldx", '''resTemp =
257                                         sext<16>(bits(Reg2, 15, 0)) *
258                                         sext<16>(bits(Reg3, 31, 16)) -
259                                         sext<16>(bits(Reg2, 31, 16)) *
260                                         sext<16>(bits(Reg3, 15, 0)) +
261                                         (int64_t)((Reg1.ud << 32) |
262                                                   Reg0.ud);
263                                     Reg0.ud = (uint32_t)resTemp;
264                                     Reg1.ud = (uint32_t)(resTemp >> 32);
265                                  ''')
266    buildMult4InstUnCc("smmla", '''Reg0 = resTemp =
267                                       ((int64_t)(Reg3.ud << 32) +
268                                        Reg1.sw * Reg2.sw) >> 32;
269                                ''')
270    buildMult4InstUnCc("smmlar", '''Reg0 = resTemp =
271                                        ((int64_t)(Reg3.ud << 32) +
272                                         Reg1.sw * Reg2.sw +
273                                         ULL(0x80000000)) >> 32;
274                                 ''')
275    buildMult4InstUnCc("smmls", '''Reg0 = resTemp =
276                                       ((int64_t)(Reg3.ud << 32) -
277                                        Reg1.sw * Reg2.sw) >> 32;
278                                ''')
279    buildMult4InstUnCc("smmlsr", '''Reg0 = resTemp =
280                                        ((int64_t)(Reg3.ud << 32) -
281                                         Reg1.sw * Reg2.sw +
282                                         ULL(0x80000000)) >> 32;
283                                 ''')
284    buildMult3InstUnCc("smmul", '''Reg0 = resTemp =
285                                       ((int64_t)Reg1 *
286                                        (int64_t)Reg2) >> 32;
287                                ''')
288    buildMult3InstUnCc("smmulr", '''Reg0 = resTemp =
289                                        ((int64_t)Reg1 *
290                                         (int64_t)Reg2 +
291                                         ULL(0x80000000)) >> 32;
292                                 ''')
293    buildMult3InstCc  ("smuad", '''Reg0 = resTemp =
294                                        sext<16>(bits(Reg1, 15, 0)) *
295                                        sext<16>(bits(Reg2, 15, 0)) +
296                                        sext<16>(bits(Reg1, 31, 16)) *
297                                        sext<16>(bits(Reg2, 31, 16));
298                                ''', "overflow")
299    buildMult3InstCc  ("smuadx", '''Reg0 = resTemp =
300                                        sext<16>(bits(Reg1, 15, 0)) *
301                                        sext<16>(bits(Reg2, 31, 16)) +
302                                        sext<16>(bits(Reg1, 31, 16)) *
303                                        sext<16>(bits(Reg2, 15, 0));
304                                 ''', "overflow")
305    buildMult3InstUnCc("smulbb", '''Reg0 = resTemp =
306                                         sext<16>(bits(Reg1, 15, 0)) *
307                                         sext<16>(bits(Reg2, 15, 0));
308                                 ''')
309    buildMult3InstUnCc("smulbt", '''Reg0 = resTemp =
310                                         sext<16>(bits(Reg1, 31, 16)) *
311                                         sext<16>(bits(Reg2, 15, 0));
312                                 ''')
313    buildMult3InstUnCc("smultb", '''Reg0 = resTemp =
314                                         sext<16>(bits(Reg1, 15, 0)) *
315                                         sext<16>(bits(Reg2, 31, 16));
316                                 ''')
317    buildMult3InstUnCc("smultt", '''Reg0 = resTemp =
318                                         sext<16>(bits(Reg1, 31, 16)) *
319                                         sext<16>(bits(Reg2, 31, 16));
320                                 ''')
321    buildMult4Inst    ("smull", '''resTemp = Reg2.sw * Reg3.sw;
322                                   Reg0 = (int32_t)resTemp;
323                                   Reg1 = (int32_t)(resTemp >> 32);
324                                ''', "llbit")
325    buildMult3InstUnCc("smulwb", '''Reg0 = resTemp =
326                                        (Reg1.sw *
327                                         sext<16>(bits(Reg2, 15, 0))) >> 16;
328                                 ''')
329    buildMult3InstUnCc("smulwt", '''Reg0 = resTemp =
330                                        (Reg1.sw *
331                                         sext<16>(bits(Reg2, 31, 16))) >> 16;
332                                 ''')
333    buildMult3InstUnCc("smusd", '''Reg0 = resTemp =
334                                        sext<16>(bits(Reg1, 15, 0)) *
335                                        sext<16>(bits(Reg2, 15, 0)) -
336                                        sext<16>(bits(Reg1, 31, 16)) *
337                                        sext<16>(bits(Reg2, 31, 16));
338                                ''')
339    buildMult3InstUnCc("smusdx", '''Reg0 = resTemp =
340                                        sext<16>(bits(Reg1, 15, 0)) *
341                                        sext<16>(bits(Reg2, 31, 16)) -
342                                        sext<16>(bits(Reg1, 31, 16)) *
343                                        sext<16>(bits(Reg2, 15, 0));
344                                 ''')
345    buildMult4InstUnCc("umaal", '''resTemp = Reg2.ud * Reg3.ud +
346                                             Reg0.ud + Reg1.ud;
347                                   Reg0.ud = (uint32_t)resTemp;
348                                   Reg1.ud = (uint32_t)(resTemp >> 32);
349                                ''')
350    buildMult4Inst    ("umlal", '''resTemp = Reg2.ud * Reg3.ud + Reg0.ud +
351                                             (Reg1.ud << 32);
352                                   Reg0.ud = (uint32_t)resTemp;
353                                   Reg1.ud = (uint32_t)(resTemp >> 32);
354                                ''', "llbit")
355    buildMult4Inst    ("umull", '''resTemp = Reg2.ud * Reg3.ud;
356                                   Reg0 = (uint32_t)resTemp;
357                                   Reg1 = (uint32_t)(resTemp >> 32);
358                                ''', "llbit")
359}};
360