mult.isa (7422:feddb9077def) mult.isa (7760:e93e7e0caae1)
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder. You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40let {{
41
42 header_output = ""
43 decoder_output = ""
44 exec_output = ""
45
46 calcQCode = '''
47 CondCodes = CondCodes | ((resTemp & 1) << 27);
48 '''
49
50 calcCcCode = '''
51 uint16_t _iz, _in;
52 _in = (resTemp >> %(negBit)d) & 1;
53 _iz = ((%(zType)s)resTemp == 0);
54
55 CondCodes = _in << 31 | _iz << 30 | (CondCodes & 0x3FFFFFFF);
56
57 DPRINTF(Arm, "(in, iz) = (%%d, %%d)\\n", _in, _iz);
58 '''
59
60 def buildMultInst(mnem, doCc, unCc, regs, code, flagType):
61 global header_output, decoder_output, exec_output
62 cCode = carryCode[flagType]
63 vCode = overflowCode[flagType]
64 zType = "uint32_t"
65 negBit = 31
66 if flagType == "llbit":
67 zType = "uint64_t"
68 negBit = 63
69 if flagType == "overflow":
70 ccCode = calcQCode
71 else:
72 ccCode = calcCcCode % {
73 "negBit": negBit,
74 "zType": zType
75 }
76
77 if not regs in (3, 4):
78 raise Exception, "Multiplication instructions with %d " + \
79 "registers are not implemented"
80
81 if regs == 3:
82 base = 'Mult3'
83 else:
84 base = 'Mult4'
85
86 Name = mnem.capitalize()
87
88 if unCc:
89 iop = InstObjParams(mnem, Name, base,
90 {"code" : code,
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating
9// to a hardware implementation of the functionality of the software
10// licensed hereunder. You may use the software subject to the license
11// terms below provided that you ensure that this notice is replicated
12// unmodified and in its entirety in all distributions of the software,
13// modified or unmodified, in source code or in binary form.
14//
15// Redistribution and use in source and binary forms, with or without
16// modification, are permitted provided that the following conditions are
17// met: redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer;
19// redistributions in binary form must reproduce the above copyright
20// notice, this list of conditions and the following disclaimer in the
21// documentation and/or other materials provided with the distribution;
22// neither the name of the copyright holders nor the names of its
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Authors: Gabe Black
39
40let {{
41
# Text accumulators; buildMultInst appends the substituted declaration,
# constructor and execute templates to these three strings.
42 header_output = ""
43 decoder_output = ""
44 exec_output = ""
45
# C++ snippet used when flagType is "overflow": ORs bit 0 of resTemp into
# bit 27 of CondCodes and leaves every other bit of CondCodes unchanged.
46 calcQCode = '''
47 CondCodes = CondCodes | ((resTemp & 1) << 27);
48 '''
49
# C++ snippet template used for every other flagType.  buildMultInst fills
# in %(negBit)d (bit of resTemp copied into bit 31 of CondCodes) and
# %(zType)s (type resTemp is cast to before the == 0 test that feeds bit 30);
# the low 30 bits of CondCodes are kept via the 0x3FFFFFFF mask.
# "%%d" is doubled because this string goes through Python %-formatting.
50 calcCcCode = '''
51 uint16_t _iz, _in;
52 _in = (resTemp >> %(negBit)d) & 1;
53 _iz = ((%(zType)s)resTemp == 0);
54
55 CondCodes = _in << 31 | _iz << 30 | (CondCodes & 0x3FFFFFFF);
56
57 DPRINTF(Arm, "(in, iz) = (%%d, %%d)\\n", _in, _iz);
58 '''
59
def buildMultInst(mnem, doCc, unCc, regs, code, flagType):
    """Generate the declaration, constructor and execute text for a multiply.

    The substituted templates are appended to the global header_output,
    decoder_output and exec_output strings.

    mnem     -- instruction mnemonic; the class name is mnem.capitalize().
    doCc     -- when true, emit a variant named mnem + "s" (class
                Name + "Cc") whose code is `code` followed by the
                flag-update snippet, predicated on condPredicateTest.
    unCc     -- when true, emit a variant that runs `code` as given,
                predicated on predicateTest.
    regs     -- register operand count; 3 selects the Mult3 base class and
                templates, 4 selects Mult4.
    code     -- C++ snippet that computes the result (and resTemp).
    flagType -- "overflow" selects calcQCode; "llbit" selects calcCcCode
                with a uint64_t zero test and bit 63 as the sign bit; any
                other value selects calcCcCode with uint32_t and bit 31.
                It is also used to index carryCode and overflowCode.

    Raises Exception when regs is neither 3 nor 4.
    """
    global header_output, decoder_output, exec_output

    # Neither value is used further down; the lookups are kept so that
    # carryCode and overflowCode are still indexed with flagType as before.
    cCode = carryCode[flagType]
    vCode = overflowCode[flagType]

    # Pick the snippet that is appended to `code` for the Cc variant.
    if flagType == "overflow":
        ccCode = calcQCode
    else:
        if flagType == "llbit":
            zType, negBit = "uint64_t", 63
        else:
            zType, negBit = "uint32_t", 31
        ccCode = calcCcCode % {
            "negBit": negBit,
            "zType": zType
        }

    if regs not in (3, 4):
        # The message used to be raised with its "%d" left unformatted;
        # interpolate the offending register count.
        raise Exception("Multiplication instructions with %d "
                        "registers are not implemented" % regs)

    if regs == 3:
        base = 'Mult3'
    else:
        base = 'Mult4'

    Name = mnem.capitalize()

    if unCc:
        iop = InstObjParams(mnem, Name, base,
                            {"code" : code,
                             "predicate_test": predicateTest,
                             "op_class": "IntMultOp" })
    if doCc:
        iopCc = InstObjParams(mnem + "s", Name + "Cc", base,
                              {"code" : code + ccCode,
                               "predicate_test": condPredicateTest,
                               "op_class": "IntMultOp" })

    if regs == 3:
        declare = Mult3Declare
        constructor = Mult3Constructor
    else:
        declare = Mult4Declare
        constructor = Mult4Constructor

    # Plain variant first, then the Cc variant, so the generated text
    # keeps the same ordering as before.
    if unCc:
        header_output += declare.subst(iop)
        decoder_output += constructor.subst(iop)
        exec_output += PredOpExecute.subst(iop)
    if doCc:
        header_output += declare.subst(iopCc)
        decoder_output += constructor.subst(iopCc)
        exec_output += PredOpExecute.subst(iopCc)
112
def buildMult3Inst(mnem, code, flagType = "logic"):
    # Three-register multiply, emitted in both the plain and the Cc form.
    buildMultInst(mnem, doCc=True, unCc=True, regs=3,
                  code=code, flagType=flagType)

def buildMult3InstCc(mnem, code, flagType = "logic"):
    # Three-register multiply, emitted only in the Cc form.
    buildMultInst(mnem, doCc=True, unCc=False, regs=3,
                  code=code, flagType=flagType)

def buildMult3InstUnCc(mnem, code, flagType = "logic"):
    # Three-register multiply, emitted only in the plain form.
    buildMultInst(mnem, doCc=False, unCc=True, regs=3,
                  code=code, flagType=flagType)

def buildMult4Inst(mnem, code, flagType = "logic"):
    # Four-register multiply, emitted in both the plain and the Cc form.
    buildMultInst(mnem, doCc=True, unCc=True, regs=4,
                  code=code, flagType=flagType)

def buildMult4InstCc(mnem, code, flagType = "logic"):
    # Four-register multiply, emitted only in the Cc form.
    buildMultInst(mnem, doCc=True, unCc=False, regs=4,
                  code=code, flagType=flagType)

def buildMult4InstUnCc(mnem, code, flagType = "logic"):
    # Four-register multiply, emitted only in the plain form.
    buildMultInst(mnem, doCc=False, unCc=True, regs=4,
                  code=code, flagType=flagType)
130
# Basic multiplies with the default "logic" flagType.  mla and mul are
# generated in both the plain and the Cc form; mls only in the plain form.
131 buildMult4Inst ("mla", "Reg0 = resTemp = Reg1 * Reg2 + Reg3;")
132 buildMult4InstUnCc("mls", "Reg0 = resTemp = Reg3 - Reg1 * Reg2;")
133 buildMult3Inst ("mul", "Reg0 = resTemp = Reg1 * Reg2;")
# Cc-only definitions using the "overflow" flagType.  Each snippet adds
# Reg3.sw to a product of sign-extended 16 bit halves of Reg1 and Reg2, then
# reduces resTemp to (bit 32 != bit 31); calcQCode ORs that single bit into
# bit 27 of CondCodes.  The four variants below differ only in which half
# (15:0 or 31:16) of each source register is used.
134 buildMult4InstCc ("smlabb", '''Reg0 = resTemp =
135 sext<16>(bits(Reg1, 15, 0)) *
136 sext<16>(bits(Reg2.sw, 15, 0)) +
137 Reg3.sw;
138 resTemp = bits(resTemp, 32) !=
139 bits(resTemp, 31);
140 ''', "overflow")
141 buildMult4InstCc ("smlabt", '''Reg0 = resTemp =
142 sext<16>(bits(Reg1, 15, 0)) *
143 sext<16>(bits(Reg2.sw, 31, 16)) +
144 Reg3.sw;
145 resTemp = bits(resTemp, 32) !=
146 bits(resTemp, 31);
147 ''', "overflow")
148 buildMult4InstCc ("smlatb", '''Reg0 = resTemp =
149 sext<16>(bits(Reg1, 31, 16)) *
150 sext<16>(bits(Reg2.sw, 15, 0)) +
151 Reg3.sw;
152 resTemp = bits(resTemp, 32) !=
153 bits(resTemp, 31);
154 ''', "overflow")
155 buildMult4InstCc ("smlatt", '''Reg0 = resTemp =
156 sext<16>(bits(Reg1, 31, 16)) *
157 sext<16>(bits(Reg2.sw, 31, 16)) +
158 Reg3.sw;
159 resTemp = bits(resTemp, 32) !=
160 bits(resTemp, 31);
161 ''', "overflow")
# Dual products: top*top + bottom*bottom (smlad) or the crossed pairing
# top*bottom + bottom*top (smladx), plus Reg3.sw.
162 buildMult4InstCc ("smlad", '''Reg0 = resTemp =
163 sext<16>(bits(Reg1, 31, 16)) *
164 sext<16>(bits(Reg2, 31, 16)) +
165 sext<16>(bits(Reg1, 15, 0)) *
166 sext<16>(bits(Reg2, 15, 0)) +
167 Reg3.sw;
168 resTemp = bits(resTemp, 32) !=
169 bits(resTemp, 31);
170 ''', "overflow")
171 buildMult4InstCc ("smladx", '''Reg0 = resTemp =
172 sext<16>(bits(Reg1, 31, 16)) *
173 sext<16>(bits(Reg2, 15, 0)) +
174 sext<16>(bits(Reg1, 15, 0)) *
175 sext<16>(bits(Reg2, 31, 16)) +
176 Reg3.sw;
177 resTemp = bits(resTemp, 32) !=
178 bits(resTemp, 31);
179 ''', "overflow")
# 64 bit accumulating forms.  The accumulator is read as
# (Reg1.ud << 32) | Reg0.ud and the result is written back with its low
# 32 bits in Reg0 and its high 32 bits in Reg1.
# smlal multiplies sext<32>(Reg2) by sext<32>(Reg3) and is generated in both
# the plain and the Cc form with the "llbit" (64 bit N/Z) flagType.
180 buildMult4Inst ("smlal", '''resTemp = sext<32>(Reg2) * sext<32>(Reg3) +
181 (int64_t)((Reg1.ud << 32) | Reg0.ud);
182 Reg0.ud = (uint32_t)resTemp;
183 Reg1.ud = (uint32_t)(resTemp >> 32);
184 ''', "llbit")
# Plain-only variants multiplying sign-extended 16 bit halves of Reg2 and
# Reg3; they differ only in which half of each register is selected.
185 buildMult4InstUnCc("smlalbb", '''resTemp = sext<16>(bits(Reg2, 15, 0)) *
186 sext<16>(bits(Reg3, 15, 0)) +
187 (int64_t)((Reg1.ud << 32) |
188 Reg0.ud);
189 Reg0.ud = (uint32_t)resTemp;
190 Reg1.ud = (uint32_t)(resTemp >> 32);
191 ''')
192 buildMult4InstUnCc("smlalbt", '''resTemp = sext<16>(bits(Reg2, 15, 0)) *
193 sext<16>(bits(Reg3, 31, 16)) +
194 (int64_t)((Reg1.ud << 32) |
195 Reg0.ud);
196 Reg0.ud = (uint32_t)resTemp;
197 Reg1.ud = (uint32_t)(resTemp >> 32);
198 ''')
199 buildMult4InstUnCc("smlaltb", '''resTemp = sext<16>(bits(Reg2, 31, 16)) *
200 sext<16>(bits(Reg3, 15, 0)) +
201 (int64_t)((Reg1.ud << 32) |
202 Reg0.ud);
203 Reg0.ud = (uint32_t)resTemp;
204 Reg1.ud = (uint32_t)(resTemp >> 32);
205 ''')
206 buildMult4InstUnCc("smlaltt", '''resTemp = sext<16>(bits(Reg2, 31, 16)) *
207 sext<16>(bits(Reg3, 31, 16)) +
208 (int64_t)((Reg1.ud << 32) |
209 Reg0.ud);
210 Reg0.ud = (uint32_t)resTemp;
211 Reg1.ud = (uint32_t)(resTemp >> 32);
212 ''')
# Dual products added to the 64 bit accumulator: straight pairing (smlald)
# and crossed pairing (smlaldx).
213 buildMult4InstUnCc("smlald", '''resTemp =
214 sext<16>(bits(Reg2, 31, 16)) *
215 sext<16>(bits(Reg3, 31, 16)) +
216 sext<16>(bits(Reg2, 15, 0)) *
217 sext<16>(bits(Reg3, 15, 0)) +
218 (int64_t)((Reg1.ud << 32) |
219 Reg0.ud);
220 Reg0.ud = (uint32_t)resTemp;
221 Reg1.ud = (uint32_t)(resTemp >> 32);
222 ''')
223 buildMult4InstUnCc("smlaldx", '''resTemp =
224 sext<16>(bits(Reg2, 31, 16)) *
225 sext<16>(bits(Reg3, 15, 0)) +
226 sext<16>(bits(Reg2, 15, 0)) *
227 sext<16>(bits(Reg3, 31, 16)) +
228 (int64_t)((Reg1.ud << 32) |
229 Reg0.ud);
230 Reg0.ud = (uint32_t)resTemp;
231 Reg1.ud = (uint32_t)(resTemp >> 32);
232 ''')
# Cc-only, "overflow" flagType: Reg1.sw times a sign-extended 16 bit half of
# Reg2, plus Reg3.sw shifted left by 16, with the sum shifted right by 16.
# resTemp is then reduced to (bit 32 != bit 31) for calcQCode.
233 buildMult4InstCc ("smlawb", '''Reg0 = resTemp =
234 (Reg1.sw *
235 sext<16>(bits(Reg2, 15, 0)) +
236 ((int64_t)Reg3.sw << 16)) >> 16;
237 resTemp = bits(resTemp, 32) !=
238 bits(resTemp, 31);
239 ''', "overflow")
240 buildMult4InstCc ("smlawt", '''Reg0 = resTemp =
241 (Reg1.sw *
242 sext<16>(bits(Reg2, 31, 16)) +
243 ((int64_t)Reg3.sw << 16)) >> 16;
244 resTemp = bits(resTemp, 32) !=
245 bits(resTemp, 31);
246 ''', "overflow")
# Cc-only, "overflow" flagType: difference of two halfword products
# (bottom product minus top product; crossed pairing for the x form),
# plus Reg3.sw.
247 buildMult4InstCc ("smlsd", '''Reg0 = resTemp =
248 sext<16>(bits(Reg1, 15, 0)) *
249 sext<16>(bits(Reg2, 15, 0)) -
250 sext<16>(bits(Reg1, 31, 16)) *
251 sext<16>(bits(Reg2, 31, 16)) +
252 Reg3.sw;
253 resTemp = bits(resTemp, 32) !=
254 bits(resTemp, 31);
255 ''', "overflow")
256 buildMult4InstCc ("smlsdx", '''Reg0 = resTemp =
257 sext<16>(bits(Reg1, 15, 0)) *
258 sext<16>(bits(Reg2, 31, 16)) -
259 sext<16>(bits(Reg1, 31, 16)) *
260 sext<16>(bits(Reg2, 15, 0)) +
261 Reg3.sw;
262 resTemp = bits(resTemp, 32) !=
263 bits(resTemp, 31);
264 ''', "overflow")
# Plain-only: the same product differences taken from Reg2 and Reg3 and
# added to the 64 bit value (Reg1.ud << 32) | Reg0.ud; the result is split
# back into Reg0 (low 32 bits) and Reg1 (high 32 bits).
265 buildMult4InstUnCc("smlsld", '''resTemp =
266 sext<16>(bits(Reg2, 15, 0)) *
267 sext<16>(bits(Reg3, 15, 0)) -
268 sext<16>(bits(Reg2, 31, 16)) *
269 sext<16>(bits(Reg3, 31, 16)) +
270 (int64_t)((Reg1.ud << 32) |
271 Reg0.ud);
272 Reg0.ud = (uint32_t)resTemp;
273 Reg1.ud = (uint32_t)(resTemp >> 32);
274 ''')
275 buildMult4InstUnCc("smlsldx", '''resTemp =
276 sext<16>(bits(Reg2, 15, 0)) *
277 sext<16>(bits(Reg3, 31, 16)) -
278 sext<16>(bits(Reg2, 31, 16)) *
279 sext<16>(bits(Reg3, 15, 0)) +
280 (int64_t)((Reg1.ud << 32) |
281 Reg0.ud);
282 Reg0.ud = (uint32_t)resTemp;
283 Reg1.ud = (uint32_t)(resTemp >> 32);
284 ''')
# Plain-only definitions that keep bits 63:32 (>> 32) of a 64 bit value.
# smmla/smmls add the product to, or subtract it from, Reg3.ud << 32;
# smmul uses the product alone.  The forms ending in "r" add
# ULL(0x80000000) before the shift.
285 buildMult4InstUnCc("smmla", '''Reg0 = resTemp =
286 ((int64_t)(Reg3.ud << 32) +
287 (int64_t)Reg1.sw *
288 (int64_t)Reg2.sw) >> 32;
289 ''')
290 buildMult4InstUnCc("smmlar", '''Reg0 = resTemp =
291 ((int64_t)(Reg3.ud << 32) +
292 (int64_t)Reg1.sw *
293 (int64_t)Reg2.sw +
294 ULL(0x80000000)) >> 32;
295 ''')
296 buildMult4InstUnCc("smmls", '''Reg0 = resTemp =
297 ((int64_t)(Reg3.ud << 32) -
298 (int64_t)Reg1.sw *
299 (int64_t)Reg2.sw) >> 32;
300 ''')
301 buildMult4InstUnCc("smmlsr", '''Reg0 = resTemp =
302 ((int64_t)(Reg3.ud << 32) -
303 (int64_t)Reg1.sw *
304 (int64_t)Reg2.sw +
305 ULL(0x80000000)) >> 32;
306 ''')
307 buildMult3InstUnCc("smmul", '''Reg0 = resTemp =
308 ((int64_t)Reg1.sw *
309 (int64_t)Reg2.sw) >> 32;
310 ''')
311 buildMult3InstUnCc("smmulr", '''Reg0 = resTemp =
312 ((int64_t)Reg1.sw *
313 (int64_t)Reg2.sw +
314 ULL(0x80000000)) >> 32;
315 ''')
# Cc-only, "overflow" flagType: sum of two halfword products (straight and
# crossed pairing) with no accumulator; resTemp is reduced to
# (bit 32 != bit 31) for calcQCode.
316 buildMult3InstCc ("smuad", '''Reg0 = resTemp =
317 sext<16>(bits(Reg1, 15, 0)) *
318 sext<16>(bits(Reg2, 15, 0)) +
319 sext<16>(bits(Reg1, 31, 16)) *
320 sext<16>(bits(Reg2, 31, 16));
321 resTemp = bits(resTemp, 32) !=
322 bits(resTemp, 31);
323 ''', "overflow")
324 buildMult3InstCc ("smuadx", '''Reg0 = resTemp =
325 sext<16>(bits(Reg1, 15, 0)) *
326 sext<16>(bits(Reg2, 31, 16)) +
327 sext<16>(bits(Reg1, 31, 16)) *
328 sext<16>(bits(Reg2, 15, 0));
329 resTemp = bits(resTemp, 32) !=
330 bits(resTemp, 31);
331 ''', "overflow")
# Plain-only single halfword products; the suffix letters correspond to the
# half of Reg1 and Reg2 selected (15:0 or 31:16).
332 buildMult3InstUnCc("smulbb", '''Reg0 = resTemp =
333 sext<16>(bits(Reg1, 15, 0)) *
334 sext<16>(bits(Reg2, 15, 0));
335 ''')
336 buildMult3InstUnCc("smulbt", '''Reg0 = resTemp =
337 sext<16>(bits(Reg1, 15, 0)) *
338 sext<16>(bits(Reg2, 31, 16));
339 ''')
340 buildMult3InstUnCc("smultb", '''Reg0 = resTemp =
341 sext<16>(bits(Reg1, 31, 16)) *
342 sext<16>(bits(Reg2, 15, 0));
343 ''')
344 buildMult3InstUnCc("smultt", '''Reg0 = resTemp =
345 sext<16>(bits(Reg1, 31, 16)) *
346 sext<16>(bits(Reg2, 31, 16));
347 ''')
# 64 bit signed product of Reg2.sw and Reg3.sw, low half to Reg0 and high
# half to Reg1; plain and Cc forms, "llbit" (64 bit N/Z) flagType.
348 buildMult4Inst ("smull", '''resTemp = (int64_t)Reg2.sw *
349 (int64_t)Reg3.sw;
350 Reg0 = (int32_t)resTemp;
351 Reg1 = (int32_t)(resTemp >> 32);
352 ''', "llbit")
# Plain-only: Reg1.sw times a sign-extended half of Reg2, shifted right 16.
353 buildMult3InstUnCc("smulwb", '''Reg0 = resTemp =
354 (Reg1.sw *
355 sext<16>(bits(Reg2, 15, 0))) >> 16;
356 ''')
357 buildMult3InstUnCc("smulwt", '''Reg0 = resTemp =
358 (Reg1.sw *
359 sext<16>(bits(Reg2, 31, 16))) >> 16;
360 ''')
# Plain-only: difference of two halfword products (straight and crossed).
361 buildMult3InstUnCc("smusd", '''Reg0 = resTemp =
362 sext<16>(bits(Reg1, 15, 0)) *
363 sext<16>(bits(Reg2, 15, 0)) -
364 sext<16>(bits(Reg1, 31, 16)) *
365 sext<16>(bits(Reg2, 31, 16));
366 ''')
367 buildMult3InstUnCc("smusdx", '''Reg0 = resTemp =
368 sext<16>(bits(Reg1, 15, 0)) *
369 sext<16>(bits(Reg2, 31, 16)) -
370 sext<16>(bits(Reg1, 31, 16)) *
371 sext<16>(bits(Reg2, 15, 0));
372 ''')
# Unsigned forms working on the .ud views of the registers; every result is
# split into Reg0 (low 32 bits) and Reg1 (high 32 bits).
# umaal adds Reg0.ud and Reg1.ud separately to the product (plain only);
# umlal adds Reg0.ud + (Reg1.ud << 32); umull is the product alone.
# umlal and umull are generated in plain and Cc forms with "llbit".
373 buildMult4InstUnCc("umaal", '''resTemp = Reg2.ud * Reg3.ud +
374 Reg0.ud + Reg1.ud;
375 Reg0.ud = (uint32_t)resTemp;
376 Reg1.ud = (uint32_t)(resTemp >> 32);
377 ''')
378 buildMult4Inst ("umlal", '''resTemp = Reg2.ud * Reg3.ud + Reg0.ud +
379 (Reg1.ud << 32);
380 Reg0.ud = (uint32_t)resTemp;
381 Reg1.ud = (uint32_t)(resTemp >> 32);
382 ''', "llbit")
383 buildMult4Inst ("umull", '''resTemp = Reg2.ud * Reg3.ud;
384 Reg0 = (uint32_t)resTemp;
385 Reg1 = (uint32_t)(resTemp >> 32);
386 ''', "llbit")
387}};
98
99 if regs == 3:
100 declare = Mult3Declare
101 constructor = Mult3Constructor
102 else:
103 declare = Mult4Declare
104 constructor = Mult4Constructor
105
106 if unCc:
107 header_output += declare.subst(iop)
108 decoder_output += constructor.subst(iop)
109 exec_output += PredOpExecute.subst(iop)
110 if doCc:
111 header_output += declare.subst(iopCc)
112 decoder_output += constructor.subst(iopCc)
113 exec_output += PredOpExecute.subst(iopCc)
114
def buildMult3Inst(mnem, code, flagType = "logic"):
    # Three-register multiply, emitted in both the plain and the Cc form.
    buildMultInst(mnem, doCc=True, unCc=True, regs=3,
                  code=code, flagType=flagType)

def buildMult3InstCc(mnem, code, flagType = "logic"):
    # Three-register multiply, emitted only in the Cc form.
    buildMultInst(mnem, doCc=True, unCc=False, regs=3,
                  code=code, flagType=flagType)

def buildMult3InstUnCc(mnem, code, flagType = "logic"):
    # Three-register multiply, emitted only in the plain form.
    buildMultInst(mnem, doCc=False, unCc=True, regs=3,
                  code=code, flagType=flagType)

def buildMult4Inst(mnem, code, flagType = "logic"):
    # Four-register multiply, emitted in both the plain and the Cc form.
    buildMultInst(mnem, doCc=True, unCc=True, regs=4,
                  code=code, flagType=flagType)

def buildMult4InstCc(mnem, code, flagType = "logic"):
    # Four-register multiply, emitted only in the Cc form.
    buildMultInst(mnem, doCc=True, unCc=False, regs=4,
                  code=code, flagType=flagType)

def buildMult4InstUnCc(mnem, code, flagType = "logic"):
    # Four-register multiply, emitted only in the plain form.
    buildMultInst(mnem, doCc=False, unCc=True, regs=4,
                  code=code, flagType=flagType)
132
# Basic multiplies with the default "logic" flagType.  mla and mul are
# generated in both the plain and the Cc form; mls only in the plain form.
133 buildMult4Inst ("mla", "Reg0 = resTemp = Reg1 * Reg2 + Reg3;")
134 buildMult4InstUnCc("mls", "Reg0 = resTemp = Reg3 - Reg1 * Reg2;")
135 buildMult3Inst ("mul", "Reg0 = resTemp = Reg1 * Reg2;")
# Cc-only definitions using the "overflow" flagType.  Each snippet adds
# Reg3.sw to a product of sign-extended 16 bit halves of Reg1 and Reg2, then
# reduces resTemp to (bit 32 != bit 31); calcQCode ORs that single bit into
# bit 27 of CondCodes.  The four variants below differ only in which half
# (15:0 or 31:16) of each source register is used.
136 buildMult4InstCc ("smlabb", '''Reg0 = resTemp =
137 sext<16>(bits(Reg1, 15, 0)) *
138 sext<16>(bits(Reg2.sw, 15, 0)) +
139 Reg3.sw;
140 resTemp = bits(resTemp, 32) !=
141 bits(resTemp, 31);
142 ''', "overflow")
143 buildMult4InstCc ("smlabt", '''Reg0 = resTemp =
144 sext<16>(bits(Reg1, 15, 0)) *
145 sext<16>(bits(Reg2.sw, 31, 16)) +
146 Reg3.sw;
147 resTemp = bits(resTemp, 32) !=
148 bits(resTemp, 31);
149 ''', "overflow")
150 buildMult4InstCc ("smlatb", '''Reg0 = resTemp =
151 sext<16>(bits(Reg1, 31, 16)) *
152 sext<16>(bits(Reg2.sw, 15, 0)) +
153 Reg3.sw;
154 resTemp = bits(resTemp, 32) !=
155 bits(resTemp, 31);
156 ''', "overflow")
157 buildMult4InstCc ("smlatt", '''Reg0 = resTemp =
158 sext<16>(bits(Reg1, 31, 16)) *
159 sext<16>(bits(Reg2.sw, 31, 16)) +
160 Reg3.sw;
161 resTemp = bits(resTemp, 32) !=
162 bits(resTemp, 31);
163 ''', "overflow")
# Dual products: top*top + bottom*bottom (smlad) or the crossed pairing
# top*bottom + bottom*top (smladx), plus Reg3.sw.
164 buildMult4InstCc ("smlad", '''Reg0 = resTemp =
165 sext<16>(bits(Reg1, 31, 16)) *
166 sext<16>(bits(Reg2, 31, 16)) +
167 sext<16>(bits(Reg1, 15, 0)) *
168 sext<16>(bits(Reg2, 15, 0)) +
169 Reg3.sw;
170 resTemp = bits(resTemp, 32) !=
171 bits(resTemp, 31);
172 ''', "overflow")
173 buildMult4InstCc ("smladx", '''Reg0 = resTemp =
174 sext<16>(bits(Reg1, 31, 16)) *
175 sext<16>(bits(Reg2, 15, 0)) +
176 sext<16>(bits(Reg1, 15, 0)) *
177 sext<16>(bits(Reg2, 31, 16)) +
178 Reg3.sw;
179 resTemp = bits(resTemp, 32) !=
180 bits(resTemp, 31);
181 ''', "overflow")
# 64 bit accumulating forms.  The accumulator is read as
# (Reg1.ud << 32) | Reg0.ud and the result is written back with its low
# 32 bits in Reg0 and its high 32 bits in Reg1.
# smlal multiplies sext<32>(Reg2) by sext<32>(Reg3) and is generated in both
# the plain and the Cc form with the "llbit" (64 bit N/Z) flagType.
182 buildMult4Inst ("smlal", '''resTemp = sext<32>(Reg2) * sext<32>(Reg3) +
183 (int64_t)((Reg1.ud << 32) | Reg0.ud);
184 Reg0.ud = (uint32_t)resTemp;
185 Reg1.ud = (uint32_t)(resTemp >> 32);
186 ''', "llbit")
# Plain-only variants multiplying sign-extended 16 bit halves of Reg2 and
# Reg3; they differ only in which half of each register is selected.
187 buildMult4InstUnCc("smlalbb", '''resTemp = sext<16>(bits(Reg2, 15, 0)) *
188 sext<16>(bits(Reg3, 15, 0)) +
189 (int64_t)((Reg1.ud << 32) |
190 Reg0.ud);
191 Reg0.ud = (uint32_t)resTemp;
192 Reg1.ud = (uint32_t)(resTemp >> 32);
193 ''')
194 buildMult4InstUnCc("smlalbt", '''resTemp = sext<16>(bits(Reg2, 15, 0)) *
195 sext<16>(bits(Reg3, 31, 16)) +
196 (int64_t)((Reg1.ud << 32) |
197 Reg0.ud);
198 Reg0.ud = (uint32_t)resTemp;
199 Reg1.ud = (uint32_t)(resTemp >> 32);
200 ''')
201 buildMult4InstUnCc("smlaltb", '''resTemp = sext<16>(bits(Reg2, 31, 16)) *
202 sext<16>(bits(Reg3, 15, 0)) +
203 (int64_t)((Reg1.ud << 32) |
204 Reg0.ud);
205 Reg0.ud = (uint32_t)resTemp;
206 Reg1.ud = (uint32_t)(resTemp >> 32);
207 ''')
208 buildMult4InstUnCc("smlaltt", '''resTemp = sext<16>(bits(Reg2, 31, 16)) *
209 sext<16>(bits(Reg3, 31, 16)) +
210 (int64_t)((Reg1.ud << 32) |
211 Reg0.ud);
212 Reg0.ud = (uint32_t)resTemp;
213 Reg1.ud = (uint32_t)(resTemp >> 32);
214 ''')
# Dual products added to the 64 bit accumulator: straight pairing (smlald)
# and crossed pairing (smlaldx).
215 buildMult4InstUnCc("smlald", '''resTemp =
216 sext<16>(bits(Reg2, 31, 16)) *
217 sext<16>(bits(Reg3, 31, 16)) +
218 sext<16>(bits(Reg2, 15, 0)) *
219 sext<16>(bits(Reg3, 15, 0)) +
220 (int64_t)((Reg1.ud << 32) |
221 Reg0.ud);
222 Reg0.ud = (uint32_t)resTemp;
223 Reg1.ud = (uint32_t)(resTemp >> 32);
224 ''')
225 buildMult4InstUnCc("smlaldx", '''resTemp =
226 sext<16>(bits(Reg2, 31, 16)) *
227 sext<16>(bits(Reg3, 15, 0)) +
228 sext<16>(bits(Reg2, 15, 0)) *
229 sext<16>(bits(Reg3, 31, 16)) +
230 (int64_t)((Reg1.ud << 32) |
231 Reg0.ud);
232 Reg0.ud = (uint32_t)resTemp;
233 Reg1.ud = (uint32_t)(resTemp >> 32);
234 ''')
# Cc-only, "overflow" flagType: Reg1.sw times a sign-extended 16 bit half of
# Reg2, plus Reg3.sw shifted left by 16, with the sum shifted right by 16.
# resTemp is then reduced to (bit 32 != bit 31) for calcQCode.
235 buildMult4InstCc ("smlawb", '''Reg0 = resTemp =
236 (Reg1.sw *
237 sext<16>(bits(Reg2, 15, 0)) +
238 ((int64_t)Reg3.sw << 16)) >> 16;
239 resTemp = bits(resTemp, 32) !=
240 bits(resTemp, 31);
241 ''', "overflow")
242 buildMult4InstCc ("smlawt", '''Reg0 = resTemp =
243 (Reg1.sw *
244 sext<16>(bits(Reg2, 31, 16)) +
245 ((int64_t)Reg3.sw << 16)) >> 16;
246 resTemp = bits(resTemp, 32) !=
247 bits(resTemp, 31);
248 ''', "overflow")
# Cc-only, "overflow" flagType: difference of two halfword products
# (bottom product minus top product; crossed pairing for the x form),
# plus Reg3.sw.
249 buildMult4InstCc ("smlsd", '''Reg0 = resTemp =
250 sext<16>(bits(Reg1, 15, 0)) *
251 sext<16>(bits(Reg2, 15, 0)) -
252 sext<16>(bits(Reg1, 31, 16)) *
253 sext<16>(bits(Reg2, 31, 16)) +
254 Reg3.sw;
255 resTemp = bits(resTemp, 32) !=
256 bits(resTemp, 31);
257 ''', "overflow")
258 buildMult4InstCc ("smlsdx", '''Reg0 = resTemp =
259 sext<16>(bits(Reg1, 15, 0)) *
260 sext<16>(bits(Reg2, 31, 16)) -
261 sext<16>(bits(Reg1, 31, 16)) *
262 sext<16>(bits(Reg2, 15, 0)) +
263 Reg3.sw;
264 resTemp = bits(resTemp, 32) !=
265 bits(resTemp, 31);
266 ''', "overflow")
# Plain-only: the same product differences taken from Reg2 and Reg3 and
# added to the 64 bit value (Reg1.ud << 32) | Reg0.ud; the result is split
# back into Reg0 (low 32 bits) and Reg1 (high 32 bits).
267 buildMult4InstUnCc("smlsld", '''resTemp =
268 sext<16>(bits(Reg2, 15, 0)) *
269 sext<16>(bits(Reg3, 15, 0)) -
270 sext<16>(bits(Reg2, 31, 16)) *
271 sext<16>(bits(Reg3, 31, 16)) +
272 (int64_t)((Reg1.ud << 32) |
273 Reg0.ud);
274 Reg0.ud = (uint32_t)resTemp;
275 Reg1.ud = (uint32_t)(resTemp >> 32);
276 ''')
277 buildMult4InstUnCc("smlsldx", '''resTemp =
278 sext<16>(bits(Reg2, 15, 0)) *
279 sext<16>(bits(Reg3, 31, 16)) -
280 sext<16>(bits(Reg2, 31, 16)) *
281 sext<16>(bits(Reg3, 15, 0)) +
282 (int64_t)((Reg1.ud << 32) |
283 Reg0.ud);
284 Reg0.ud = (uint32_t)resTemp;
285 Reg1.ud = (uint32_t)(resTemp >> 32);
286 ''')
# Plain-only definitions that keep bits 63:32 (>> 32) of a 64 bit value.
# smmla/smmls add the product to, or subtract it from, Reg3.ud << 32;
# smmul uses the product alone.  The forms ending in "r" add
# ULL(0x80000000) before the shift.
287 buildMult4InstUnCc("smmla", '''Reg0 = resTemp =
288 ((int64_t)(Reg3.ud << 32) +
289 (int64_t)Reg1.sw *
290 (int64_t)Reg2.sw) >> 32;
291 ''')
292 buildMult4InstUnCc("smmlar", '''Reg0 = resTemp =
293 ((int64_t)(Reg3.ud << 32) +
294 (int64_t)Reg1.sw *
295 (int64_t)Reg2.sw +
296 ULL(0x80000000)) >> 32;
297 ''')
298 buildMult4InstUnCc("smmls", '''Reg0 = resTemp =
299 ((int64_t)(Reg3.ud << 32) -
300 (int64_t)Reg1.sw *
301 (int64_t)Reg2.sw) >> 32;
302 ''')
303 buildMult4InstUnCc("smmlsr", '''Reg0 = resTemp =
304 ((int64_t)(Reg3.ud << 32) -
305 (int64_t)Reg1.sw *
306 (int64_t)Reg2.sw +
307 ULL(0x80000000)) >> 32;
308 ''')
309 buildMult3InstUnCc("smmul", '''Reg0 = resTemp =
310 ((int64_t)Reg1.sw *
311 (int64_t)Reg2.sw) >> 32;
312 ''')
313 buildMult3InstUnCc("smmulr", '''Reg0 = resTemp =
314 ((int64_t)Reg1.sw *
315 (int64_t)Reg2.sw +
316 ULL(0x80000000)) >> 32;
317 ''')
# Cc-only, "overflow" flagType: sum of two halfword products (straight and
# crossed pairing) with no accumulator; resTemp is reduced to
# (bit 32 != bit 31) for calcQCode.
318 buildMult3InstCc ("smuad", '''Reg0 = resTemp =
319 sext<16>(bits(Reg1, 15, 0)) *
320 sext<16>(bits(Reg2, 15, 0)) +
321 sext<16>(bits(Reg1, 31, 16)) *
322 sext<16>(bits(Reg2, 31, 16));
323 resTemp = bits(resTemp, 32) !=
324 bits(resTemp, 31);
325 ''', "overflow")
326 buildMult3InstCc ("smuadx", '''Reg0 = resTemp =
327 sext<16>(bits(Reg1, 15, 0)) *
328 sext<16>(bits(Reg2, 31, 16)) +
329 sext<16>(bits(Reg1, 31, 16)) *
330 sext<16>(bits(Reg2, 15, 0));
331 resTemp = bits(resTemp, 32) !=
332 bits(resTemp, 31);
333 ''', "overflow")
# Plain-only single halfword products; the suffix letters correspond to the
# half of Reg1 and Reg2 selected (15:0 or 31:16).
334 buildMult3InstUnCc("smulbb", '''Reg0 = resTemp =
335 sext<16>(bits(Reg1, 15, 0)) *
336 sext<16>(bits(Reg2, 15, 0));
337 ''')
338 buildMult3InstUnCc("smulbt", '''Reg0 = resTemp =
339 sext<16>(bits(Reg1, 15, 0)) *
340 sext<16>(bits(Reg2, 31, 16));
341 ''')
342 buildMult3InstUnCc("smultb", '''Reg0 = resTemp =
343 sext<16>(bits(Reg1, 31, 16)) *
344 sext<16>(bits(Reg2, 15, 0));
345 ''')
346 buildMult3InstUnCc("smultt", '''Reg0 = resTemp =
347 sext<16>(bits(Reg1, 31, 16)) *
348 sext<16>(bits(Reg2, 31, 16));
349 ''')
# 64 bit signed product of Reg2.sw and Reg3.sw, low half to Reg0 and high
# half to Reg1; plain and Cc forms, "llbit" (64 bit N/Z) flagType.
350 buildMult4Inst ("smull", '''resTemp = (int64_t)Reg2.sw *
351 (int64_t)Reg3.sw;
352 Reg0 = (int32_t)resTemp;
353 Reg1 = (int32_t)(resTemp >> 32);
354 ''', "llbit")
# Plain-only: Reg1.sw times a sign-extended half of Reg2, shifted right 16.
355 buildMult3InstUnCc("smulwb", '''Reg0 = resTemp =
356 (Reg1.sw *
357 sext<16>(bits(Reg2, 15, 0))) >> 16;
358 ''')
359 buildMult3InstUnCc("smulwt", '''Reg0 = resTemp =
360 (Reg1.sw *
361 sext<16>(bits(Reg2, 31, 16))) >> 16;
362 ''')
# Plain-only: difference of two halfword products (straight and crossed).
363 buildMult3InstUnCc("smusd", '''Reg0 = resTemp =
364 sext<16>(bits(Reg1, 15, 0)) *
365 sext<16>(bits(Reg2, 15, 0)) -
366 sext<16>(bits(Reg1, 31, 16)) *
367 sext<16>(bits(Reg2, 31, 16));
368 ''')
369 buildMult3InstUnCc("smusdx", '''Reg0 = resTemp =
370 sext<16>(bits(Reg1, 15, 0)) *
371 sext<16>(bits(Reg2, 31, 16)) -
372 sext<16>(bits(Reg1, 31, 16)) *
373 sext<16>(bits(Reg2, 15, 0));
374 ''')
# Unsigned forms working on the .ud views of the registers; every result is
# split into Reg0 (low 32 bits) and Reg1 (high 32 bits).
# umaal adds Reg0.ud and Reg1.ud separately to the product (plain only);
# umlal adds Reg0.ud + (Reg1.ud << 32); umull is the product alone.
# umlal and umull are generated in plain and Cc forms with "llbit".
375 buildMult4InstUnCc("umaal", '''resTemp = Reg2.ud * Reg3.ud +
376 Reg0.ud + Reg1.ud;
377 Reg0.ud = (uint32_t)resTemp;
378 Reg1.ud = (uint32_t)(resTemp >> 32);
379 ''')
380 buildMult4Inst ("umlal", '''resTemp = Reg2.ud * Reg3.ud + Reg0.ud +
381 (Reg1.ud << 32);
382 Reg0.ud = (uint32_t)resTemp;
383 Reg1.ud = (uint32_t)(resTemp >> 32);
384 ''', "llbit")
385 buildMult4Inst ("umull", '''resTemp = Reg2.ud * Reg3.ud;
386 Reg0 = (uint32_t)resTemp;
387 Reg1 = (uint32_t)(resTemp >> 32);
388 ''', "llbit")
389}};