macromem.cc revision 8229
/*
 * Copyright (c) 2010 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2007-2008 The Florida State University
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Stephen Hines
 */
429888Sandreas@sandberg.pp.se
439888Sandreas@sandberg.pp.se#include <sstream>
449888Sandreas@sandberg.pp.se
459888Sandreas@sandberg.pp.se#include "arch/arm/insts/macromem.hh"
469888Sandreas@sandberg.pp.se#include "arch/arm/decoder.hh"
479888Sandreas@sandberg.pp.se
489888Sandreas@sandberg.pp.seusing namespace std;
499888Sandreas@sandberg.pp.seusing namespace ArmISAInst;
509888Sandreas@sandberg.pp.se
519888Sandreas@sandberg.pp.senamespace ArmISA
529888Sandreas@sandberg.pp.se{
539888Sandreas@sandberg.pp.se
549888Sandreas@sandberg.pp.seMacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst,
559888Sandreas@sandberg.pp.se                       OpClass __opClass, IntRegIndex rn,
569888Sandreas@sandberg.pp.se                       bool index, bool up, bool user, bool writeback,
579888Sandreas@sandberg.pp.se                       bool load, uint32_t reglist) :
589888Sandreas@sandberg.pp.se    PredMacroOp(mnem, machInst, __opClass)
599888Sandreas@sandberg.pp.se{
609888Sandreas@sandberg.pp.se    uint32_t regs = reglist;
619888Sandreas@sandberg.pp.se    uint32_t ones = number_of_ones(reglist);
629888Sandreas@sandberg.pp.se    // Remember that writeback adds a uop or two and the temp register adds one
639888Sandreas@sandberg.pp.se    numMicroops = ones + (writeback ? (load ? 2 : 1) : 0) + 1;
649888Sandreas@sandberg.pp.se
659888Sandreas@sandberg.pp.se    // It's technically legal to do a lot of nothing
669888Sandreas@sandberg.pp.se    if (!ones)
679888Sandreas@sandberg.pp.se        numMicroops = 1;
689888Sandreas@sandberg.pp.se
699888Sandreas@sandberg.pp.se    microOps = new StaticInstPtr[numMicroops];
709888Sandreas@sandberg.pp.se    uint32_t addr = 0;
719888Sandreas@sandberg.pp.se
729888Sandreas@sandberg.pp.se    if (!up)
739888Sandreas@sandberg.pp.se        addr = (ones << 2) - 4;
749888Sandreas@sandberg.pp.se
759888Sandreas@sandberg.pp.se    if (!index)
769888Sandreas@sandberg.pp.se        addr += 4;
779888Sandreas@sandberg.pp.se
789888Sandreas@sandberg.pp.se    StaticInstPtr *uop = microOps;
799888Sandreas@sandberg.pp.se
809888Sandreas@sandberg.pp.se    // Add 0 to Rn and stick it in ureg0.
819888Sandreas@sandberg.pp.se    // This is equivalent to a move.
829888Sandreas@sandberg.pp.se    *uop = new MicroAddiUop(machInst, INTREG_UREG0, rn, 0);
839888Sandreas@sandberg.pp.se
849888Sandreas@sandberg.pp.se    unsigned reg = 0;
859888Sandreas@sandberg.pp.se    unsigned regIdx = 0;
869888Sandreas@sandberg.pp.se    bool force_user = user & !bits(reglist, 15);
879888Sandreas@sandberg.pp.se    bool exception_ret = user & bits(reglist, 15);
889888Sandreas@sandberg.pp.se
899888Sandreas@sandberg.pp.se    for (int i = 0; i < ones; i++) {
909888Sandreas@sandberg.pp.se        // Find the next register.
919888Sandreas@sandberg.pp.se        while (!bits(regs, reg))
929888Sandreas@sandberg.pp.se            reg++;
939888Sandreas@sandberg.pp.se        replaceBits(regs, reg, 0);
949888Sandreas@sandberg.pp.se
959888Sandreas@sandberg.pp.se        regIdx = reg;
969888Sandreas@sandberg.pp.se        if (force_user) {
979888Sandreas@sandberg.pp.se            regIdx = intRegInMode(MODE_USER, regIdx);
989888Sandreas@sandberg.pp.se        }
999888Sandreas@sandberg.pp.se
1009888Sandreas@sandberg.pp.se        if (load) {
1019888Sandreas@sandberg.pp.se            if (writeback && i == ones - 1) {
1029888Sandreas@sandberg.pp.se                // If it's a writeback and this is the last register
1039888Sandreas@sandberg.pp.se                // do the load into a temporary register which we'll move
1049888Sandreas@sandberg.pp.se                // into the final one later
1059888Sandreas@sandberg.pp.se                *++uop = new MicroLdrUop(machInst, INTREG_UREG1, INTREG_UREG0,
106                        up, addr);
107            } else {
108                // Otherwise just do it normally
109                if (reg == INTREG_PC && exception_ret) {
110                    // This must be the exception return form of ldm.
111                    *++uop = new MicroLdrRetUop(machInst, regIdx,
112                                               INTREG_UREG0, up, addr);
113                } else {
114                    *++uop = new MicroLdrUop(machInst, regIdx,
115                                            INTREG_UREG0, up, addr);
116                }
117            }
118        } else {
119            *++uop = new MicroStrUop(machInst, regIdx, INTREG_UREG0, up, addr);
120        }
121
122        if (up)
123            addr += 4;
124        else
125            addr -= 4;
126    }
127
128    if (writeback && ones) {
129        // put the register update after we're done all loading
130        if (up)
131            *++uop = new MicroAddiUop(machInst, rn, rn, ones * 4);
132        else
133            *++uop = new MicroSubiUop(machInst, rn, rn, ones * 4);
134
135        // If this was a load move the last temporary value into place
136        // this way we can't take an exception after we update the base
137        // register.
138        if (load && reg == INTREG_PC && exception_ret) {
139            *++uop = new MicroUopRegMovRet(machInst, 0, INTREG_UREG1);
140        } else if (load) {
141            *++uop = new MicroUopRegMov(machInst, regIdx, INTREG_UREG1);
142            if (reg == INTREG_PC) {
143                (*uop)->setFlag(StaticInstBase::IsControl);
144                (*uop)->setFlag(StaticInstBase::IsCondControl);
145                (*uop)->setFlag(StaticInstBase::IsIndirectControl);
146                // This is created as a RAS POP
147                if (rn == INTREG_SP)
148                    (*uop)->setFlag(StaticInstBase::IsReturn);
149
150            }
151        }
152    }
153
154    (*uop)->setLastMicroop();
155
156    for (StaticInstPtr *curUop = microOps;
157            !(*curUop)->isLastMicroop(); curUop++) {
158        MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
159        assert(uopPtr);
160        uopPtr->setDelayedCommit();
161    }
162}
163
164VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
165                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
166                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
167    PredMacroOp(mnem, machInst, __opClass)
168{
169    assert(regs > 0 && regs <= 4);
170    assert(regs % elems == 0);
171
172    numMicroops = (regs > 2) ? 2 : 1;
173    bool wb = (rm != 15);
174    bool deinterleave = (elems > 1);
175
176    if (wb) numMicroops++;
177    if (deinterleave) numMicroops += (regs / elems);
178    microOps = new StaticInstPtr[numMicroops];
179
180    RegIndex rMid = deinterleave ? NumFloatArchRegs : vd * 2;
181
182    uint32_t noAlign = TLB::MustBeOne;
183
184    unsigned uopIdx = 0;
185    switch (regs) {
186      case 4:
187        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
188                size, machInst, rMid, rn, 0, align);
189        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
190                size, machInst, rMid + 4, rn, 16, noAlign);
191        break;
192      case 3:
193        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
194                size, machInst, rMid, rn, 0, align);
195        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
196                size, machInst, rMid + 4, rn, 16, noAlign);
197        break;
198      case 2:
199        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
200                size, machInst, rMid, rn, 0, align);
201        break;
202      case 1:
203        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
204                size, machInst, rMid, rn, 0, align);
205        break;
206      default:
207        // Unknown number of registers
208        microOps[uopIdx++] = new Unknown(machInst);
209    }
210    if (wb) {
211        if (rm != 15 && rm != 13) {
212            microOps[uopIdx++] =
213                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
214        } else {
215            microOps[uopIdx++] =
216                new MicroAddiUop(machInst, rn, rn, regs * 8);
217        }
218    }
219    if (deinterleave) {
220        switch (elems) {
221          case 4:
222            assert(regs == 4);
223            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
224                    size, machInst, vd * 2, rMid, inc * 2);
225            break;
226          case 3:
227            assert(regs == 3);
228            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
229                    size, machInst, vd * 2, rMid, inc * 2);
230            break;
231          case 2:
232            assert(regs == 4 || regs == 2);
233            if (regs == 4) {
234                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
235                        size, machInst, vd * 2, rMid, inc * 2);
236                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
237                        size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
238            } else {
239                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
240                        size, machInst, vd * 2, rMid, inc * 2);
241            }
242            break;
243          default:
244            // Bad number of elements to deinterleave
245            microOps[uopIdx++] = new Unknown(machInst);
246        }
247    }
248    assert(uopIdx == numMicroops);
249
250    for (unsigned i = 0; i < numMicroops - 1; i++) {
251        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
252        assert(uopPtr);
253        uopPtr->setDelayedCommit();
254    }
255    microOps[numMicroops - 1]->setLastMicroop();
256}
257
258VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
259                         OpClass __opClass, bool all, unsigned elems,
260                         RegIndex rn, RegIndex vd, unsigned regs,
261                         unsigned inc, uint32_t size, uint32_t align,
262                         RegIndex rm, unsigned lane) :
263    PredMacroOp(mnem, machInst, __opClass)
264{
265    assert(regs > 0 && regs <= 4);
266    assert(regs % elems == 0);
267
268    unsigned eBytes = (1 << size);
269    unsigned loadSize = eBytes * elems;
270    unsigned loadRegs M5_VAR_USED = (loadSize + sizeof(FloatRegBits) - 1) /
271                        sizeof(FloatRegBits);
272
273    assert(loadRegs > 0 && loadRegs <= 4);
274
275    numMicroops = 1;
276    bool wb = (rm != 15);
277
278    if (wb) numMicroops++;
279    numMicroops += (regs / elems);
280    microOps = new StaticInstPtr[numMicroops];
281
282    RegIndex ufp0 = NumFloatArchRegs;
283
284    unsigned uopIdx = 0;
285    switch (loadSize) {
286      case 1:
287        microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
288                machInst, ufp0, rn, 0, align);
289        break;
290      case 2:
291        if (eBytes == 2) {
292            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
293                    machInst, ufp0, rn, 0, align);
294        } else {
295            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
296                    machInst, ufp0, rn, 0, align);
297        }
298        break;
299      case 3:
300        microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
301                machInst, ufp0, rn, 0, align);
302        break;
303      case 4:
304        switch (eBytes) {
305          case 1:
306            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
307                    machInst, ufp0, rn, 0, align);
308            break;
309          case 2:
310            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
311                    machInst, ufp0, rn, 0, align);
312            break;
313          case 4:
314            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
315                    machInst, ufp0, rn, 0, align);
316            break;
317        }
318        break;
319      case 6:
320        microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
321                machInst, ufp0, rn, 0, align);
322        break;
323      case 8:
324        switch (eBytes) {
325          case 2:
326            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
327                    machInst, ufp0, rn, 0, align);
328            break;
329          case 4:
330            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
331                    machInst, ufp0, rn, 0, align);
332            break;
333        }
334        break;
335      case 12:
336        microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
337                machInst, ufp0, rn, 0, align);
338        break;
339      case 16:
340        microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
341                machInst, ufp0, rn, 0, align);
342        break;
343      default:
344        // Unrecognized load size
345        microOps[uopIdx++] = new Unknown(machInst);
346    }
347    if (wb) {
348        if (rm != 15 && rm != 13) {
349            microOps[uopIdx++] =
350                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
351        } else {
352            microOps[uopIdx++] =
353                new MicroAddiUop(machInst, rn, rn, loadSize);
354        }
355    }
356    switch (elems) {
357      case 4:
358        assert(regs == 4);
359        switch (size) {
360          case 0:
361            if (all) {
362                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
363                        machInst, vd * 2, ufp0, inc * 2);
364            } else {
365                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
366                        machInst, vd * 2, ufp0, inc * 2, lane);
367            }
368            break;
369          case 1:
370            if (all) {
371                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
372                        machInst, vd * 2, ufp0, inc * 2);
373            } else {
374                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
375                        machInst, vd * 2, ufp0, inc * 2, lane);
376            }
377            break;
378          case 2:
379            if (all) {
380                microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
381                        machInst, vd * 2, ufp0, inc * 2);
382            } else {
383                microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
384                        machInst, vd * 2, ufp0, inc * 2, lane);
385            }
386            break;
387          default:
388            // Bad size
389            microOps[uopIdx++] = new Unknown(machInst);
390            break;
391        }
392        break;
393      case 3:
394        assert(regs == 3);
395        switch (size) {
396          case 0:
397            if (all) {
398                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
399                        machInst, vd * 2, ufp0, inc * 2);
400            } else {
401                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
402                        machInst, vd * 2, ufp0, inc * 2, lane);
403            }
404            break;
405          case 1:
406            if (all) {
407                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
408                        machInst, vd * 2, ufp0, inc * 2);
409            } else {
410                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
411                        machInst, vd * 2, ufp0, inc * 2, lane);
412            }
413            break;
414          case 2:
415            if (all) {
416                microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
417                        machInst, vd * 2, ufp0, inc * 2);
418            } else {
419                microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
420                        machInst, vd * 2, ufp0, inc * 2, lane);
421            }
422            break;
423          default:
424            // Bad size
425            microOps[uopIdx++] = new Unknown(machInst);
426            break;
427        }
428        break;
429      case 2:
430        assert(regs == 2);
431        assert(loadRegs <= 2);
432        switch (size) {
433          case 0:
434            if (all) {
435                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
436                        machInst, vd * 2, ufp0, inc * 2);
437            } else {
438                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
439                        machInst, vd * 2, ufp0, inc * 2, lane);
440            }
441            break;
442          case 1:
443            if (all) {
444                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
445                        machInst, vd * 2, ufp0, inc * 2);
446            } else {
447                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
448                        machInst, vd * 2, ufp0, inc * 2, lane);
449            }
450            break;
451          case 2:
452            if (all) {
453                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
454                        machInst, vd * 2, ufp0, inc * 2);
455            } else {
456                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
457                        machInst, vd * 2, ufp0, inc * 2, lane);
458            }
459            break;
460          default:
461            // Bad size
462            microOps[uopIdx++] = new Unknown(machInst);
463            break;
464        }
465        break;
466      case 1:
467        assert(regs == 1 || (all && regs == 2));
468        assert(loadRegs <= 2);
469        for (unsigned offset = 0; offset < regs; offset++) {
470            switch (size) {
471              case 0:
472                if (all) {
473                    microOps[uopIdx++] =
474                        new MicroUnpackAllNeon2to2Uop<uint8_t>(
475                            machInst, (vd + offset) * 2, ufp0, inc * 2);
476                } else {
477                    microOps[uopIdx++] =
478                        new MicroUnpackNeon2to2Uop<uint8_t>(
479                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
480                }
481                break;
482              case 1:
483                if (all) {
484                    microOps[uopIdx++] =
485                        new MicroUnpackAllNeon2to2Uop<uint16_t>(
486                            machInst, (vd + offset) * 2, ufp0, inc * 2);
487                } else {
488                    microOps[uopIdx++] =
489                        new MicroUnpackNeon2to2Uop<uint16_t>(
490                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
491                }
492                break;
493              case 2:
494                if (all) {
495                    microOps[uopIdx++] =
496                        new MicroUnpackAllNeon2to2Uop<uint32_t>(
497                            machInst, (vd + offset) * 2, ufp0, inc * 2);
498                } else {
499                    microOps[uopIdx++] =
500                        new MicroUnpackNeon2to2Uop<uint32_t>(
501                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
502                }
503                break;
504              default:
505                // Bad size
506                microOps[uopIdx++] = new Unknown(machInst);
507                break;
508            }
509        }
510        break;
511      default:
512        // Bad number of elements to unpack
513        microOps[uopIdx++] = new Unknown(machInst);
514    }
515    assert(uopIdx == numMicroops);
516
517    for (unsigned i = 0; i < numMicroops - 1; i++) {
518        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
519        assert(uopPtr);
520        uopPtr->setDelayedCommit();
521    }
522    microOps[numMicroops - 1]->setLastMicroop();
523}
524
525VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
526                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
527                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
528    PredMacroOp(mnem, machInst, __opClass)
529{
530    assert(regs > 0 && regs <= 4);
531    assert(regs % elems == 0);
532
533    numMicroops = (regs > 2) ? 2 : 1;
534    bool wb = (rm != 15);
535    bool interleave = (elems > 1);
536
537    if (wb) numMicroops++;
538    if (interleave) numMicroops += (regs / elems);
539    microOps = new StaticInstPtr[numMicroops];
540
541    uint32_t noAlign = TLB::MustBeOne;
542
543    RegIndex rMid = interleave ? NumFloatArchRegs : vd * 2;
544
545    unsigned uopIdx = 0;
546    if (interleave) {
547        switch (elems) {
548          case 4:
549            assert(regs == 4);
550            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
551                    size, machInst, rMid, vd * 2, inc * 2);
552            break;
553          case 3:
554            assert(regs == 3);
555            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
556                    size, machInst, rMid, vd * 2, inc * 2);
557            break;
558          case 2:
559            assert(regs == 4 || regs == 2);
560            if (regs == 4) {
561                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
562                        size, machInst, rMid, vd * 2, inc * 2);
563                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
564                        size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
565            } else {
566                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
567                        size, machInst, rMid, vd * 2, inc * 2);
568            }
569            break;
570          default:
571            // Bad number of elements to interleave
572            microOps[uopIdx++] = new Unknown(machInst);
573        }
574    }
575    switch (regs) {
576      case 4:
577        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
578                size, machInst, rMid, rn, 0, align);
579        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
580                size, machInst, rMid + 4, rn, 16, noAlign);
581        break;
582      case 3:
583        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
584                size, machInst, rMid, rn, 0, align);
585        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
586                size, machInst, rMid + 4, rn, 16, noAlign);
587        break;
588      case 2:
589        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
590                size, machInst, rMid, rn, 0, align);
591        break;
592      case 1:
593        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
594                size, machInst, rMid, rn, 0, align);
595        break;
596      default:
597        // Unknown number of registers
598        microOps[uopIdx++] = new Unknown(machInst);
599    }
600    if (wb) {
601        if (rm != 15 && rm != 13) {
602            microOps[uopIdx++] =
603                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
604        } else {
605            microOps[uopIdx++] =
606                new MicroAddiUop(machInst, rn, rn, regs * 8);
607        }
608    }
609    assert(uopIdx == numMicroops);
610
611    for (unsigned i = 0; i < numMicroops - 1; i++) {
612        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
613        assert(uopPtr);
614        uopPtr->setDelayedCommit();
615    }
616    microOps[numMicroops - 1]->setLastMicroop();
617}
618
619VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
620                         OpClass __opClass, bool all, unsigned elems,
621                         RegIndex rn, RegIndex vd, unsigned regs,
622                         unsigned inc, uint32_t size, uint32_t align,
623                         RegIndex rm, unsigned lane) :
624    PredMacroOp(mnem, machInst, __opClass)
625{
626    assert(!all);
627    assert(regs > 0 && regs <= 4);
628    assert(regs % elems == 0);
629
630    unsigned eBytes = (1 << size);
631    unsigned storeSize = eBytes * elems;
632    unsigned storeRegs M5_VAR_USED = (storeSize + sizeof(FloatRegBits) - 1) /
633                         sizeof(FloatRegBits);
634
635    assert(storeRegs > 0 && storeRegs <= 4);
636
637    numMicroops = 1;
638    bool wb = (rm != 15);
639
640    if (wb) numMicroops++;
641    numMicroops += (regs / elems);
642    microOps = new StaticInstPtr[numMicroops];
643
644    RegIndex ufp0 = NumFloatArchRegs;
645
646    unsigned uopIdx = 0;
647    switch (elems) {
648      case 4:
649        assert(regs == 4);
650        switch (size) {
651          case 0:
652            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
653                    machInst, ufp0, vd * 2, inc * 2, lane);
654            break;
655          case 1:
656            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
657                    machInst, ufp0, vd * 2, inc * 2, lane);
658            break;
659          case 2:
660            microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
661                    machInst, ufp0, vd * 2, inc * 2, lane);
662            break;
663          default:
664            // Bad size
665            microOps[uopIdx++] = new Unknown(machInst);
666            break;
667        }
668        break;
669      case 3:
670        assert(regs == 3);
671        switch (size) {
672          case 0:
673            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
674                    machInst, ufp0, vd * 2, inc * 2, lane);
675            break;
676          case 1:
677            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
678                    machInst, ufp0, vd * 2, inc * 2, lane);
679            break;
680          case 2:
681            microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
682                    machInst, ufp0, vd * 2, inc * 2, lane);
683            break;
684          default:
685            // Bad size
686            microOps[uopIdx++] = new Unknown(machInst);
687            break;
688        }
689        break;
690      case 2:
691        assert(regs == 2);
692        assert(storeRegs <= 2);
693        switch (size) {
694          case 0:
695            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
696                    machInst, ufp0, vd * 2, inc * 2, lane);
697            break;
698          case 1:
699            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
700                    machInst, ufp0, vd * 2, inc * 2, lane);
701            break;
702          case 2:
703            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
704                    machInst, ufp0, vd * 2, inc * 2, lane);
705            break;
706          default:
707            // Bad size
708            microOps[uopIdx++] = new Unknown(machInst);
709            break;
710        }
711        break;
712      case 1:
713        assert(regs == 1 || (all && regs == 2));
714        assert(storeRegs <= 2);
715        for (unsigned offset = 0; offset < regs; offset++) {
716            switch (size) {
717              case 0:
718                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
719                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
720                break;
721              case 1:
722                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
723                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
724                break;
725              case 2:
726                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
727                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
728                break;
729              default:
730                // Bad size
731                microOps[uopIdx++] = new Unknown(machInst);
732                break;
733            }
734        }
735        break;
736      default:
737        // Bad number of elements to unpack
738        microOps[uopIdx++] = new Unknown(machInst);
739    }
740    switch (storeSize) {
741      case 1:
742        microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
743                machInst, ufp0, rn, 0, align);
744        break;
745      case 2:
746        if (eBytes == 2) {
747            microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
748                    machInst, ufp0, rn, 0, align);
749        } else {
750            microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
751                    machInst, ufp0, rn, 0, align);
752        }
753        break;
754      case 3:
755        microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
756                machInst, ufp0, rn, 0, align);
757        break;
758      case 4:
759        switch (eBytes) {
760          case 1:
761            microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
762                    machInst, ufp0, rn, 0, align);
763            break;
764          case 2:
765            microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
766                    machInst, ufp0, rn, 0, align);
767            break;
768          case 4:
769            microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
770                    machInst, ufp0, rn, 0, align);
771            break;
772        }
773        break;
774      case 6:
775        microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
776                machInst, ufp0, rn, 0, align);
777        break;
778      case 8:
779        switch (eBytes) {
780          case 2:
781            microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
782                    machInst, ufp0, rn, 0, align);
783            break;
784          case 4:
785            microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
786                    machInst, ufp0, rn, 0, align);
787            break;
788        }
789        break;
790      case 12:
791        microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
792                machInst, ufp0, rn, 0, align);
793        break;
794      case 16:
795        microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
796                machInst, ufp0, rn, 0, align);
797        break;
798      default:
799        // Bad store size
800        microOps[uopIdx++] = new Unknown(machInst);
801    }
802    if (wb) {
803        if (rm != 15 && rm != 13) {
804            microOps[uopIdx++] =
805                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
806        } else {
807            microOps[uopIdx++] =
808                new MicroAddiUop(machInst, rn, rn, storeSize);
809        }
810    }
811    assert(uopIdx == numMicroops);
812
813    for (unsigned i = 0; i < numMicroops - 1; i++) {
814        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
815        assert(uopPtr);
816        uopPtr->setDelayedCommit();
817    }
818    microOps[numMicroops - 1]->setLastMicroop();
819}
820
821MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
822                             OpClass __opClass, IntRegIndex rn,
823                             RegIndex vd, bool single, bool up,
824                             bool writeback, bool load, uint32_t offset) :
825    PredMacroOp(mnem, machInst, __opClass)
826{
827    int i = 0;
828
829    // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
830    // to be functionally identical except that fldmx is deprecated. For now
831    // we'll assume they're otherwise interchangable.
832    int count = (single ? offset : (offset / 2));
833    if (count == 0 || count > NumFloatArchRegs)
834        warn_once("Bad offset field for VFP load/store multiple.\n");
835    if (count == 0) {
836        // Force there to be at least one microop so the macroop makes sense.
837        writeback = true;
838    }
839    if (count > NumFloatArchRegs)
840        count = NumFloatArchRegs;
841
842    numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
843    microOps = new StaticInstPtr[numMicroops];
844
845    int64_t addr = 0;
846
847    if (!up)
848        addr = 4 * offset;
849
850    bool tempUp = up;
851    for (int j = 0; j < count; j++) {
852        if (load) {
853            if (single) {
854                microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
855                                                  tempUp, addr);
856            } else {
857                microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
858                                                    tempUp, addr);
859                microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
860                                                    addr + (up ? 4 : -4));
861            }
862        } else {
863            if (single) {
864                microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
865                                                  tempUp, addr);
866            } else {
867                microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
868                                                    tempUp, addr);
869                microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
870                                                    addr + (up ? 4 : -4));
871            }
872        }
873        if (!tempUp) {
874            addr -= (single ? 4 : 8);
875            // The microops don't handle negative displacement, so turn if we
876            // hit zero, flip polarity and start adding.
877            if (addr <= 0) {
878                tempUp = true;
879                addr = -addr;
880            }
881        } else {
882            addr += (single ? 4 : 8);
883        }
884    }
885
886    if (writeback) {
887        if (up) {
888            microOps[i++] =
889                new MicroAddiUop(machInst, rn, rn, 4 * offset);
890        } else {
891            microOps[i++] =
892                new MicroSubiUop(machInst, rn, rn, 4 * offset);
893        }
894    }
895
896    assert(numMicroops == i);
897    microOps[numMicroops - 1]->setLastMicroop();
898
899    for (StaticInstPtr *curUop = microOps;
900            !(*curUop)->isLastMicroop(); curUop++) {
901        MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
902        assert(uopPtr);
903        uopPtr->setDelayedCommit();
904    }
905}
906
907std::string
908MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
909{
910    std::stringstream ss;
911    printMnemonic(ss);
912    printReg(ss, ura);
913    ss << ", ";
914    printReg(ss, urb);
915    ss << ", ";
916    ccprintf(ss, "#%d", imm);
917    return ss.str();
918}
919
920std::string
921MicroSetPCCPSR::generateDisassembly(Addr pc, const SymbolTable *symtab) const
922{
923    std::stringstream ss;
924    printMnemonic(ss);
925    ss << "[PC,CPSR]";
926    return ss.str();
927}
928
929std::string
930MicroIntMov::generateDisassembly(Addr pc, const SymbolTable *symtab) const
931{
932    std::stringstream ss;
933    printMnemonic(ss);
934    printReg(ss, ura);
935    ss << ", ";
936    printReg(ss, urb);
937    return ss.str();
938}
939
940std::string
941MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
942{
943    std::stringstream ss;
944    printMnemonic(ss);
945    printReg(ss, ura);
946    ss << ", ";
947    printReg(ss, urb);
948    ss << ", ";
949    printReg(ss, urc);
950    return ss.str();
951}
952
953std::string
954MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
955{
956    std::stringstream ss;
957    printMnemonic(ss);
958    printReg(ss, ura);
959    ss << ", [";
960    printReg(ss, urb);
961    ss << ", ";
962    ccprintf(ss, "#%d", imm);
963    ss << "]";
964    return ss.str();
965}
966
967}
968