pseudo_inst.cc revision 11639:2e8d4bd8108d
1/*
2 * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Marc Orr
34 */
35
36#include <csignal>
37
38#include "arch/hsail/insts/decl.hh"
39#include "arch/hsail/insts/mem.hh"
40
41namespace HsailISA
42{
43    // Pseudo (or magic) instructions are overloaded on the hsail call
44    // instruction, because of its flexible parameter signature.
45
46    // To add a new magic instruction:
47    // 1. Add an entry to the enum.
48    // 2. Implement it in the switch statement below (Call::execPseudoInst).
49    // 3. Add a utility function to hsa/hsail-gpu-compute/util/magicinst.h,
50    //    so it's easy to call from an OpenCL kernel.
51
52    // This enum should be identical to the enum in
53    // hsa/hsail-gpu-compute/util/magicinst.h
// Opcode numbers of the magic (pseudo) instructions.  The values are spelled
// out explicitly because they must stay identical to the enum in
// hsa/hsail-gpu-compute/util/magicinst.h.
enum
{
    // printing
    MAGIC_PRINT_WF_32                  = 0,
    MAGIC_PRINT_WF_64                  = 1,
    MAGIC_PRINT_LANE                   = 2,
    MAGIC_PRINT_LANE_64                = 3,
    MAGIC_PRINT_WF_FLOAT               = 4,

    // debugging
    MAGIC_SIM_BREAK                    = 5,

    // cross-lane operations
    MAGIC_PREF_SUM                     = 6,
    MAGIC_REDUCTION                    = 7,
    MAGIC_MASKLANE_LOWER               = 8,
    MAGIC_MASKLANE_UPPER               = 9,

    // wavefront barriers
    MAGIC_JOIN_WF_BAR                  = 10,
    MAGIC_WAIT_WF_BAR                  = 11,

    MAGIC_PANIC                        = 12,

    // memory operations
    MAGIC_ATOMIC_NR_ADD_GLOBAL_U32_REG = 13,
    MAGIC_ATOMIC_NR_ADD_GROUP_U32_REG  = 14,
    MAGIC_LOAD_GLOBAL_U32_REG          = 15,
    MAGIC_XACT_CAS_LD                  = 16,

    // most-significant active lane
    MAGIC_MOST_SIG_THD                 = 17,
    MAGIC_MOST_SIG_BROADCAST           = 18,

    // printing filtered by wavefront id
    MAGIC_PRINT_WFID_32                = 19,
    MAGIC_PRINT_WFID_64                = 20
};
78
79    void
80    Call::execPseudoInst(Wavefront *w, GPUDynInstPtr gpuDynInst)
81    {
82        const VectorMask &mask = w->getPred();
83
84        int op = 0;
85        bool got_op = false;
86
87        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
88            if (mask[lane]) {
89                int src_val0 = src1.get<int>(w, lane, 0);
90                if (got_op) {
91                    if (src_val0 != op) {
92                        fatal("Multiple magic instructions per PC not "
93                              "supported\n");
94                    }
95                } else {
96                    op = src_val0;
97                    got_op = true;
98                }
99            }
100        }
101
102        switch(op) {
103          case MAGIC_PRINT_WF_32:
104            MagicPrintWF32(w);
105            break;
106          case MAGIC_PRINT_WF_64:
107            MagicPrintWF64(w);
108            break;
109          case MAGIC_PRINT_LANE:
110            MagicPrintLane(w);
111            break;
112          case MAGIC_PRINT_LANE_64:
113            MagicPrintLane64(w);
114            break;
115          case MAGIC_PRINT_WF_FLOAT:
116            MagicPrintWFFloat(w);
117            break;
118          case MAGIC_SIM_BREAK:
119            MagicSimBreak(w);
120            break;
121          case MAGIC_PREF_SUM:
122            MagicPrefixSum(w);
123            break;
124          case MAGIC_REDUCTION:
125            MagicReduction(w);
126            break;
127          case MAGIC_MASKLANE_LOWER:
128            MagicMaskLower(w);
129            break;
130          case MAGIC_MASKLANE_UPPER:
131            MagicMaskUpper(w);
132            break;
133          case MAGIC_JOIN_WF_BAR:
134            MagicJoinWFBar(w);
135            break;
136          case MAGIC_WAIT_WF_BAR:
137            MagicWaitWFBar(w);
138            break;
139          case MAGIC_PANIC:
140            MagicPanic(w);
141            break;
142
143          // atomic instructions
144          case MAGIC_ATOMIC_NR_ADD_GLOBAL_U32_REG:
145            MagicAtomicNRAddGlobalU32Reg(w, gpuDynInst);
146            break;
147
148          case MAGIC_ATOMIC_NR_ADD_GROUP_U32_REG:
149            MagicAtomicNRAddGroupU32Reg(w, gpuDynInst);
150            break;
151
152          case MAGIC_LOAD_GLOBAL_U32_REG:
153            MagicLoadGlobalU32Reg(w, gpuDynInst);
154            break;
155
156          case MAGIC_XACT_CAS_LD:
157            MagicXactCasLd(w);
158            break;
159
160          case MAGIC_MOST_SIG_THD:
161            MagicMostSigThread(w);
162            break;
163
164          case MAGIC_MOST_SIG_BROADCAST:
165            MagicMostSigBroadcast(w);
166            break;
167
168          case MAGIC_PRINT_WFID_32:
169            MagicPrintWF32ID(w);
170            break;
171
172          case MAGIC_PRINT_WFID_64:
173            MagicPrintWFID64(w);
174            break;
175
176          default: fatal("unrecognized magic instruction: %d\n", op);
177        }
178    }
179
180    void
181    Call::MagicPrintLane(Wavefront *w)
182    {
183    #if TRACING_ON
184        const VectorMask &mask = w->getPred();
185        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
186            if (mask[lane]) {
187                int src_val1 = src1.get<int>(w, lane, 1);
188                int src_val2 = src1.get<int>(w, lane, 2);
189                if (src_val2) {
190                    DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: 0x%x\n",
191                             disassemble(), w->computeUnit->cu_id, w->simdId,
192                             w->wfSlotId, lane, src_val1);
193                } else {
194                    DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: %d\n",
195                             disassemble(), w->computeUnit->cu_id, w->simdId,
196                             w->wfSlotId, lane, src_val1);
197                }
198            }
199        }
200    #endif
201    }
202
203    void
204    Call::MagicPrintLane64(Wavefront *w)
205    {
206    #if TRACING_ON
207        const VectorMask &mask = w->getPred();
208        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
209            if (mask[lane]) {
210                int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
211                int src_val2 = src1.get<int>(w, lane, 2);
212                if (src_val2) {
213                    DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: 0x%x\n",
214                             disassemble(), w->computeUnit->cu_id, w->simdId,
215                             w->wfSlotId, lane, src_val1);
216                } else {
217                    DPRINTFN("krl_prt (%s): CU%d, WF[%d][%d], lane %d: %d\n",
218                             disassemble(), w->computeUnit->cu_id, w->simdId,
219                             w->wfSlotId, lane, src_val1);
220                }
221            }
222        }
223    #endif
224    }
225
226    void
227    Call::MagicPrintWF32(Wavefront *w)
228    {
229    #if TRACING_ON
230        const VectorMask &mask = w->getPred();
231        std::string res_str;
232        res_str = csprintf("krl_prt (%s)\n", disassemble());
233
234        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
235            if (!(lane & 7)) {
236                res_str += csprintf("DB%03d: ", (int)w->wfDynId);
237            }
238
239            if (mask[lane]) {
240                int src_val1 = src1.get<int>(w, lane, 1);
241                int src_val2 = src1.get<int>(w, lane, 2);
242
243                if (src_val2) {
244                    res_str += csprintf("%08x", src_val1);
245                } else {
246                    res_str += csprintf("%08d", src_val1);
247                }
248            } else {
249                res_str += csprintf("xxxxxxxx");
250            }
251
252            if ((lane & 7) == 7) {
253                res_str += csprintf("\n");
254            } else {
255                res_str += csprintf(" ");
256            }
257        }
258
259        res_str += "\n\n";
260        DPRINTFN(res_str.c_str());
261    #endif
262    }
263
264    void
265    Call::MagicPrintWF32ID(Wavefront *w)
266    {
267    #if TRACING_ON
268        const VectorMask &mask = w->getPred();
269        std::string res_str;
270        int src_val3 = -1;
271        res_str = csprintf("krl_prt (%s)\n", disassemble());
272
273        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
274            if (!(lane & 7)) {
275                res_str += csprintf("DB%03d: ", (int)w->wfDynId);
276            }
277
278            if (mask[lane]) {
279                int src_val1 = src1.get<int>(w, lane, 1);
280                int src_val2 = src1.get<int>(w, lane, 2);
281                src_val3 = src1.get<int>(w, lane, 3);
282
283                if (src_val2) {
284                    res_str += csprintf("%08x", src_val1);
285                } else {
286                    res_str += csprintf("%08d", src_val1);
287                }
288            } else {
289                res_str += csprintf("xxxxxxxx");
290            }
291
292            if ((lane & 7) == 7) {
293                res_str += csprintf("\n");
294            } else {
295                res_str += csprintf(" ");
296            }
297        }
298
299        res_str += "\n\n";
300        if (w->wfDynId == src_val3) {
301            DPRINTFN(res_str.c_str());
302        }
303    #endif
304    }
305
306    void
307    Call::MagicPrintWF64(Wavefront *w)
308    {
309    #if TRACING_ON
310        const VectorMask &mask = w->getPred();
311        std::string res_str;
312        res_str = csprintf("krl_prt (%s)\n", disassemble());
313
314        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
315            if (!(lane & 3)) {
316                res_str += csprintf("DB%03d: ", (int)w->wfDynId);
317            }
318
319            if (mask[lane]) {
320                int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
321                int src_val2 = src1.get<int>(w, lane, 2);
322
323                if (src_val2) {
324                    res_str += csprintf("%016x", src_val1);
325                } else {
326                    res_str += csprintf("%016d", src_val1);
327                }
328            } else {
329                res_str += csprintf("xxxxxxxxxxxxxxxx");
330            }
331
332            if ((lane & 3) == 3) {
333                res_str += csprintf("\n");
334            } else {
335                res_str += csprintf(" ");
336            }
337        }
338
339        res_str += "\n\n";
340        DPRINTFN(res_str.c_str());
341    #endif
342    }
343
344    void
345    Call::MagicPrintWFID64(Wavefront *w)
346    {
347    #if TRACING_ON
348        const VectorMask &mask = w->getPred();
349        std::string res_str;
350        int src_val3 = -1;
351        res_str = csprintf("krl_prt (%s)\n", disassemble());
352
353        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
354            if (!(lane & 3)) {
355                res_str += csprintf("DB%03d: ", (int)w->wfDynId);
356            }
357
358            if (mask[lane]) {
359                int64_t src_val1 = src1.get<int64_t>(w, lane, 1);
360                int src_val2 = src1.get<int>(w, lane, 2);
361                src_val3 = src1.get<int>(w, lane, 3);
362
363                if (src_val2) {
364                    res_str += csprintf("%016x", src_val1);
365                } else {
366                    res_str += csprintf("%016d", src_val1);
367                }
368            } else {
369                res_str += csprintf("xxxxxxxxxxxxxxxx");
370            }
371
372            if ((lane & 3) == 3) {
373                res_str += csprintf("\n");
374            } else {
375                res_str += csprintf(" ");
376            }
377        }
378
379        res_str += "\n\n";
380        if (w->wfDynId == src_val3) {
381            DPRINTFN(res_str.c_str());
382        }
383    #endif
384    }
385
386    void
387    Call::MagicPrintWFFloat(Wavefront *w)
388    {
389    #if TRACING_ON
390        const VectorMask &mask = w->getPred();
391        std::string res_str;
392        res_str = csprintf("krl_prt (%s)\n", disassemble());
393
394        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
395            if (!(lane & 7)) {
396                res_str += csprintf("DB%03d: ", (int)w->wfDynId);
397            }
398
399            if (mask[lane]) {
400                float src_val1 = src1.get<float>(w, lane, 1);
401                res_str += csprintf("%08f", src_val1);
402            } else {
403                res_str += csprintf("xxxxxxxx");
404            }
405
406            if ((lane & 7) == 7) {
407                res_str += csprintf("\n");
408            } else {
409                res_str += csprintf(" ");
410            }
411        }
412
413        res_str += "\n\n";
414        DPRINTFN(res_str.c_str());
415    #endif
416    }
417
418    // raises a signal that GDB will catch
419    // when done with the break, type "signal 0" in gdb to continue
420    void
421    Call::MagicSimBreak(Wavefront *w)
422    {
423        std::string res_str;
424        // print out state for this wavefront and then break
425        res_str = csprintf("Breakpoint encountered for wavefront %i\n",
426                           w->wfSlotId);
427
428        res_str += csprintf("  Kern ID: %i\n", w->kernId);
429        res_str += csprintf("  Phase ID: %i\n", w->simdId);
430        res_str += csprintf("  Executing on CU #%i\n", w->computeUnit->cu_id);
431        res_str += csprintf("  Exec mask: ");
432
433        for (int i = w->computeUnit->wfSize() - 1; i >= 0; --i) {
434            if (w->execMask(i))
435                res_str += "1";
436            else
437                res_str += "0";
438
439            if ((i & 7) == 7)
440                res_str += " ";
441        }
442
443        res_str += csprintf("(0x%016llx)\n", w->execMask().to_ullong());
444
445        res_str += "\nHelpful debugging hints:\n";
446        res_str += "   Check out w->s_reg / w->d_reg for register state\n";
447
448        res_str += "\n\n";
449        DPRINTFN(res_str.c_str());
450        fflush(stdout);
451
452        raise(SIGTRAP);
453    }
454
455    void
456    Call::MagicPrefixSum(Wavefront *w)
457    {
458        const VectorMask &mask = w->getPred();
459        int res = 0;
460
461        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
462            if (mask[lane]) {
463                int src_val1 = src1.get<int>(w, lane, 1);
464                dest.set<int>(w, lane, res);
465                res += src_val1;
466            }
467        }
468    }
469
470    void
471    Call::MagicReduction(Wavefront *w)
472    {
473        // reduction magic instruction
474        //   The reduction instruction takes up to 64 inputs (one from
475        //   each thread in a WF) and sums them. It returns the sum to
476        //   each thread in the WF.
477        const VectorMask &mask = w->getPred();
478        int res = 0;
479
480        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
481            if (mask[lane]) {
482                int src_val1 = src1.get<int>(w, lane, 1);
483                res += src_val1;
484            }
485        }
486
487        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
488            if (mask[lane]) {
489                dest.set<int>(w, lane, res);
490            }
491        }
492    }
493
494    void
495    Call::MagicMaskLower(Wavefront *w)
496    {
497        const VectorMask &mask = w->getPred();
498        int res = 0;
499
500        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
501            if (mask[lane]) {
502                int src_val1 = src1.get<int>(w, lane, 1);
503
504                if (src_val1) {
505                    if (lane < (w->computeUnit->wfSize()/2)) {
506                        res = res | ((uint32_t)(1) << lane);
507                    }
508                }
509            }
510        }
511
512        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
513            if (mask[lane]) {
514                dest.set<int>(w, lane, res);
515            }
516        }
517    }
518
519    void
520    Call::MagicMaskUpper(Wavefront *w)
521    {
522        const VectorMask &mask = w->getPred();
523        int res = 0;
524        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
525            if (mask[lane]) {
526                int src_val1 = src1.get<int>(w, lane, 1);
527
528                if (src_val1) {
529                    if (lane >= (w->computeUnit->wfSize()/2)) {
530                        res = res | ((uint32_t)(1) <<
531                                     (lane - (w->computeUnit->wfSize()/2)));
532                    }
533                }
534            }
535        }
536
537        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
538            if (mask[lane]) {
539                dest.set<int>(w, lane, res);
540            }
541        }
542    }
543
544    void
545    Call::MagicJoinWFBar(Wavefront *w)
546    {
547        const VectorMask &mask = w->getPred();
548        int max_cnt = 0;
549
550        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
551            if (mask[lane]) {
552                w->barCnt[lane]++;
553
554                if (w->barCnt[lane] > max_cnt) {
555                    max_cnt = w->barCnt[lane];
556                }
557            }
558        }
559
560        if (max_cnt > w->maxBarCnt) {
561            w->maxBarCnt = max_cnt;
562        }
563    }
564
565    void
566    Call::MagicWaitWFBar(Wavefront *w)
567    {
568        const VectorMask &mask = w->getPred();
569        int max_cnt = 0;
570
571        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
572            if (mask[lane]) {
573                w->barCnt[lane]--;
574            }
575
576            if (w->barCnt[lane] > max_cnt) {
577                max_cnt = w->barCnt[lane];
578            }
579        }
580
581        if (max_cnt < w->maxBarCnt) {
582            w->maxBarCnt = max_cnt;
583        }
584
585        w->instructionBuffer.erase(w->instructionBuffer.begin() + 1,
586                                   w->instructionBuffer.end());
587        if (w->pendingFetch)
588            w->dropFetch = true;
589    }
590
591    void
592    Call::MagicPanic(Wavefront *w)
593    {
594        const VectorMask &mask = w->getPred();
595
596        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
597            if (mask[lane]) {
598                int src_val1 = src1.get<int>(w, lane, 1);
599                panic("OpenCL Code failed assertion #%d. Triggered by lane %s",
600                      src_val1, lane);
601            }
602        }
603    }
604
605    void
606    Call::calcAddr(Wavefront *w, GPUDynInstPtr m)
607    {
608        // the address is in src1 | src2
609        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
610            int src_val1 = src1.get<int>(w, lane, 1);
611            int src_val2 = src1.get<int>(w, lane, 2);
612            Addr addr = (((Addr) src_val1) << 32) | ((Addr) src_val2);
613
614            m->addr[lane] = addr;
615        }
616
617    }
618
619    void
620    Call::MagicAtomicNRAddGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst)
621    {
622        GPUDynInstPtr m = gpuDynInst;
623
624        calcAddr(w, m);
625
626        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
627            ((int*)m->a_data)[lane] = src1.get<int>(w, lane, 3);
628        }
629
630        m->m_op = brigAtomicToMemOpType(Brig::BRIG_OPCODE_ATOMICNORET,
631                                        Brig::BRIG_ATOMIC_ADD);
632        m->m_type = U32::memType;
633        m->v_type = U32::vgprType;
634
635        m->exec_mask = w->execMask();
636        m->statusBitVector = 0;
637        m->equiv = 0;  // atomics don't have an equivalence class operand
638        m->n_reg = 1;
639        m->memoryOrder = Enums::MEMORY_ORDER_NONE;
640        m->scope = Enums::MEMORY_SCOPE_NONE;
641
642        m->simdId = w->simdId;
643        m->wfSlotId = w->wfSlotId;
644        m->wfDynId = w->wfDynId;
645        m->latency.init(&w->computeUnit->shader->tick_cnt);
646
647        m->s_type = SEG_GLOBAL;
648        m->pipeId = GLBMEM_PIPE;
649        m->latency.set(w->computeUnit->shader->ticks(64));
650        w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
651        w->outstandingReqsWrGm++;
652        w->wrGmReqsInPipe--;
653        w->outstandingReqsRdGm++;
654        w->rdGmReqsInPipe--;
655        w->outstandingReqs++;
656        w->memReqsInPipe--;
657    }
658
659    void
660    Call::MagicAtomicNRAddGroupU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst)
661    {
662        GPUDynInstPtr m = gpuDynInst;
663        calcAddr(w, m);
664
665        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
666            ((int*)m->a_data)[lane] = src1.get<int>(w, lane, 1);
667        }
668
669        m->m_op = brigAtomicToMemOpType(Brig::BRIG_OPCODE_ATOMICNORET,
670                                        Brig::BRIG_ATOMIC_ADD);
671        m->m_type = U32::memType;
672        m->v_type = U32::vgprType;
673
674        m->exec_mask = w->execMask();
675        m->statusBitVector = 0;
676        m->equiv = 0;  // atomics don't have an equivalence class operand
677        m->n_reg = 1;
678        m->memoryOrder = Enums::MEMORY_ORDER_NONE;
679        m->scope = Enums::MEMORY_SCOPE_NONE;
680
681        m->simdId = w->simdId;
682        m->wfSlotId = w->wfSlotId;
683        m->wfDynId = w->wfDynId;
684        m->latency.init(&w->computeUnit->shader->tick_cnt);
685
686        m->s_type = SEG_GLOBAL;
687        m->pipeId = GLBMEM_PIPE;
688        m->latency.set(w->computeUnit->shader->ticks(64));
689        w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
690        w->outstandingReqsWrGm++;
691        w->wrGmReqsInPipe--;
692        w->outstandingReqsRdGm++;
693        w->rdGmReqsInPipe--;
694        w->outstandingReqs++;
695        w->memReqsInPipe--;
696    }
697
698    void
699    Call::MagicLoadGlobalU32Reg(Wavefront *w, GPUDynInstPtr gpuDynInst)
700    {
701        GPUDynInstPtr m = gpuDynInst;
702        // calculate the address
703        calcAddr(w, m);
704
705        m->m_op = Enums::MO_LD;
706        m->m_type = U32::memType;  //MemDataType::memType;
707        m->v_type = U32::vgprType; //DestDataType::vgprType;
708
709        m->exec_mask = w->execMask();
710        m->statusBitVector = 0;
711        m->equiv = 0;
712        m->n_reg = 1;
713        m->memoryOrder = Enums::MEMORY_ORDER_NONE;
714        m->scope = Enums::MEMORY_SCOPE_NONE;
715
716        // FIXME
717        //m->dst_reg = this->dest.regIndex();
718
719        m->simdId = w->simdId;
720        m->wfSlotId = w->wfSlotId;
721        m->wfDynId = w->wfDynId;
722        m->latency.init(&w->computeUnit->shader->tick_cnt);
723
724        m->s_type = SEG_GLOBAL;
725        m->pipeId = GLBMEM_PIPE;
726        m->latency.set(w->computeUnit->shader->ticks(1));
727        w->computeUnit->globalMemoryPipe.getGMReqFIFO().push(m);
728        w->outstandingReqsRdGm++;
729        w->rdGmReqsInPipe--;
730        w->outstandingReqs++;
731        w->memReqsInPipe--;
732    }
733
734    void
735    Call::MagicXactCasLd(Wavefront *w)
736    {
737        const VectorMask &mask = w->getPred();
738        int src_val1 = 0;
739
740        for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
741            if (mask[lane]) {
742                src_val1 = src1.get<int>(w, lane, 1);
743                break;
744            }
745        }
746
747        if (!w->computeUnit->xactCasLoadMap.count(src_val1)) {
748            w->computeUnit->xactCasLoadMap[src_val1] = ComputeUnit::waveQueue();
749            w->computeUnit->xactCasLoadMap[src_val1].waveIDQueue.clear();
750        }
751
752        w->computeUnit->xactCasLoadMap[src_val1].waveIDQueue
753            .push_back(ComputeUnit::waveIdentifier(w->simdId, w->wfSlotId));
754    }
755
756    void
757    Call::MagicMostSigThread(Wavefront *w)
758    {
759        const VectorMask &mask = w->getPred();
760        unsigned mst = true;
761
762        for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) {
763            if (mask[lane]) {
764                dest.set<int>(w, lane, mst);
765                mst = false;
766            }
767        }
768    }
769
770    void
771    Call::MagicMostSigBroadcast(Wavefront *w)
772    {
773        const VectorMask &mask = w->getPred();
774        int res = 0;
775        bool got_res = false;
776
777        for (int lane = w->computeUnit->wfSize() - 1; lane >= 0; --lane) {
778            if (mask[lane]) {
779                if (!got_res) {
780                    res = src1.get<int>(w, lane, 1);
781                    got_res = true;
782                }
783                dest.set<int>(w, lane, res);
784            }
785        }
786    }
787
788} // namespace HsailISA
789