macromem.isa (12134:604f47f63877) macromem.isa (13544:0b4e5446167c)
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010-2014 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating

--- 309 unchanged lines hidden (view full) ---

318 ''' % { "sReg0" : (dReg * 2), "sReg1" : (dReg * 2 + 1) }
319 unloadConv += '''
320 FpDestS%(dReg)dP0_uw = gtoh(conv2.cRegs[2 * %(dReg)d + 0]);
321 FpDestS%(dReg)dP1_uw = gtoh(conv2.cRegs[2 * %(dReg)d + 1]);
322 ''' % { "dReg" : dReg }
323 microDeintNeonCode = '''
324 const unsigned dRegs = %(dRegs)d;
325 const unsigned regs = 2 * dRegs;
1// -*- mode:c++ -*-
2
3// Copyright (c) 2010-2014 ARM Limited
4// All rights reserved
5//
6// The license below extends only to copyright in the software and shall
7// not be construed as granting a license to any other intellectual
8// property including but not limited to intellectual property relating

--- 309 unchanged lines hidden (view full) ---

318 ''' % { "sReg0" : (dReg * 2), "sReg1" : (dReg * 2 + 1) }
319 unloadConv += '''
320 FpDestS%(dReg)dP0_uw = gtoh(conv2.cRegs[2 * %(dReg)d + 0]);
321 FpDestS%(dReg)dP1_uw = gtoh(conv2.cRegs[2 * %(dReg)d + 1]);
322 ''' % { "dReg" : dReg }
323 microDeintNeonCode = '''
324 const unsigned dRegs = %(dRegs)d;
325 const unsigned regs = 2 * dRegs;
326 const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
327 sizeof(Element);
326 const unsigned perDReg =
327 (2 * sizeof(uint32_t)) / sizeof(Element);
328 union convStruct {
328 union convStruct {
329 FloatRegBits cRegs[regs];
329 uint32_t cRegs[regs];
330 Element elements[dRegs * perDReg];
331 } conv1, conv2;
332
333 %(loadConv)s
334
335 unsigned srcElem = 0;
336 for (unsigned destOffset = 0;
337 destOffset < perDReg; destOffset++) {

--- 26 unchanged lines hidden (view full) ---

364 ''' % { "dReg" : dReg }
365 unloadConv += '''
366 FpDestP%(sReg0)d_uw = gtoh(conv2.cRegs[%(sReg0)d]);
367 FpDestP%(sReg1)d_uw = gtoh(conv2.cRegs[%(sReg1)d]);
368 ''' % { "sReg0" : (dReg * 2), "sReg1" : (dReg * 2 + 1) }
369 microInterNeonCode = '''
370 const unsigned dRegs = %(dRegs)d;
371 const unsigned regs = 2 * dRegs;
330 Element elements[dRegs * perDReg];
331 } conv1, conv2;
332
333 %(loadConv)s
334
335 unsigned srcElem = 0;
336 for (unsigned destOffset = 0;
337 destOffset < perDReg; destOffset++) {

--- 26 unchanged lines hidden (view full) ---

364 ''' % { "dReg" : dReg }
365 unloadConv += '''
366 FpDestP%(sReg0)d_uw = gtoh(conv2.cRegs[%(sReg0)d]);
367 FpDestP%(sReg1)d_uw = gtoh(conv2.cRegs[%(sReg1)d]);
368 ''' % { "sReg0" : (dReg * 2), "sReg1" : (dReg * 2 + 1) }
369 microInterNeonCode = '''
370 const unsigned dRegs = %(dRegs)d;
371 const unsigned regs = 2 * dRegs;
372 const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
373 sizeof(Element);
372 const unsigned perDReg =
373 (2 * sizeof(uint32_t)) / sizeof(Element);
374 union convStruct {
374 union convStruct {
375 FloatRegBits cRegs[regs];
375 uint32_t cRegs[regs];
376 Element elements[dRegs * perDReg];
377 } conv1, conv2;
378
379 %(loadConv)s
380
381 unsigned destElem = 0;
382 for (unsigned srcOffset = 0;
383 srcOffset < perDReg; srcOffset++) {

--- 53 unchanged lines hidden (view full) ---

437 destRegs[%(reg)d].fRegs[0] = htog(FpDestS%(reg)dP0_uw);
438 destRegs[%(reg)d].fRegs[1] = htog(FpDestS%(reg)dP1_uw);
439 ''' % { "reg" : reg }
440 unloadRegs += '''
441 FpDestS%(reg)dP0_uw = gtoh(destRegs[%(reg)d].fRegs[0]);
442 FpDestS%(reg)dP1_uw = gtoh(destRegs[%(reg)d].fRegs[1]);
443 ''' % { "reg" : reg }
444 microUnpackNeonCode = '''
376 Element elements[dRegs * perDReg];
377 } conv1, conv2;
378
379 %(loadConv)s
380
381 unsigned destElem = 0;
382 for (unsigned srcOffset = 0;
383 srcOffset < perDReg; srcOffset++) {

--- 53 unchanged lines hidden (view full) ---

437 destRegs[%(reg)d].fRegs[0] = htog(FpDestS%(reg)dP0_uw);
438 destRegs[%(reg)d].fRegs[1] = htog(FpDestS%(reg)dP1_uw);
439 ''' % { "reg" : reg }
440 unloadRegs += '''
441 FpDestS%(reg)dP0_uw = gtoh(destRegs[%(reg)d].fRegs[0]);
442 FpDestS%(reg)dP1_uw = gtoh(destRegs[%(reg)d].fRegs[1]);
443 ''' % { "reg" : reg }
444 microUnpackNeonCode = '''
445 const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
446 sizeof(Element);
445 const unsigned perDReg = (2 * sizeof(uint32_t)) / sizeof(Element);
447
448 union SourceRegs {
446
447 union SourceRegs {
449 FloatRegBits fRegs[2 * %(sRegs)d];
448 uint32_t fRegs[2 * %(sRegs)d];
450 Element elements[%(sRegs)d * perDReg];
451 } sourceRegs;
452
453 union DestReg {
449 Element elements[%(sRegs)d * perDReg];
450 } sourceRegs;
451
452 union DestReg {
454 FloatRegBits fRegs[2];
453 uint32_t fRegs[2];
455 Element elements[perDReg];
456 } destRegs[%(dRegs)d];
457
458 %(loadRegs)s
459
460 for (unsigned i = 0; i < %(dRegs)d; i++) {
461 destRegs[i].elements[lane] = sourceRegs.elements[i];
462 }

--- 24 unchanged lines hidden (view full) ---

487 for dRegs in range(sRegs, 5):
488 unloadRegs = ''
489 for reg in range(dRegs):
490 unloadRegs += '''
491 FpDestS%(reg)dP0_uw = gtoh(destRegs[%(reg)d].fRegs[0]);
492 FpDestS%(reg)dP1_uw = gtoh(destRegs[%(reg)d].fRegs[1]);
493 ''' % { "reg" : reg }
494 microUnpackAllNeonCode = '''
454 Element elements[perDReg];
455 } destRegs[%(dRegs)d];
456
457 %(loadRegs)s
458
459 for (unsigned i = 0; i < %(dRegs)d; i++) {
460 destRegs[i].elements[lane] = sourceRegs.elements[i];
461 }

--- 24 unchanged lines hidden (view full) ---

486 for dRegs in range(sRegs, 5):
487 unloadRegs = ''
488 for reg in range(dRegs):
489 unloadRegs += '''
490 FpDestS%(reg)dP0_uw = gtoh(destRegs[%(reg)d].fRegs[0]);
491 FpDestS%(reg)dP1_uw = gtoh(destRegs[%(reg)d].fRegs[1]);
492 ''' % { "reg" : reg }
493 microUnpackAllNeonCode = '''
495 const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
496 sizeof(Element);
494 const unsigned perDReg = (2 * sizeof(uint32_t)) / sizeof(Element);
497
498 union SourceRegs {
495
496 union SourceRegs {
499 FloatRegBits fRegs[2 * %(sRegs)d];
497 uint32_t fRegs[2 * %(sRegs)d];
500 Element elements[%(sRegs)d * perDReg];
501 } sourceRegs;
502
503 union DestReg {
498 Element elements[%(sRegs)d * perDReg];
499 } sourceRegs;
500
501 union DestReg {
504 FloatRegBits fRegs[2];
502 uint32_t fRegs[2];
505 Element elements[perDReg];
506 } destRegs[%(dRegs)d];
507
508 %(loadRegs)s
509
510 for (unsigned i = 0; i < %(dRegs)d; i++) {
511 for (unsigned j = 0; j < perDReg; j++)
512 destRegs[i].elements[j] = sourceRegs.elements[i];

--- 25 unchanged lines hidden (view full) ---

538 for sRegs in range(dRegs, 5):
539 loadRegs = ''
540 for reg in range(sRegs):
541 loadRegs += '''
542 sourceRegs[%(reg)d].fRegs[0] = htog(FpOp1S%(reg)dP0_uw);
543 sourceRegs[%(reg)d].fRegs[1] = htog(FpOp1S%(reg)dP1_uw);
544 ''' % { "reg" : reg }
545 microPackNeonCode = '''
503 Element elements[perDReg];
504 } destRegs[%(dRegs)d];
505
506 %(loadRegs)s
507
508 for (unsigned i = 0; i < %(dRegs)d; i++) {
509 for (unsigned j = 0; j < perDReg; j++)
510 destRegs[i].elements[j] = sourceRegs.elements[i];

--- 25 unchanged lines hidden (view full) ---

536 for sRegs in range(dRegs, 5):
537 loadRegs = ''
538 for reg in range(sRegs):
539 loadRegs += '''
540 sourceRegs[%(reg)d].fRegs[0] = htog(FpOp1S%(reg)dP0_uw);
541 sourceRegs[%(reg)d].fRegs[1] = htog(FpOp1S%(reg)dP1_uw);
542 ''' % { "reg" : reg }
543 microPackNeonCode = '''
546 const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
547 sizeof(Element);
544 const unsigned perDReg =
545 (2 * sizeof(uint32_t)) / sizeof(Element);
548
549 union SourceReg {
546
547 union SourceReg {
550 FloatRegBits fRegs[2];
548 uint32_t fRegs[2];
551 Element elements[perDReg];
552 } sourceRegs[%(sRegs)d];
553
554 union DestRegs {
549 Element elements[perDReg];
550 } sourceRegs[%(sRegs)d];
551
552 union DestRegs {
555 FloatRegBits fRegs[2 * %(dRegs)d];
553 uint32_t fRegs[2 * %(dRegs)d];
556 Element elements[%(dRegs)d * perDReg];
557 } destRegs;
558
559 %(loadRegs)s
560
561 for (unsigned i = 0; i < %(sRegs)d; i++) {
562 destRegs.elements[i] = sourceRegs[i].elements[lane];
563 }

--- 216 unchanged lines hidden ---
554 Element elements[%(dRegs)d * perDReg];
555 } destRegs;
556
557 %(loadRegs)s
558
559 for (unsigned i = 0; i < %(sRegs)d; i++) {
560 destRegs.elements[i] = sourceRegs[i].elements[lane];
561 }

--- 216 unchanged lines hidden ---