macromem.isa (12134:604f47f63877) | macromem.isa (13544:0b4e5446167c) |
---|---|
1// -*- mode:c++ -*- 2 3// Copyright (c) 2010-2014 ARM Limited 4// All rights reserved 5// 6// The license below extends only to copyright in the software and shall 7// not be construed as granting a license to any other intellectual 8// property including but not limited to intellectual property relating --- 309 unchanged lines hidden (view full) --- 318 ''' % { "sReg0" : (dReg * 2), "sReg1" : (dReg * 2 + 1) } 319 unloadConv += ''' 320 FpDestS%(dReg)dP0_uw = gtoh(conv2.cRegs[2 * %(dReg)d + 0]); 321 FpDestS%(dReg)dP1_uw = gtoh(conv2.cRegs[2 * %(dReg)d + 1]); 322 ''' % { "dReg" : dReg } 323 microDeintNeonCode = ''' 324 const unsigned dRegs = %(dRegs)d; 325 const unsigned regs = 2 * dRegs; | 1// -*- mode:c++ -*- 2 3// Copyright (c) 2010-2014 ARM Limited 4// All rights reserved 5// 6// The license below extends only to copyright in the software and shall 7// not be construed as granting a license to any other intellectual 8// property including but not limited to intellectual property relating --- 309 unchanged lines hidden (view full) --- 318 ''' % { "sReg0" : (dReg * 2), "sReg1" : (dReg * 2 + 1) } 319 unloadConv += ''' 320 FpDestS%(dReg)dP0_uw = gtoh(conv2.cRegs[2 * %(dReg)d + 0]); 321 FpDestS%(dReg)dP1_uw = gtoh(conv2.cRegs[2 * %(dReg)d + 1]); 322 ''' % { "dReg" : dReg } 323 microDeintNeonCode = ''' 324 const unsigned dRegs = %(dRegs)d; 325 const unsigned regs = 2 * dRegs; |
326 const unsigned perDReg = (2 * sizeof(FloatRegBits)) / 327 sizeof(Element); | 326 const unsigned perDReg = 327 (2 * sizeof(uint32_t)) / sizeof(Element); |
328 union convStruct { | 328 union convStruct { |
329 FloatRegBits cRegs[regs]; | 329 uint32_t cRegs[regs]; |
330 Element elements[dRegs * perDReg]; 331 } conv1, conv2; 332 333 %(loadConv)s 334 335 unsigned srcElem = 0; 336 for (unsigned destOffset = 0; 337 destOffset < perDReg; destOffset++) { --- 26 unchanged lines hidden (view full) --- 364 ''' % { "dReg" : dReg } 365 unloadConv += ''' 366 FpDestP%(sReg0)d_uw = gtoh(conv2.cRegs[%(sReg0)d]); 367 FpDestP%(sReg1)d_uw = gtoh(conv2.cRegs[%(sReg1)d]); 368 ''' % { "sReg0" : (dReg * 2), "sReg1" : (dReg * 2 + 1) } 369 microInterNeonCode = ''' 370 const unsigned dRegs = %(dRegs)d; 371 const unsigned regs = 2 * dRegs; | 330 Element elements[dRegs * perDReg]; 331 } conv1, conv2; 332 333 %(loadConv)s 334 335 unsigned srcElem = 0; 336 for (unsigned destOffset = 0; 337 destOffset < perDReg; destOffset++) { --- 26 unchanged lines hidden (view full) --- 364 ''' % { "dReg" : dReg } 365 unloadConv += ''' 366 FpDestP%(sReg0)d_uw = gtoh(conv2.cRegs[%(sReg0)d]); 367 FpDestP%(sReg1)d_uw = gtoh(conv2.cRegs[%(sReg1)d]); 368 ''' % { "sReg0" : (dReg * 2), "sReg1" : (dReg * 2 + 1) } 369 microInterNeonCode = ''' 370 const unsigned dRegs = %(dRegs)d; 371 const unsigned regs = 2 * dRegs; |
372 const unsigned perDReg = (2 * sizeof(FloatRegBits)) / 373 sizeof(Element); | 372 const unsigned perDReg = 373 (2 * sizeof(uint32_t)) / sizeof(Element); |
374 union convStruct { | 374 union convStruct { |
375 FloatRegBits cRegs[regs]; | 375 uint32_t cRegs[regs]; |
376 Element elements[dRegs * perDReg]; 377 } conv1, conv2; 378 379 %(loadConv)s 380 381 unsigned destElem = 0; 382 for (unsigned srcOffset = 0; 383 srcOffset < perDReg; srcOffset++) { --- 53 unchanged lines hidden (view full) --- 437 destRegs[%(reg)d].fRegs[0] = htog(FpDestS%(reg)dP0_uw); 438 destRegs[%(reg)d].fRegs[1] = htog(FpDestS%(reg)dP1_uw); 439 ''' % { "reg" : reg } 440 unloadRegs += ''' 441 FpDestS%(reg)dP0_uw = gtoh(destRegs[%(reg)d].fRegs[0]); 442 FpDestS%(reg)dP1_uw = gtoh(destRegs[%(reg)d].fRegs[1]); 443 ''' % { "reg" : reg } 444 microUnpackNeonCode = ''' | 376 Element elements[dRegs * perDReg]; 377 } conv1, conv2; 378 379 %(loadConv)s 380 381 unsigned destElem = 0; 382 for (unsigned srcOffset = 0; 383 srcOffset < perDReg; srcOffset++) { --- 53 unchanged lines hidden (view full) --- 437 destRegs[%(reg)d].fRegs[0] = htog(FpDestS%(reg)dP0_uw); 438 destRegs[%(reg)d].fRegs[1] = htog(FpDestS%(reg)dP1_uw); 439 ''' % { "reg" : reg } 440 unloadRegs += ''' 441 FpDestS%(reg)dP0_uw = gtoh(destRegs[%(reg)d].fRegs[0]); 442 FpDestS%(reg)dP1_uw = gtoh(destRegs[%(reg)d].fRegs[1]); 443 ''' % { "reg" : reg } 444 microUnpackNeonCode = ''' |
445 const unsigned perDReg = (2 * sizeof(FloatRegBits)) / 446 sizeof(Element); | 445 const unsigned perDReg = (2 * sizeof(uint32_t)) / sizeof(Element); |
447 448 union SourceRegs { | 446 447 union SourceRegs { |
449 FloatRegBits fRegs[2 * %(sRegs)d]; | 448 uint32_t fRegs[2 * %(sRegs)d]; |
450 Element elements[%(sRegs)d * perDReg]; 451 } sourceRegs; 452 453 union DestReg { | 449 Element elements[%(sRegs)d * perDReg]; 450 } sourceRegs; 451 452 union DestReg { |
454 FloatRegBits fRegs[2]; | 453 uint32_t fRegs[2]; |
455 Element elements[perDReg]; 456 } destRegs[%(dRegs)d]; 457 458 %(loadRegs)s 459 460 for (unsigned i = 0; i < %(dRegs)d; i++) { 461 destRegs[i].elements[lane] = sourceRegs.elements[i]; 462 } --- 24 unchanged lines hidden (view full) --- 487 for dRegs in range(sRegs, 5): 488 unloadRegs = '' 489 for reg in range(dRegs): 490 unloadRegs += ''' 491 FpDestS%(reg)dP0_uw = gtoh(destRegs[%(reg)d].fRegs[0]); 492 FpDestS%(reg)dP1_uw = gtoh(destRegs[%(reg)d].fRegs[1]); 493 ''' % { "reg" : reg } 494 microUnpackAllNeonCode = ''' | 454 Element elements[perDReg]; 455 } destRegs[%(dRegs)d]; 456 457 %(loadRegs)s 458 459 for (unsigned i = 0; i < %(dRegs)d; i++) { 460 destRegs[i].elements[lane] = sourceRegs.elements[i]; 461 } --- 24 unchanged lines hidden (view full) --- 486 for dRegs in range(sRegs, 5): 487 unloadRegs = '' 488 for reg in range(dRegs): 489 unloadRegs += ''' 490 FpDestS%(reg)dP0_uw = gtoh(destRegs[%(reg)d].fRegs[0]); 491 FpDestS%(reg)dP1_uw = gtoh(destRegs[%(reg)d].fRegs[1]); 492 ''' % { "reg" : reg } 493 microUnpackAllNeonCode = ''' |
495 const unsigned perDReg = (2 * sizeof(FloatRegBits)) / 496 sizeof(Element); | 494 const unsigned perDReg = (2 * sizeof(uint32_t)) / sizeof(Element); |
497 498 union SourceRegs { | 495 496 union SourceRegs { |
499 FloatRegBits fRegs[2 * %(sRegs)d]; | 497 uint32_t fRegs[2 * %(sRegs)d]; |
500 Element elements[%(sRegs)d * perDReg]; 501 } sourceRegs; 502 503 union DestReg { | 498 Element elements[%(sRegs)d * perDReg]; 499 } sourceRegs; 500 501 union DestReg { |
504 FloatRegBits fRegs[2]; | 502 uint32_t fRegs[2]; |
505 Element elements[perDReg]; 506 } destRegs[%(dRegs)d]; 507 508 %(loadRegs)s 509 510 for (unsigned i = 0; i < %(dRegs)d; i++) { 511 for (unsigned j = 0; j < perDReg; j++) 512 destRegs[i].elements[j] = sourceRegs.elements[i]; --- 25 unchanged lines hidden (view full) --- 538 for sRegs in range(dRegs, 5): 539 loadRegs = '' 540 for reg in range(sRegs): 541 loadRegs += ''' 542 sourceRegs[%(reg)d].fRegs[0] = htog(FpOp1S%(reg)dP0_uw); 543 sourceRegs[%(reg)d].fRegs[1] = htog(FpOp1S%(reg)dP1_uw); 544 ''' % { "reg" : reg } 545 microPackNeonCode = ''' | 503 Element elements[perDReg]; 504 } destRegs[%(dRegs)d]; 505 506 %(loadRegs)s 507 508 for (unsigned i = 0; i < %(dRegs)d; i++) { 509 for (unsigned j = 0; j < perDReg; j++) 510 destRegs[i].elements[j] = sourceRegs.elements[i]; --- 25 unchanged lines hidden (view full) --- 536 for sRegs in range(dRegs, 5): 537 loadRegs = '' 538 for reg in range(sRegs): 539 loadRegs += ''' 540 sourceRegs[%(reg)d].fRegs[0] = htog(FpOp1S%(reg)dP0_uw); 541 sourceRegs[%(reg)d].fRegs[1] = htog(FpOp1S%(reg)dP1_uw); 542 ''' % { "reg" : reg } 543 microPackNeonCode = ''' |
546 const unsigned perDReg = (2 * sizeof(FloatRegBits)) / 547 sizeof(Element); | 544 const unsigned perDReg = 545 (2 * sizeof(uint32_t)) / sizeof(Element); |
548 549 union SourceReg { | 546 547 union SourceReg { |
550 FloatRegBits fRegs[2]; | 548 uint32_t fRegs[2]; |
551 Element elements[perDReg]; 552 } sourceRegs[%(sRegs)d]; 553 554 union DestRegs { | 549 Element elements[perDReg]; 550 } sourceRegs[%(sRegs)d]; 551 552 union DestRegs { |
555 FloatRegBits fRegs[2 * %(dRegs)d]; | 553 uint32_t fRegs[2 * %(dRegs)d]; |
556 Element elements[%(dRegs)d * perDReg]; 557 } destRegs; 558 559 %(loadRegs)s 560 561 for (unsigned i = 0; i < %(sRegs)d; i++) { 562 destRegs.elements[i] = sourceRegs[i].elements[lane]; 563 } --- 216 unchanged lines hidden --- | 554 Element elements[%(dRegs)d * perDReg]; 555 } destRegs; 556 557 %(loadRegs)s 558 559 for (unsigned i = 0; i < %(sRegs)d; i++) { 560 destRegs.elements[i] = sourceRegs[i].elements[lane]; 561 } --- 216 unchanged lines hidden --- |