mediaop.isa (7081:ff2321547ca3) mediaop.isa (7620:3d8a23caa1ef)
1/// Copyright (c) 2009 The Regents of The University of Michigan
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met: redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer;
8// redistributions in binary form must reproduce the above copyright
9// notice, this list of conditions and the following disclaimer in the
10// documentation and/or other materials provided with the distribution;
11// neither the name of the copyright holders nor the names of its
12// contributors may be used to endorse or promote products derived from
13// this software without specific prior written permission.
14//
15// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26//
27// Authors: Gabe Black
28
29def template MediaOpExecute {{
30 Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
31 Trace::InstRecord *traceData) const
32 {
33 Fault fault = NoFault;
34
35 %(op_decl)s;
36 %(op_rd)s;
37
38 %(code)s;
39
40 //Write the resulting state to the execution context
41 if(fault == NoFault)
42 {
43 %(op_wb)s;
44 }
45 return fault;
46 }
47}};
48
49def template MediaOpRegDeclare {{
50 class %(class_name)s : public %(base_class)s
51 {
52 protected:
53 void buildMe();
54
55 public:
56 %(class_name)s(ExtMachInst _machInst,
1/// Copyright (c) 2009 The Regents of The University of Michigan
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met: redistributions of source code must retain the above copyright
7// notice, this list of conditions and the following disclaimer;
8// redistributions in binary form must reproduce the above copyright
9// notice, this list of conditions and the following disclaimer in the
10// documentation and/or other materials provided with the distribution;
11// neither the name of the copyright holders nor the names of its
12// contributors may be used to endorse or promote products derived from
13// this software without specific prior written permission.
14//
15// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
19// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26//
27// Authors: Gabe Black
28
29def template MediaOpExecute {{
30 Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
31 Trace::InstRecord *traceData) const
32 {
33 Fault fault = NoFault;
34
35 %(op_decl)s;
36 %(op_rd)s;
37
38 %(code)s;
39
40 //Write the resulting state to the execution context
41 if(fault == NoFault)
42 {
43 %(op_wb)s;
44 }
45 return fault;
46 }
47}};
48
49def template MediaOpRegDeclare {{
50 class %(class_name)s : public %(base_class)s
51 {
52 protected:
53 void buildMe();
54
55 public:
56 %(class_name)s(ExtMachInst _machInst,
57 const char * instMnem,
58 bool isMicro, bool isDelayed, bool isFirst, bool isLast,
57 const char * instMnem, uint64_t setFlags,
59 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
60 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
61
62 %(class_name)s(ExtMachInst _machInst,
63 const char * instMnem,
64 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
65 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
66
67 %(BasicExecDeclare)s
68 };
69}};
70
71def template MediaOpImmDeclare {{
72
73 class %(class_name)s : public %(base_class)s
74 {
75 protected:
76 void buildMe();
77
78 public:
79 %(class_name)s(ExtMachInst _machInst,
58 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
59 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
60
61 %(class_name)s(ExtMachInst _machInst,
62 const char * instMnem,
63 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
64 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
65
66 %(BasicExecDeclare)s
67 };
68}};
69
70def template MediaOpImmDeclare {{
71
72 class %(class_name)s : public %(base_class)s
73 {
74 protected:
75 void buildMe();
76
77 public:
78 %(class_name)s(ExtMachInst _machInst,
80 const char * instMnem,
81 bool isMicro, bool isDelayed, bool isFirst, bool isLast,
79 const char * instMnem, uint64_t setFlags,
82 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
83 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
84
85 %(class_name)s(ExtMachInst _machInst,
86 const char * instMnem,
87 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
88 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
89
90 %(BasicExecDeclare)s
91 };
92}};
93
94def template MediaOpRegConstructor {{
95
96 inline void %(class_name)s::buildMe()
97 {
98 %(constructor)s;
99 }
100
101 inline %(class_name)s::%(class_name)s(
102 ExtMachInst machInst, const char * instMnem,
103 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
104 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
80 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
81 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
82
83 %(class_name)s(ExtMachInst _machInst,
84 const char * instMnem,
85 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
86 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
87
88 %(BasicExecDeclare)s
89 };
90}};
91
92def template MediaOpRegConstructor {{
93
94 inline void %(class_name)s::buildMe()
95 {
96 %(constructor)s;
97 }
98
99 inline %(class_name)s::%(class_name)s(
100 ExtMachInst machInst, const char * instMnem,
101 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
102 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
105 %(base_class)s(machInst, "%(mnemonic)s", instMnem,
106 false, false, false, false,
103 %(base_class)s(machInst, "%(mnemonic)s", instMnem, 0,
107 _src1, _src2, _dest, _srcSize, _destSize, _ext,
108 %(op_class)s)
109 {
110 buildMe();
111 }
112
113 inline %(class_name)s::%(class_name)s(
104 _src1, _src2, _dest, _srcSize, _destSize, _ext,
105 %(op_class)s)
106 {
107 buildMe();
108 }
109
110 inline %(class_name)s::%(class_name)s(
114 ExtMachInst machInst, const char * instMnem,
115 bool isMicro, bool isDelayed, bool isFirst, bool isLast,
111 ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
116 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
117 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
112 InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
113 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
118 %(base_class)s(machInst, "%(mnemonic)s", instMnem,
119 isMicro, isDelayed, isFirst, isLast,
114 %(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
120 _src1, _src2, _dest, _srcSize, _destSize, _ext,
121 %(op_class)s)
122 {
123 buildMe();
124 }
125}};
126
127def template MediaOpImmConstructor {{
128
129 inline void %(class_name)s::buildMe()
130 {
131 %(constructor)s;
132 }
133
134 inline %(class_name)s::%(class_name)s(
135 ExtMachInst machInst, const char * instMnem,
136 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
137 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
115 _src1, _src2, _dest, _srcSize, _destSize, _ext,
116 %(op_class)s)
117 {
118 buildMe();
119 }
120}};
121
122def template MediaOpImmConstructor {{
123
124 inline void %(class_name)s::buildMe()
125 {
126 %(constructor)s;
127 }
128
129 inline %(class_name)s::%(class_name)s(
130 ExtMachInst machInst, const char * instMnem,
131 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
132 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
138 %(base_class)s(machInst, "%(mnemonic)s", instMnem,
139 false, false, false, false,
133 %(base_class)s(machInst, "%(mnemonic)s", instMnem, 0,
140 _src1, _imm8, _dest, _srcSize, _destSize, _ext,
141 %(op_class)s)
142 {
143 buildMe();
144 }
145
146 inline %(class_name)s::%(class_name)s(
134 _src1, _imm8, _dest, _srcSize, _destSize, _ext,
135 %(op_class)s)
136 {
137 buildMe();
138 }
139
140 inline %(class_name)s::%(class_name)s(
147 ExtMachInst machInst, const char * instMnem,
148 bool isMicro, bool isDelayed, bool isFirst, bool isLast,
141 ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
149 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
150 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
142 InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
143 uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
151 %(base_class)s(machInst, "%(mnemonic)s", instMnem,
152 isMicro, isDelayed, isFirst, isLast,
144 %(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
153 _src1, _imm8, _dest, _srcSize, _destSize, _ext,
154 %(op_class)s)
155 {
156 buildMe();
157 }
158}};
159
160let {{
161 # Make these empty strings so that concatenating onto
162 # them will always work.
163 header_output = ""
164 decoder_output = ""
165 exec_output = ""
166
167 immTemplates = (
168 MediaOpImmDeclare,
169 MediaOpImmConstructor,
170 MediaOpExecute)
171
172 regTemplates = (
173 MediaOpRegDeclare,
174 MediaOpRegConstructor,
175 MediaOpExecute)
176
177 class MediaOpMeta(type):
178 def buildCppClasses(self, name, Name, suffix, code):
179
180 # Globals to stick the output in
181 global header_output
182 global decoder_output
183 global exec_output
184
185 # If op2 is used anywhere, make register and immediate versions
186 # of this code.
187 matcher = re.compile("(?<!\\w)(?P<prefix>s?)op2(?P<typeQual>\\.\\w+)?")
188 match = matcher.search(code)
189 if match:
190 typeQual = ""
191 if match.group("typeQual"):
192 typeQual = match.group("typeQual")
193 src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual)
194 self.buildCppClasses(name, Name, suffix,
195 matcher.sub(src2_name, code))
196 self.buildCppClasses(name + "i", Name, suffix + "Imm",
197 matcher.sub("imm8", code))
198 return
199
200 base = "X86ISA::MediaOp"
201
202 # If imm8 shows up in the code, use the immediate templates, if
203 # not, hopefully the register ones will be correct.
204 matcher = re.compile("(?<!\w)imm8(?!\w)")
205 if matcher.search(code):
206 base += "Imm"
207 templates = immTemplates
208 else:
209 base += "Reg"
210 templates = regTemplates
211
212 # Get everything ready for the substitution
213 iop = InstObjParams(name, Name + suffix, base, {"code" : code})
214
215 # Generate the actual code (finally!)
216 header_output += templates[0].subst(iop)
217 decoder_output += templates[1].subst(iop)
218 exec_output += templates[2].subst(iop)
219
220
221 def __new__(mcls, Name, bases, dict):
222 abstract = False
223 name = Name.lower()
224 if "abstract" in dict:
225 abstract = dict['abstract']
226 del dict['abstract']
227
228 cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
229 if not abstract:
230 cls.className = Name
231 cls.base_mnemonic = name
232 code = cls.code
233
234 # Set up the C++ classes
235 mcls.buildCppClasses(cls, name, Name, "", code)
236
237 # Hook into the microassembler dict
238 global microopClasses
239 microopClasses[name] = cls
240
241 # If op2 is used anywhere, make register and immediate versions
242 # of this code.
243 matcher = re.compile("op2(?P<typeQual>\\.\\w+)?")
244 if matcher.search(code):
245 microopClasses[name + 'i'] = cls
246 return cls
247
248
249 class MediaOp(X86Microop):
250 __metaclass__ = MediaOpMeta
251 # This class itself doesn't act as a microop
252 abstract = True
253
254 def __init__(self, dest, src1, op2,
255 size = None, destSize = None, srcSize = None, ext = None):
256 self.dest = dest
257 self.src1 = src1
258 self.op2 = op2
259 if size is not None:
260 self.srcSize = size
261 self.destSize = size
262 if srcSize is not None:
263 self.srcSize = srcSize
264 if destSize is not None:
265 self.destSize = destSize
266 if self.srcSize is None:
267 raise Exception, "Source size not set."
268 if self.destSize is None:
269 raise Exception, "Dest size not set."
270 if ext is None:
271 self.ext = 0
272 else:
273 self.ext = ext
274
145 _src1, _imm8, _dest, _srcSize, _destSize, _ext,
146 %(op_class)s)
147 {
148 buildMe();
149 }
150}};
151
152let {{
153 # Make these empty strings so that concatenating onto
154 # them will always work.
155 header_output = ""
156 decoder_output = ""
157 exec_output = ""
158
159 immTemplates = (
160 MediaOpImmDeclare,
161 MediaOpImmConstructor,
162 MediaOpExecute)
163
164 regTemplates = (
165 MediaOpRegDeclare,
166 MediaOpRegConstructor,
167 MediaOpExecute)
168
169 class MediaOpMeta(type):
170 def buildCppClasses(self, name, Name, suffix, code):
171
172 # Globals to stick the output in
173 global header_output
174 global decoder_output
175 global exec_output
176
177 # If op2 is used anywhere, make register and immediate versions
178 # of this code.
179 matcher = re.compile("(?<!\\w)(?P<prefix>s?)op2(?P<typeQual>\\.\\w+)?")
180 match = matcher.search(code)
181 if match:
182 typeQual = ""
183 if match.group("typeQual"):
184 typeQual = match.group("typeQual")
185 src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual)
186 self.buildCppClasses(name, Name, suffix,
187 matcher.sub(src2_name, code))
188 self.buildCppClasses(name + "i", Name, suffix + "Imm",
189 matcher.sub("imm8", code))
190 return
191
192 base = "X86ISA::MediaOp"
193
194 # If imm8 shows up in the code, use the immediate templates, if
195 # not, hopefully the register ones will be correct.
196 matcher = re.compile("(?<!\w)imm8(?!\w)")
197 if matcher.search(code):
198 base += "Imm"
199 templates = immTemplates
200 else:
201 base += "Reg"
202 templates = regTemplates
203
204 # Get everything ready for the substitution
205 iop = InstObjParams(name, Name + suffix, base, {"code" : code})
206
207 # Generate the actual code (finally!)
208 header_output += templates[0].subst(iop)
209 decoder_output += templates[1].subst(iop)
210 exec_output += templates[2].subst(iop)
211
212
213 def __new__(mcls, Name, bases, dict):
214 abstract = False
215 name = Name.lower()
216 if "abstract" in dict:
217 abstract = dict['abstract']
218 del dict['abstract']
219
220 cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
221 if not abstract:
222 cls.className = Name
223 cls.base_mnemonic = name
224 code = cls.code
225
226 # Set up the C++ classes
227 mcls.buildCppClasses(cls, name, Name, "", code)
228
229 # Hook into the microassembler dict
230 global microopClasses
231 microopClasses[name] = cls
232
233 # If op2 is used anywhere, make register and immediate versions
234 # of this code.
235 matcher = re.compile("op2(?P<typeQual>\\.\\w+)?")
236 if matcher.search(code):
237 microopClasses[name + 'i'] = cls
238 return cls
239
240
241 class MediaOp(X86Microop):
242 __metaclass__ = MediaOpMeta
243 # This class itself doesn't act as a microop
244 abstract = True
245
246 def __init__(self, dest, src1, op2,
247 size = None, destSize = None, srcSize = None, ext = None):
248 self.dest = dest
249 self.src1 = src1
250 self.op2 = op2
251 if size is not None:
252 self.srcSize = size
253 self.destSize = size
254 if srcSize is not None:
255 self.srcSize = srcSize
256 if destSize is not None:
257 self.destSize = destSize
258 if self.srcSize is None:
259 raise Exception, "Source size not set."
260 if self.destSize is None:
261 raise Exception, "Dest size not set."
262 if ext is None:
263 self.ext = 0
264 else:
265 self.ext = ext
266
275 def getAllocator(self, *microFlags):
267 def getAllocator(self, microFlags):
276 className = self.className
277 if self.mnemonic == self.base_mnemonic + 'i':
278 className += "Imm"
268 className = self.className
269 if self.mnemonic == self.base_mnemonic + 'i':
270 className += "Imm"
279 allocator = '''new %(class_name)s(machInst, macrocodeBlock
271 allocator = '''new %(class_name)s(machInst, macrocodeBlock,
280 %(flags)s, %(src1)s, %(op2)s, %(dest)s,
281 %(srcSize)s, %(destSize)s, %(ext)s)''' % {
282 "class_name" : className,
283 "flags" : self.microFlagsText(microFlags),
284 "src1" : self.src1, "op2" : self.op2,
285 "dest" : self.dest,
286 "srcSize" : self.srcSize,
287 "destSize" : self.destSize,
288 "ext" : self.ext}
289 return allocator
290
# Mov2int: move one srcSize-byte field of a media register into an integer
# register.
#
# The microcode reads imm8, so MediaOpMeta.buildCppClasses picks the
# immediate templates for this op; src2 (default 0) is forwarded to the base
# class as op2 and ends up as that immediate.
class Mov2int(MediaOp):
    def __init__(self, dest, src1, src2 = 0,
                 size = None, destSize = None, srcSize = None, ext = None):
        # Nothing op-specific to record; defer to MediaOp for size checks.
        super(Mov2int, self).__init__(
            dest, src1, src2,
            size = size, destSize = destSize, srcSize = srcSize, ext = ext)

    # items  - number of srcSize-byte fields in one FloatRegBits value.
    # offset - field index taken from the immediate; lowered by `items` when
    #          bit 0 of the src1 register index and bit 0 of ext are both set
    #          (presumably selecting the upper register of a pair - confirm).
    # An in-range offset extracts that field and merges it into DestReg at
    # destSize; otherwise DestReg is assigned to itself.
    code = '''
        int items = sizeof(FloatRegBits) / srcSize;
        int offset = imm8;
        if (bits(src1, 0) && (ext & 0x1))
            offset -= items;
        if (offset >= 0 && offset < items) {
            uint64_t fpSrcReg1 =
                bits(FpSrcReg1.uqw,
                        (offset + 1) * srcSize * 8 - 1,
                        (offset + 0) * srcSize * 8);
            DestReg = merge(0, fpSrcReg1, destSize);
        } else {
            DestReg = DestReg;
        }
    '''
311
# Mov2fp: insert the low srcSize bytes of an integer register into one
# destSize-byte field of a media register.
#
# The microcode reads imm8, so the immediate templates are used; src2
# (default 0) is forwarded to the base class as op2 and supplies it.
class Mov2fp(MediaOp):
    def __init__(self, dest, src1, src2 = 0,
                 size = None, destSize = None, srcSize = None, ext = None):
        # Nothing op-specific to record; defer to MediaOp for size checks.
        super(Mov2fp, self).__init__(
            dest, src1, src2,
            size = size, destSize = destSize, srcSize = srcSize, ext = ext)

    # items  - number of destSize-byte fields in one FloatRegBits value.
    # offset - field index taken from the immediate; lowered by `items` when
    #          bit 0 of the dest register index and bit 0 of ext are both set
    #          (presumably selecting the upper register of a pair - confirm).
    # An in-range offset overwrites that field of FpDestReg; otherwise
    # FpDestReg is assigned to itself.
    code = '''
        int items = sizeof(FloatRegBits) / destSize;
        int offset = imm8;
        if (bits(dest, 0) && (ext & 0x1))
            offset -= items;
        if (offset >= 0 && offset < items) {
            uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
            FpDestReg.uqw =
                insertBits(FpDestReg.uqw,
                        (offset + 1) * destSize * 8 - 1,
                        (offset + 0) * destSize * 8, srcReg1);
        } else {
            FpDestReg.uqw = FpDestReg.uqw;
        }
    '''
332
# Movsign: gather the top (sign) bit of every srcSize-byte element of a media
# register into consecutive bits of an integer register.
#
# Only one source is taken; the second operand slot of MediaOp is filled
# with the fixed text "InstRegIndex(0)".
class Movsign(MediaOp):
    def __init__(self, dest, src,
                 size = None, destSize = None, srcSize = None, ext = None):
        super(Movsign, self).__init__(
            dest, src, "InstRegIndex(0)",
            size = size, destSize = destSize, srcSize = srcSize, ext = ext)

    # Bit i of `result` (shifted up by `items` when ext bit 0 is set) receives
    # the most significant bit of element i.  The gathered bits are ORed into
    # the existing DestReg value rather than replacing it.
    code = '''
        int items = sizeof(FloatRegBits) / srcSize;
        uint64_t result = 0;
        int offset = (ext & 0x1) ? items : 0;
        for (int i = 0; i < items; i++) {
            uint64_t picked =
                bits(FpSrcReg1.uqw, (i + 1) * 8 * srcSize - 1);
            result = insertBits(result, i + offset, i + offset, picked);
        }
        DestReg = DestReg | result;
    '''
349
# Maskmov: element-wise conditional copy.
#
# For every element, the most significant bit of the matching FpSrcReg2
# element acts as a write enable: when set, the FpSrcReg1 element replaces
# the one in FpDestReg; when clear, the destination element is left as is.
# Source and destination element sizes must match (asserted).
# NOTE(review): numItems() is defined outside this view; it is assumed to
# return the number of `size`-byte elements in a register.
class Maskmov(MediaOp):
    code = '''
        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = numItems(size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            if (bits(FpSrcReg2.uqw, hiIndex))
                result = insertBits(result, hiIndex, loIndex, arg1Bits);
        }
        FpDestReg.uqw = result;
    '''
367
# shuffle: build each destination element from a source element chosen by a
# selector field in ext.
#
# ext is consumed from its low bits upward, optionBits per destination
# element (1 bit / 2 choices for 8-byte elements, 2 bits / 4 choices
# otherwise).  A selector whose byte offset falls past the end of one
# register is reduced by options / 2 and reads FpSrcReg2; any other selector
# reads FpSrcReg1.  Source and destination sizes must match (asserted).
class shuffle(MediaOp):
    code = '''
        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = sizeof(FloatRegBits) / size;
        int options;
        int optionBits;
        if (size == 8) {
            options = 2;
            optionBits = 1;
        } else {
            options = 4;
            optionBits = 2;
        }

        uint64_t result = 0;
        uint8_t sel = ext;

        for (int i = 0; i < items; i++) {
            uint64_t resBits;
            uint8_t lsel = sel & mask(optionBits);
            if (lsel * size >= sizeof(FloatRegBits)) {
                lsel -= options / 2;
                resBits = bits(FpSrcReg2.uqw,
                        (lsel + 1) * sizeBits - 1,
                        (lsel + 0) * sizeBits);
            } else {
                resBits = bits(FpSrcReg1.uqw,
                        (lsel + 1) * sizeBits - 1,
                        (lsel + 0) * sizeBits);
            }

            sel >>= optionBits;

            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''
409
# Unpack: interleave elements of the two sources.
#
# Half a register's worth of elements is taken from each source, starting at
# element 0, or at element `items` when ext is non-zero.  FpSrcReg1 elements
# land in the even destination slots and FpSrcReg2 elements in the odd ones.
# Source and destination sizes must match (asserted).
class Unpack(MediaOp):
    code = '''
        assert(srcSize == destSize);
        int size = destSize;
        int items = (sizeof(FloatRegBits) / size) / 2;
        int offset = ext ? items : 0;
        uint64_t result = 0;
        for (int i = 0; i < items; i++) {
            uint64_t pickedLow =
                bits(FpSrcReg1.uqw, (i + offset + 1) * 8 * size - 1,
                                    (i + offset) * 8 * size);
            result = insertBits(result,
                                (2 * i + 1) * 8 * size - 1,
                                (2 * i + 0) * 8 * size,
                                pickedLow);
            uint64_t pickedHigh =
                bits(FpSrcReg2.uqw, (i + offset + 1) * 8 * size - 1,
                                    (i + offset) * 8 * size);
            result = insertBits(result,
                                (2 * i + 2) * 8 * size - 1,
                                (2 * i + 1) * 8 * size,
                                pickedHigh);
        }
        FpDestReg.uqw = result;
    '''
435
# Pack: narrow signed srcSize-byte elements to destSize bytes (srcSize must
# be twice destSize) with saturation.  The low half of the result is packed
# from FpSrcReg1 and the high half from FpSrcReg2.
#
# signedOp() selects the destination range used for saturation: signed
# [-(2^(destBits-1)), 2^(destBits-1) - 1] when true, unsigned
# [0, 2^destBits - 1] otherwise.  The source elements are treated as signed
# in both cases.
#
# Fixes relative to the previous microcode:
#  * Elements of the second source were addressed with (i - items), which is
#    negative for every iteration and made bits() shift by a negative amount
#    (undefined behaviour).  The element index is now i - items / 2.
#  * The "all sign bits" comparison used mask(destBits - srcBits + 1), i.e. a
#    negative width.  The compared field is srcBits - destBits + 1 bits wide.
#  * For unsigned saturation, a non-negative value only overflows when a bit
#    at or above destBits is set (bit destBits - 1 is a valid data bit), and
#    every negative value clamps to 0.
# The two formerly duplicated loops are merged so the saturation logic exists
# only once.
class Pack(MediaOp):
    code = '''
        assert(srcSize == destSize * 2);
        int items = (sizeof(FloatRegBits) / destSize);
        int destBits = destSize * 8;
        int srcBits = srcSize * 8;
        uint64_t result = 0;
        for (int i = 0; i < items; i++) {
            // Each source supplies elements 0 .. items / 2 - 1.
            bool useSecond = (i >= items / 2);
            int srcIdx = useSecond ? (i - items / 2) : i;
            uint64_t srcVal = useSecond ? FpSrcReg2.uqw : FpSrcReg1.uqw;
            uint64_t picked =
                bits(srcVal, (srcIdx + 1) * srcBits - 1,
                             (srcIdx + 0) * srcBits);
            unsigned signBit = bits(picked, srcBits - 1);

            // Handle saturation.
            if (signedOp()) {
                // The value fits iff the destination sign bit and all
                // bits above it are copies of the source sign bit.
                uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
                if (signBit) {
                    if (overflow != mask(srcBits - destBits + 1))
                        picked = (ULL(1) << (destBits - 1));
                } else {
                    if (overflow != 0)
                        picked = mask(destBits - 1);
                }
            } else {
                if (signBit) {
                    // Negative values clamp to the unsigned minimum.
                    picked = 0;
                } else if (bits(picked, srcBits - 1, destBits) != 0) {
                    picked = mask(destBits);
                }
            }
            result = insertBits(result,
                                (i + 1) * destBits - 1,
                                (i + 0) * destBits,
                                picked);
        }
        FpDestReg.uqw = result;
    '''
502
# Mxor: bitwise XOR of two media registers.
class Mxor(MediaOp):
    def __init__(self, dest, src1, src2):
        # The microcode never looks at the element size, but MediaOp insists
        # on one being set, so a size of 1 is supplied.
        super(Mxor, self).__init__(dest, src1, src2, size = 1)

    code = '''
        FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw;
    '''
509
# Mor: bitwise OR of two media registers.
class Mor(MediaOp):
    def __init__(self, dest, src1, src2):
        # The microcode never looks at the element size, but MediaOp insists
        # on one being set, so a size of 1 is supplied.
        super(Mor, self).__init__(dest, src1, src2, size = 1)

    code = '''
        FpDestReg.uqw = FpSrcReg1.uqw | FpSrcReg2.uqw;
    '''
516
# Mand: bitwise AND of two media registers.
class Mand(MediaOp):
    def __init__(self, dest, src1, src2):
        # The microcode never looks at the element size, but MediaOp insists
        # on one being set, so a size of 1 is supplied.
        super(Mand, self).__init__(dest, src1, src2, size = 1)

    code = '''
        FpDestReg.uqw = FpSrcReg1.uqw & FpSrcReg2.uqw;
    '''
523
# Mandn: AND-NOT of two media registers; the FIRST source is the one that is
# complemented before the AND.
class Mandn(MediaOp):
    def __init__(self, dest, src1, src2):
        # The microcode never looks at the element size, but MediaOp insists
        # on one being set, so a size of 1 is supplied.
        super(Mandn, self).__init__(dest, src1, src2, size = 1)

    code = '''
        FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw;
    '''
530
# Mminf: element-wise floating point minimum.
#
# Elements are 4 bytes (float) or 8 bytes (double); other sizes trip the
# assert.  Each pair of elements is reinterpreted through a union and
# compared as double.  The first operand's bits are kept only when
# arg1 < arg2 is true; in every other case (equal values, or a comparison
# that is false because an operand is NaN) the second operand's bits are
# used.  Raw bit patterns are copied, so the chosen element is bit-exact.
# NOTE(review): numItems() is defined outside this view; it is assumed to
# return the number of `size`-byte elements in a register.
class Mminf(MediaOp):
    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        assert(srcSize == 4 || srcSize == 8);
        int items = numItems(size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            double arg1, arg2;
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);

            if (size == 4) {
                floatInt fi;
                fi.i = arg1Bits;
                arg1 = fi.f;
                fi.i = arg2Bits;
                arg2 = fi.f;
            } else {
                doubleInt di;
                di.i = arg1Bits;
                arg1 = di.d;
                di.i = arg2Bits;
                arg2 = di.d;
            }

            if (arg1 < arg2) {
                result = insertBits(result, hiIndex, loIndex, arg1Bits);
            } else {
                result = insertBits(result, hiIndex, loIndex, arg2Bits);
            }
        }
        FpDestReg.uqw = result;
    '''
580
# Mmaxf: element-wise floating point maximum.
#
# Elements are 4 bytes (float) or 8 bytes (double); other sizes trip the
# assert.  Each pair of elements is reinterpreted through a union and
# compared as double.  The first operand's bits are kept only when
# arg1 > arg2 is true; in every other case (equal values, or a comparison
# that is false because an operand is NaN) the second operand's bits are
# used.  Raw bit patterns are copied, so the chosen element is bit-exact.
# NOTE(review): numItems() is defined outside this view; it is assumed to
# return the number of `size`-byte elements in a register.
class Mmaxf(MediaOp):
    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        assert(srcSize == 4 || srcSize == 8);
        int items = numItems(size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            double arg1, arg2;
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);

            if (size == 4) {
                floatInt fi;
                fi.i = arg1Bits;
                arg1 = fi.f;
                fi.i = arg2Bits;
                arg2 = fi.f;
            } else {
                doubleInt di;
                di.i = arg1Bits;
                arg1 = di.d;
                di.i = arg2Bits;
                arg2 = di.d;
            }

            if (arg1 > arg2) {
                result = insertBits(result, hiIndex, loIndex, arg1Bits);
            } else {
                result = insertBits(result, hiIndex, loIndex, arg2Bits);
            }
        }
        FpDestReg.uqw = result;
    '''
630
# Mmini: element-wise integer minimum.
#
# Every element is extracted both as raw unsigned bits and as a
# sign-extended 64-bit value (the element's top bit is replicated upward by
# OR-ing in 0 - signbit).  signedOp() chooses whether the signed or the
# unsigned values are compared; the raw bits of the smaller operand are
# written to the result, with ties going to the second operand.
# NOTE(review): numItems() and signedOp() are defined outside this view.
class Mmini(MediaOp):
    code = '''

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = numItems(size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            int64_t arg1 = arg1Bits |
                (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
            int64_t arg2 = arg2Bits |
                (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
            uint64_t resBits;

            if (signedOp()) {
                if (arg1 < arg2) {
                    resBits = arg1Bits;
                } else {
                    resBits = arg2Bits;
                }
            } else {
                if (arg1Bits < arg2Bits) {
                    resBits = arg1Bits;
                } else {
                    resBits = arg2Bits;
                }
            }
            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''
668
# Mmaxi: element-wise integer maximum.
#
# Every element is extracted both as raw unsigned bits and as a
# sign-extended 64-bit value (the element's top bit is replicated upward by
# OR-ing in 0 - signbit).  signedOp() chooses whether the signed or the
# unsigned values are compared; the raw bits of the larger operand are
# written to the result, with ties going to the second operand.
# NOTE(review): numItems() and signedOp() are defined outside this view.
class Mmaxi(MediaOp):
    code = '''

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = numItems(size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            int64_t arg1 = arg1Bits |
                (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
            int64_t arg2 = arg2Bits |
                (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
            uint64_t resBits;

            if (signedOp()) {
                if (arg1 > arg2) {
                    resBits = arg1Bits;
                } else {
                    resBits = arg2Bits;
                }
            } else {
                if (arg1Bits > arg2Bits) {
                    resBits = arg1Bits;
                } else {
                    resBits = arg2Bits;
                }
            }
            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''
706
# Msqrt: element-wise floating point square root of a single source.
#
# Only one source is taken; the second operand slot of MediaOp is filled
# with the fixed text "InstRegIndex(0)".
class Msqrt(MediaOp):
    def __init__(self, dest, src,
                 size = None, destSize = None, srcSize = None, ext = None):
        super(Msqrt, self).__init__(
            dest, src, "InstRegIndex(0)",
            size = size, destSize = destSize, srcSize = srcSize, ext = ext)

    # Elements are 4 bytes (float) or 8 bytes (double); other sizes trip the
    # assert.  Each element is reinterpreted through a union, passed to
    # sqrt(), and its resulting bit pattern written back in place.
    # NOTE(review): numItems() is defined outside this view.
    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        assert(srcSize == 4 || srcSize == 8);
        int items = numItems(size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex);

            if (size == 4) {
                floatInt fi;
                fi.i = argBits;
                fi.f = sqrt(fi.f);
                argBits = fi.i;
            } else {
                doubleInt di;
                di.i = argBits;
                di.d = sqrt(di.d);
                argBits = di.i;
            }
            result = insertBits(result, hiIndex, loIndex, argBits);
        }
        FpDestReg.uqw = result;
    '''
751
# Maddf: element-wise floating point addition (FpSrcReg1 + FpSrcReg2).
#
# Elements are 4 bytes (float) or 8 bytes (double); other sizes trip the
# assert.  Each pair of elements is reinterpreted through a union, added in
# its own precision, and the sum's bit pattern stored in the matching
# destination element.
# NOTE(review): numItems() is defined outside this view.
class Maddf(MediaOp):
    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        assert(srcSize == 4 || srcSize == 8);
        int items = numItems(size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
            uint64_t resBits;

            if (size == 4) {
                floatInt arg1, arg2, res;
                arg1.i = arg1Bits;
                arg2.i = arg2Bits;
                res.f = arg1.f + arg2.f;
                resBits = res.i;
            } else {
                doubleInt arg1, arg2, res;
                arg1.i = arg1Bits;
                arg2.i = arg2Bits;
                res.d = arg1.d + arg2.d;
                resBits = res.i;
            }

            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''
797
# Msubf: element-wise floating point subtraction (FpSrcReg1 - FpSrcReg2).
#
# Elements are 4 bytes (float) or 8 bytes (double); other sizes trip the
# assert.  Each pair of elements is reinterpreted through a union,
# subtracted in its own precision, and the difference's bit pattern stored
# in the matching destination element.
# NOTE(review): numItems() is defined outside this view.
class Msubf(MediaOp):
    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        assert(srcSize == 4 || srcSize == 8);
        int items = numItems(size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
            uint64_t resBits;

            if (size == 4) {
                floatInt arg1, arg2, res;
                arg1.i = arg1Bits;
                arg2.i = arg2Bits;
                res.f = arg1.f - arg2.f;
                resBits = res.i;
            } else {
                doubleInt arg1, arg2, res;
                arg1.i = arg1Bits;
                arg2.i = arg2Bits;
                res.d = arg1.d - arg2.d;
                resBits = res.i;
            }

            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''
843
# Mmulf: element-wise floating point multiplication (FpSrcReg1 * FpSrcReg2).
#
# Elements are 4 bytes (float) or 8 bytes (double); other sizes trip the
# assert.  Each pair of elements is reinterpreted through a union,
# multiplied in its own precision, and the product's bit pattern stored in
# the matching destination element.
# NOTE(review): numItems() is defined outside this view.
class Mmulf(MediaOp):
    code = '''
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        assert(srcSize == 4 || srcSize == 8);
        int items = numItems(size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
            uint64_t resBits;

            if (size == 4) {
                floatInt arg1, arg2, res;
                arg1.i = arg1Bits;
                arg2.i = arg2Bits;
                res.f = arg1.f * arg2.f;
                resBits = res.i;
            } else {
                doubleInt arg1, arg2, res;
                arg1.i = arg1Bits;
                arg2.i = arg2Bits;
                res.d = arg1.d * arg2.d;
                resBits = res.i;
            }

            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''
889
890 class Mdivf(MediaOp):
891 code = '''
892 union floatInt
893 {
894 float f;
895 uint32_t i;
896 };
897 union doubleInt
898 {
899 double d;
900 uint64_t i;
901 };
902
903 assert(srcSize == destSize);
904 int size = srcSize;
905 int sizeBits = size * 8;
906 assert(srcSize == 4 || srcSize == 8);
907 int items = numItems(size);
908 uint64_t result = FpDestReg.uqw;
909
910 for (int i = 0; i < items; i++) {
911 int hiIndex = (i + 1) * sizeBits - 1;
912 int loIndex = (i + 0) * sizeBits;
913 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
914 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
915 uint64_t resBits;
916
917 if (size == 4) {
918 floatInt arg1, arg2, res;
919 arg1.i = arg1Bits;
920 arg2.i = arg2Bits;
921 res.f = arg1.f / arg2.f;
922 resBits = res.i;
923 } else {
924 doubleInt arg1, arg2, res;
925 arg1.i = arg1Bits;
926 arg2.i = arg2Bits;
927 res.d = arg1.d / arg2.d;
928 resBits = res.i;
929 }
930
931 result = insertBits(result, hiIndex, loIndex, resBits);
932 }
933 FpDestReg.uqw = result;
934 '''
935
936 class Maddi(MediaOp):
937 code = '''
938 assert(srcSize == destSize);
939 int size = srcSize;
940 int sizeBits = size * 8;
941 int items = numItems(size);
942 uint64_t result = FpDestReg.uqw;
943
944 for (int i = 0; i < items; i++) {
945 int hiIndex = (i + 1) * sizeBits - 1;
946 int loIndex = (i + 0) * sizeBits;
947 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
948 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
949 uint64_t resBits = arg1Bits + arg2Bits;
950
951 if (ext & 0x2) {
952 if (signedOp()) {
953 int arg1Sign = bits(arg1Bits, sizeBits - 1);
954 int arg2Sign = bits(arg2Bits, sizeBits - 1);
955 int resSign = bits(resBits, sizeBits - 1);
956 if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
957 if (resSign == 0)
958 resBits = (ULL(1) << (sizeBits - 1));
959 else
960 resBits = mask(sizeBits - 1);
961 }
962 } else {
963 if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
964 resBits = mask(sizeBits);
965 }
966 }
967
968 result = insertBits(result, hiIndex, loIndex, resBits);
969 }
970 FpDestReg.uqw = result;
971 '''
972
973 class Msubi(MediaOp):
974 code = '''
975 assert(srcSize == destSize);
976 int size = srcSize;
977 int sizeBits = size * 8;
978 int items = numItems(size);
979 uint64_t result = FpDestReg.uqw;
980
981 for (int i = 0; i < items; i++) {
982 int hiIndex = (i + 1) * sizeBits - 1;
983 int loIndex = (i + 0) * sizeBits;
984 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
985 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
986 uint64_t resBits = arg1Bits - arg2Bits;
987
988 if (ext & 0x2) {
989 if (signedOp()) {
990 int arg1Sign = bits(arg1Bits, sizeBits - 1);
991 int arg2Sign = !bits(arg2Bits, sizeBits - 1);
992 int resSign = bits(resBits, sizeBits - 1);
993 if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
994 if (resSign == 0)
995 resBits = (ULL(1) << (sizeBits - 1));
996 else
997 resBits = mask(sizeBits - 1);
998 }
999 } else {
1000 if (arg2Bits > arg1Bits) {
1001 resBits = 0;
1002 } else if (!findCarry(sizeBits, resBits,
1003 arg1Bits, ~arg2Bits)) {
1004 resBits = mask(sizeBits);
1005 }
1006 }
1007 }
1008
1009 result = insertBits(result, hiIndex, loIndex, resBits);
1010 }
1011 FpDestReg.uqw = result;
1012 '''
1013
1014 class Mmuli(MediaOp):
1015 code = '''
1016 int srcBits = srcSize * 8;
1017 int destBits = destSize * 8;
1018 assert(destBits <= 64);
1019 assert(destSize >= srcSize);
1020 int items = numItems(destSize);
1021 uint64_t result = FpDestReg.uqw;
1022
1023 for (int i = 0; i < items; i++) {
1024 int offset = 0;
1025 if (ext & 16) {
1026 if (ext & 32)
1027 offset = i * (destBits - srcBits);
1028 else
1029 offset = i * (destBits - srcBits) + srcBits;
1030 }
1031 int srcHiIndex = (i + 1) * srcBits - 1 + offset;
1032 int srcLoIndex = (i + 0) * srcBits + offset;
1033 uint64_t arg1Bits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1034 uint64_t arg2Bits = bits(FpSrcReg2.uqw, srcHiIndex, srcLoIndex);
1035 uint64_t resBits;
1036
1037 if (signedOp()) {
1038 int64_t arg1 = arg1Bits |
1039 (0 - (arg1Bits & (ULL(1) << (srcBits - 1))));
1040 int64_t arg2 = arg2Bits |
1041 (0 - (arg2Bits & (ULL(1) << (srcBits - 1))));
1042 resBits = (uint64_t)(arg1 * arg2);
1043 } else {
1044 resBits = arg1Bits * arg2Bits;
1045 }
1046
1047 if (ext & 0x4)
1048 resBits += (ULL(1) << (destBits - 1));
1049
1050 if (multHi())
1051 resBits >>= destBits;
1052
1053 int destHiIndex = (i + 1) * destBits - 1;
1054 int destLoIndex = (i + 0) * destBits;
1055 result = insertBits(result, destHiIndex, destLoIndex, resBits);
1056 }
1057 FpDestReg.uqw = result;
1058 '''
1059
1060 class Mavg(MediaOp):
1061 code = '''
1062 assert(srcSize == destSize);
1063 int size = srcSize;
1064 int sizeBits = size * 8;
1065 int items = numItems(size);
1066 uint64_t result = FpDestReg.uqw;
1067
1068 for (int i = 0; i < items; i++) {
1069 int hiIndex = (i + 1) * sizeBits - 1;
1070 int loIndex = (i + 0) * sizeBits;
1071 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1072 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1073 uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;
1074
1075 result = insertBits(result, hiIndex, loIndex, resBits);
1076 }
1077 FpDestReg.uqw = result;
1078 '''
1079
1080 class Msad(MediaOp):
1081 code = '''
1082 int srcBits = srcSize * 8;
1083 int items = sizeof(FloatRegBits) / srcSize;
1084
1085 uint64_t sum = 0;
1086 for (int i = 0; i < items; i++) {
1087 int hiIndex = (i + 1) * srcBits - 1;
1088 int loIndex = (i + 0) * srcBits;
1089 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1090 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1091 int64_t resBits = arg1Bits - arg2Bits;
1092 if (resBits < 0)
1093 resBits = -resBits;
1094 sum += resBits;
1095 }
1096 FpDestReg.uqw = sum & mask(destSize * 8);
1097 '''
1098
1099 class Msrl(MediaOp):
1100 code = '''
1101
1102 assert(srcSize == destSize);
1103 int size = srcSize;
1104 int sizeBits = size * 8;
1105 int items = numItems(size);
1106 uint64_t shiftAmt = op2.uqw;
1107 uint64_t result = FpDestReg.uqw;
1108
1109 for (int i = 0; i < items; i++) {
1110 int hiIndex = (i + 1) * sizeBits - 1;
1111 int loIndex = (i + 0) * sizeBits;
1112 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1113 uint64_t resBits;
1114 if (shiftAmt >= sizeBits) {
1115 resBits = 0;
1116 } else {
1117 resBits = (arg1Bits >> shiftAmt) &
1118 mask(sizeBits - shiftAmt);
1119 }
1120
1121 result = insertBits(result, hiIndex, loIndex, resBits);
1122 }
1123 FpDestReg.uqw = result;
1124 '''
1125
1126 class Msra(MediaOp):
1127 code = '''
1128
1129 assert(srcSize == destSize);
1130 int size = srcSize;
1131 int sizeBits = size * 8;
1132 int items = numItems(size);
1133 uint64_t shiftAmt = op2.uqw;
1134 uint64_t result = FpDestReg.uqw;
1135
1136 for (int i = 0; i < items; i++) {
1137 int hiIndex = (i + 1) * sizeBits - 1;
1138 int loIndex = (i + 0) * sizeBits;
1139 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1140 uint64_t resBits;
1141 if (shiftAmt >= sizeBits) {
1142 if (bits(arg1Bits, sizeBits - 1))
1143 resBits = mask(sizeBits);
1144 else
1145 resBits = 0;
1146 } else {
1147 resBits = (arg1Bits >> shiftAmt);
1148 resBits = resBits |
1149 (0 - (resBits & (ULL(1) << (sizeBits - 1 - shiftAmt))));
1150 }
1151
1152 result = insertBits(result, hiIndex, loIndex, resBits);
1153 }
1154 FpDestReg.uqw = result;
1155 '''
1156
1157 class Msll(MediaOp):
1158 code = '''
1159
1160 assert(srcSize == destSize);
1161 int size = srcSize;
1162 int sizeBits = size * 8;
1163 int items = numItems(size);
1164 uint64_t shiftAmt = op2.uqw;
1165 uint64_t result = FpDestReg.uqw;
1166
1167 for (int i = 0; i < items; i++) {
1168 int hiIndex = (i + 1) * sizeBits - 1;
1169 int loIndex = (i + 0) * sizeBits;
1170 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1171 uint64_t resBits;
1172 if (shiftAmt >= sizeBits) {
1173 resBits = 0;
1174 } else {
1175 resBits = (arg1Bits << shiftAmt);
1176 }
1177
1178 result = insertBits(result, hiIndex, loIndex, resBits);
1179 }
1180 FpDestReg.uqw = result;
1181 '''
1182
1183 class Cvtf2i(MediaOp):
1184 def __init__(self, dest, src, \
1185 size = None, destSize = None, srcSize = None, ext = None):
1186 super(Cvtf2i, self).__init__(dest, src,\
1187 "InstRegIndex(0)", size, destSize, srcSize, ext)
1188 code = '''
1189 union floatInt
1190 {
1191 float f;
1192 uint32_t i;
1193 };
1194 union doubleInt
1195 {
1196 double d;
1197 uint64_t i;
1198 };
1199
1200 assert(destSize == 4 || destSize == 8);
1201 assert(srcSize == 4 || srcSize == 8);
1202 int srcSizeBits = srcSize * 8;
1203 int destSizeBits = destSize * 8;
1204 int items;
1205 int srcStart = 0;
1206 int destStart = 0;
1207 if (srcSize == 2 * destSize) {
1208 items = numItems(srcSize);
1209 if (ext & 0x2)
1210 destStart = destSizeBits * items;
1211 } else if (destSize == 2 * srcSize) {
1212 items = numItems(destSize);
1213 if (ext & 0x2)
1214 srcStart = srcSizeBits * items;
1215 } else {
1216 items = numItems(destSize);
1217 }
1218 uint64_t result = FpDestReg.uqw;
1219
1220 for (int i = 0; i < items; i++) {
1221 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1222 int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1223 uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1224 double arg;
1225
1226 if (srcSize == 4) {
1227 floatInt fi;
1228 fi.i = argBits;
1229 arg = fi.f;
1230 } else {
1231 doubleInt di;
1232 di.i = argBits;
1233 arg = di.d;
1234 }
1235
1236 if (ext & 0x4) {
1237 if (arg >= 0)
1238 arg += 0.5;
1239 else
1240 arg -= 0.5;
1241 }
1242
1243 if (destSize == 4) {
1244 argBits = (uint32_t)arg;
1245 } else {
1246 argBits = (uint64_t)arg;
1247 }
1248 int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1249 int destLoIndex = destStart + (i + 0) * destSizeBits;
1250 result = insertBits(result, destHiIndex, destLoIndex, argBits);
1251 }
1252 FpDestReg.uqw = result;
1253 '''
1254
1255 class Cvti2f(MediaOp):
1256 def __init__(self, dest, src, \
1257 size = None, destSize = None, srcSize = None, ext = None):
1258 super(Cvti2f, self).__init__(dest, src,\
1259 "InstRegIndex(0)", size, destSize, srcSize, ext)
1260 code = '''
1261 union floatInt
1262 {
1263 float f;
1264 uint32_t i;
1265 };
1266 union doubleInt
1267 {
1268 double d;
1269 uint64_t i;
1270 };
1271
1272 assert(destSize == 4 || destSize == 8);
1273 assert(srcSize == 4 || srcSize == 8);
1274 int srcSizeBits = srcSize * 8;
1275 int destSizeBits = destSize * 8;
1276 int items;
1277 int srcStart = 0;
1278 int destStart = 0;
1279 if (srcSize == 2 * destSize) {
1280 items = numItems(srcSize);
1281 if (ext & 0x2)
1282 destStart = destSizeBits * items;
1283 } else if (destSize == 2 * srcSize) {
1284 items = numItems(destSize);
1285 if (ext & 0x2)
1286 srcStart = srcSizeBits * items;
1287 } else {
1288 items = numItems(destSize);
1289 }
1290 uint64_t result = FpDestReg.uqw;
1291
1292 for (int i = 0; i < items; i++) {
1293 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1294 int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1295 uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1296
1297 int64_t sArg = argBits |
1298 (0 - (argBits & (ULL(1) << (srcSizeBits - 1))));
1299 double arg = sArg;
1300
1301 if (destSize == 4) {
1302 floatInt fi;
1303 fi.f = arg;
1304 argBits = fi.i;
1305 } else {
1306 doubleInt di;
1307 di.d = arg;
1308 argBits = di.i;
1309 }
1310 int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1311 int destLoIndex = destStart + (i + 0) * destSizeBits;
1312 result = insertBits(result, destHiIndex, destLoIndex, argBits);
1313 }
1314 FpDestReg.uqw = result;
1315 '''
1316
1317 class Cvtf2f(MediaOp):
1318 def __init__(self, dest, src, \
1319 size = None, destSize = None, srcSize = None, ext = None):
1320 super(Cvtf2f, self).__init__(dest, src,\
1321 "InstRegIndex(0)", size, destSize, srcSize, ext)
1322 code = '''
1323 union floatInt
1324 {
1325 float f;
1326 uint32_t i;
1327 };
1328 union doubleInt
1329 {
1330 double d;
1331 uint64_t i;
1332 };
1333
1334 assert(destSize == 4 || destSize == 8);
1335 assert(srcSize == 4 || srcSize == 8);
1336 int srcSizeBits = srcSize * 8;
1337 int destSizeBits = destSize * 8;
1338 int items;
1339 int srcStart = 0;
1340 int destStart = 0;
1341 if (srcSize == 2 * destSize) {
1342 items = numItems(srcSize);
1343 if (ext & 0x2)
1344 destStart = destSizeBits * items;
1345 } else if (destSize == 2 * srcSize) {
1346 items = numItems(destSize);
1347 if (ext & 0x2)
1348 srcStart = srcSizeBits * items;
1349 } else {
1350 items = numItems(destSize);
1351 }
1352 uint64_t result = FpDestReg.uqw;
1353
1354 for (int i = 0; i < items; i++) {
1355 int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
1356 int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
1357 uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
1358 double arg;
1359
1360 if (srcSize == 4) {
1361 floatInt fi;
1362 fi.i = argBits;
1363 arg = fi.f;
1364 } else {
1365 doubleInt di;
1366 di.i = argBits;
1367 arg = di.d;
1368 }
1369 if (destSize == 4) {
1370 floatInt fi;
1371 fi.f = arg;
1372 argBits = fi.i;
1373 } else {
1374 doubleInt di;
1375 di.d = arg;
1376 argBits = di.i;
1377 }
1378 int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
1379 int destLoIndex = destStart + (i + 0) * destSizeBits;
1380 result = insertBits(result, destHiIndex, destLoIndex, argBits);
1381 }
1382 FpDestReg.uqw = result;
1383 '''
1384
1385 class Mcmpi2r(MediaOp):
1386 code = '''
1387 union floatInt
1388 {
1389 float f;
1390 uint32_t i;
1391 };
1392 union doubleInt
1393 {
1394 double d;
1395 uint64_t i;
1396 };
1397
1398 assert(srcSize == destSize);
1399 int size = srcSize;
1400 int sizeBits = size * 8;
1401 int items = numItems(size);
1402 uint64_t result = FpDestReg.uqw;
1403
1404 for (int i = 0; i < items; i++) {
1405 int hiIndex = (i + 1) * sizeBits - 1;
1406 int loIndex = (i + 0) * sizeBits;
1407 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1408 int64_t arg1 = arg1Bits |
1409 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
1410 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1411 int64_t arg2 = arg2Bits |
1412 (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
1413
1414 uint64_t resBits = 0;
1415 if (((ext & 0x2) == 0 && arg1 == arg2) ||
1416 ((ext & 0x2) == 0x2 && arg1 > arg2))
1417 resBits = mask(sizeBits);
1418
1419 result = insertBits(result, hiIndex, loIndex, resBits);
1420 }
1421 FpDestReg.uqw = result;
1422 '''
1423
1424 class Mcmpf2r(MediaOp):
1425 code = '''
1426 union floatInt
1427 {
1428 float f;
1429 uint32_t i;
1430 };
1431 union doubleInt
1432 {
1433 double d;
1434 uint64_t i;
1435 };
1436
1437 assert(srcSize == destSize);
1438 int size = srcSize;
1439 int sizeBits = size * 8;
1440 int items = numItems(size);
1441 uint64_t result = FpDestReg.uqw;
1442
1443 for (int i = 0; i < items; i++) {
1444 int hiIndex = (i + 1) * sizeBits - 1;
1445 int loIndex = (i + 0) * sizeBits;
1446 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
1447 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
1448 double arg1, arg2;
1449
1450 if (size == 4) {
1451 floatInt fi;
1452 fi.i = arg1Bits;
1453 arg1 = fi.f;
1454 fi.i = arg2Bits;
1455 arg2 = fi.f;
1456 } else {
1457 doubleInt di;
1458 di.i = arg1Bits;
1459 arg1 = di.d;
1460 di.i = arg2Bits;
1461 arg2 = di.d;
1462 }
1463
1464 uint64_t resBits = 0;
1465 bool nanop = isnan(arg1) || isnan(arg2);
1466 switch (ext & mask(3)) {
1467 case 0:
1468 if (arg1 == arg2 && !nanop)
1469 resBits = mask(sizeBits);
1470 break;
1471 case 1:
1472 if (arg1 < arg2 && !nanop)
1473 resBits = mask(sizeBits);
1474 break;
1475 case 2:
1476 if (arg1 <= arg2 && !nanop)
1477 resBits = mask(sizeBits);
1478 break;
1479 case 3:
1480 if (nanop)
1481 resBits = mask(sizeBits);
1482 break;
1483 case 4:
1484 if (arg1 != arg2 || nanop)
1485 resBits = mask(sizeBits);
1486 break;
1487 case 5:
1488 if (!(arg1 < arg2) || nanop)
1489 resBits = mask(sizeBits);
1490 break;
1491 case 6:
1492 if (!(arg1 <= arg2) || nanop)
1493 resBits = mask(sizeBits);
1494 break;
1495 case 7:
1496 if (!nanop)
1497 resBits = mask(sizeBits);
1498 break;
1499 };
1500
1501 result = insertBits(result, hiIndex, loIndex, resBits);
1502 }
1503 FpDestReg.uqw = result;
1504 '''
1505
1506 class Mcmpf2rf(MediaOp):
1507 def __init__(self, src1, src2,\
1508 size = None, destSize = None, srcSize = None, ext = None):
1509 super(Mcmpf2rf, self).__init__("InstRegIndex(0)", src1,\
1510 src2, size, destSize, srcSize, ext)
1511 code = '''
1512 union floatInt
1513 {
1514 float f;
1515 uint32_t i;
1516 };
1517 union doubleInt
1518 {
1519 double d;
1520 uint64_t i;
1521 };
1522
1523 assert(srcSize == destSize);
1524 assert(srcSize == 4 || srcSize == 8);
1525 int size = srcSize;
1526 int sizeBits = size * 8;
1527
1528 double arg1, arg2;
1529 uint64_t arg1Bits = bits(FpSrcReg1.uqw, sizeBits - 1, 0);
1530 uint64_t arg2Bits = bits(FpSrcReg2.uqw, sizeBits - 1, 0);
1531 if (size == 4) {
1532 floatInt fi;
1533 fi.i = arg1Bits;
1534 arg1 = fi.f;
1535 fi.i = arg2Bits;
1536 arg2 = fi.f;
1537 } else {
1538 doubleInt di;
1539 di.i = arg1Bits;
1540 arg1 = di.d;
1541 di.i = arg2Bits;
1542 arg2 = di.d;
1543 }
1544
1545 // ZF PF CF
1546 // Unordered 1 1 1
1547 // Greater than 0 0 0
1548 // Less than 0 0 1
1549 // Equal 1 0 0
1550 // OF = SF = AF = 0
1551 ccFlagBits = ccFlagBits & ~(OFBit | SFBit | AFBit |
1552 ZFBit | PFBit | CFBit);
1553 if (isnan(arg1) || isnan(arg2))
1554 ccFlagBits = ccFlagBits | (ZFBit | PFBit | CFBit);
1555 else if(arg1 < arg2)
1556 ccFlagBits = ccFlagBits | CFBit;
1557 else if(arg1 == arg2)
1558 ccFlagBits = ccFlagBits | ZFBit;
1559 '''
1560}};
272 %(flags)s, %(src1)s, %(op2)s, %(dest)s,
273 %(srcSize)s, %(destSize)s, %(ext)s)''' % {
274 "class_name" : className,
275 "flags" : self.microFlagsText(microFlags),
276 "src1" : self.src1, "op2" : self.op2,
277 "dest" : self.dest,
278 "srcSize" : self.srcSize,
279 "destSize" : self.destSize,
280 "ext" : self.ext}
281 return allocator
282
283 class Mov2int(MediaOp):
284 def __init__(self, dest, src1, src2 = 0, \
285 size = None, destSize = None, srcSize = None, ext = None):
286 super(Mov2int, self).__init__(dest, src1,\
287 src2, size, destSize, srcSize, ext)
288 code = '''
289 int items = sizeof(FloatRegBits) / srcSize;
290 int offset = imm8;
291 if (bits(src1, 0) && (ext & 0x1))
292 offset -= items;
293 if (offset >= 0 && offset < items) {
294 uint64_t fpSrcReg1 =
295 bits(FpSrcReg1.uqw,
296 (offset + 1) * srcSize * 8 - 1,
297 (offset + 0) * srcSize * 8);
298 DestReg = merge(0, fpSrcReg1, destSize);
299 } else {
300 DestReg = DestReg;
301 }
302 '''
303
304 class Mov2fp(MediaOp):
305 def __init__(self, dest, src1, src2 = 0, \
306 size = None, destSize = None, srcSize = None, ext = None):
307 super(Mov2fp, self).__init__(dest, src1,\
308 src2, size, destSize, srcSize, ext)
309 code = '''
310 int items = sizeof(FloatRegBits) / destSize;
311 int offset = imm8;
312 if (bits(dest, 0) && (ext & 0x1))
313 offset -= items;
314 if (offset >= 0 && offset < items) {
315 uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
316 FpDestReg.uqw =
317 insertBits(FpDestReg.uqw,
318 (offset + 1) * destSize * 8 - 1,
319 (offset + 0) * destSize * 8, srcReg1);
320 } else {
321 FpDestReg.uqw = FpDestReg.uqw;
322 }
323 '''
324
325 class Movsign(MediaOp):
326 def __init__(self, dest, src, \
327 size = None, destSize = None, srcSize = None, ext = None):
328 super(Movsign, self).__init__(dest, src,\
329 "InstRegIndex(0)", size, destSize, srcSize, ext)
330 code = '''
331 int items = sizeof(FloatRegBits) / srcSize;
332 uint64_t result = 0;
333 int offset = (ext & 0x1) ? items : 0;
334 for (int i = 0; i < items; i++) {
335 uint64_t picked =
336 bits(FpSrcReg1.uqw, (i + 1) * 8 * srcSize - 1);
337 result = insertBits(result, i + offset, i + offset, picked);
338 }
339 DestReg = DestReg | result;
340 '''
341
342 class Maskmov(MediaOp):
343 code = '''
344 assert(srcSize == destSize);
345 int size = srcSize;
346 int sizeBits = size * 8;
347 int items = numItems(size);
348 uint64_t result = FpDestReg.uqw;
349
350 for (int i = 0; i < items; i++) {
351 int hiIndex = (i + 1) * sizeBits - 1;
352 int loIndex = (i + 0) * sizeBits;
353 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
354 if (bits(FpSrcReg2.uqw, hiIndex))
355 result = insertBits(result, hiIndex, loIndex, arg1Bits);
356 }
357 FpDestReg.uqw = result;
358 '''
359
360 class shuffle(MediaOp):
361 code = '''
362 assert(srcSize == destSize);
363 int size = srcSize;
364 int sizeBits = size * 8;
365 int items = sizeof(FloatRegBits) / size;
366 int options;
367 int optionBits;
368 if (size == 8) {
369 options = 2;
370 optionBits = 1;
371 } else {
372 options = 4;
373 optionBits = 2;
374 }
375
376 uint64_t result = 0;
377 uint8_t sel = ext;
378
379 for (int i = 0; i < items; i++) {
380 uint64_t resBits;
381 uint8_t lsel = sel & mask(optionBits);
382 if (lsel * size >= sizeof(FloatRegBits)) {
383 lsel -= options / 2;
384 resBits = bits(FpSrcReg2.uqw,
385 (lsel + 1) * sizeBits - 1,
386 (lsel + 0) * sizeBits);
387 } else {
388 resBits = bits(FpSrcReg1.uqw,
389 (lsel + 1) * sizeBits - 1,
390 (lsel + 0) * sizeBits);
391 }
392
393 sel >>= optionBits;
394
395 int hiIndex = (i + 1) * sizeBits - 1;
396 int loIndex = (i + 0) * sizeBits;
397 result = insertBits(result, hiIndex, loIndex, resBits);
398 }
399 FpDestReg.uqw = result;
400 '''
401
402 class Unpack(MediaOp):
403 code = '''
404 assert(srcSize == destSize);
405 int size = destSize;
406 int items = (sizeof(FloatRegBits) / size) / 2;
407 int offset = ext ? items : 0;
408 uint64_t result = 0;
409 for (int i = 0; i < items; i++) {
410 uint64_t pickedLow =
411 bits(FpSrcReg1.uqw, (i + offset + 1) * 8 * size - 1,
412 (i + offset) * 8 * size);
413 result = insertBits(result,
414 (2 * i + 1) * 8 * size - 1,
415 (2 * i + 0) * 8 * size,
416 pickedLow);
417 uint64_t pickedHigh =
418 bits(FpSrcReg2.uqw, (i + offset + 1) * 8 * size - 1,
419 (i + offset) * 8 * size);
420 result = insertBits(result,
421 (2 * i + 2) * 8 * size - 1,
422 (2 * i + 1) * 8 * size,
423 pickedHigh);
424 }
425 FpDestReg.uqw = result;
426 '''
427
428 class Pack(MediaOp):
429 code = '''
430 assert(srcSize == destSize * 2);
431 int items = (sizeof(FloatRegBits) / destSize);
432 int destBits = destSize * 8;
433 int srcBits = srcSize * 8;
434 uint64_t result = 0;
435 int i;
436 for (i = 0; i < items / 2; i++) {
437 uint64_t picked =
438 bits(FpSrcReg1.uqw, (i + 1) * srcBits - 1,
439 (i + 0) * srcBits);
440 unsigned signBit = bits(picked, srcBits - 1);
441 uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
442
443 // Handle saturation.
444 if (signBit) {
445 if (overflow != mask(destBits - srcBits + 1)) {
446 if (signedOp())
447 picked = (ULL(1) << (destBits - 1));
448 else
449 picked = 0;
450 }
451 } else {
452 if (overflow != 0) {
453 if (signedOp())
454 picked = mask(destBits - 1);
455 else
456 picked = mask(destBits);
457 }
458 }
459 result = insertBits(result,
460 (i + 1) * destBits - 1,
461 (i + 0) * destBits,
462 picked);
463 }
464 for (;i < items; i++) {
465 uint64_t picked =
466 bits(FpSrcReg2.uqw, (i - items + 1) * srcBits - 1,
467 (i - items + 0) * srcBits);
468 unsigned signBit = bits(picked, srcBits - 1);
469 uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
470
471 // Handle saturation.
472 if (signBit) {
473 if (overflow != mask(destBits - srcBits + 1)) {
474 if (signedOp())
475 picked = (ULL(1) << (destBits - 1));
476 else
477 picked = 0;
478 }
479 } else {
480 if (overflow != 0) {
481 if (signedOp())
482 picked = mask(destBits - 1);
483 else
484 picked = mask(destBits);
485 }
486 }
487 result = insertBits(result,
488 (i + 1) * destBits - 1,
489 (i + 0) * destBits,
490 picked);
491 }
492 FpDestReg.uqw = result;
493 '''
494
495 class Mxor(MediaOp):
496 def __init__(self, dest, src1, src2):
497 super(Mxor, self).__init__(dest, src1, src2, 1)
498 code = '''
499 FpDestReg.uqw = FpSrcReg1.uqw ^ FpSrcReg2.uqw;
500 '''
501
502 class Mor(MediaOp):
503 def __init__(self, dest, src1, src2):
504 super(Mor, self).__init__(dest, src1, src2, 1)
505 code = '''
506 FpDestReg.uqw = FpSrcReg1.uqw | FpSrcReg2.uqw;
507 '''
508
509 class Mand(MediaOp):
510 def __init__(self, dest, src1, src2):
511 super(Mand, self).__init__(dest, src1, src2, 1)
512 code = '''
513 FpDestReg.uqw = FpSrcReg1.uqw & FpSrcReg2.uqw;
514 '''
515
516 class Mandn(MediaOp):
517 def __init__(self, dest, src1, src2):
518 super(Mandn, self).__init__(dest, src1, src2, 1)
519 code = '''
520 FpDestReg.uqw = ~FpSrcReg1.uqw & FpSrcReg2.uqw;
521 '''
522
523 class Mminf(MediaOp):
524 code = '''
525 union floatInt
526 {
527 float f;
528 uint32_t i;
529 };
530 union doubleInt
531 {
532 double d;
533 uint64_t i;
534 };
535
536 assert(srcSize == destSize);
537 int size = srcSize;
538 int sizeBits = size * 8;
539 assert(srcSize == 4 || srcSize == 8);
540 int items = numItems(size);
541 uint64_t result = FpDestReg.uqw;
542
543 for (int i = 0; i < items; i++) {
544 double arg1, arg2;
545 int hiIndex = (i + 1) * sizeBits - 1;
546 int loIndex = (i + 0) * sizeBits;
547 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
548 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
549
550 if (size == 4) {
551 floatInt fi;
552 fi.i = arg1Bits;
553 arg1 = fi.f;
554 fi.i = arg2Bits;
555 arg2 = fi.f;
556 } else {
557 doubleInt di;
558 di.i = arg1Bits;
559 arg1 = di.d;
560 di.i = arg2Bits;
561 arg2 = di.d;
562 }
563
564 if (arg1 < arg2) {
565 result = insertBits(result, hiIndex, loIndex, arg1Bits);
566 } else {
567 result = insertBits(result, hiIndex, loIndex, arg2Bits);
568 }
569 }
570 FpDestReg.uqw = result;
571 '''
572
573 class Mmaxf(MediaOp):
574 code = '''
575 union floatInt
576 {
577 float f;
578 uint32_t i;
579 };
580 union doubleInt
581 {
582 double d;
583 uint64_t i;
584 };
585
586 assert(srcSize == destSize);
587 int size = srcSize;
588 int sizeBits = size * 8;
589 assert(srcSize == 4 || srcSize == 8);
590 int items = numItems(size);
591 uint64_t result = FpDestReg.uqw;
592
593 for (int i = 0; i < items; i++) {
594 double arg1, arg2;
595 int hiIndex = (i + 1) * sizeBits - 1;
596 int loIndex = (i + 0) * sizeBits;
597 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
598 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
599
600 if (size == 4) {
601 floatInt fi;
602 fi.i = arg1Bits;
603 arg1 = fi.f;
604 fi.i = arg2Bits;
605 arg2 = fi.f;
606 } else {
607 doubleInt di;
608 di.i = arg1Bits;
609 arg1 = di.d;
610 di.i = arg2Bits;
611 arg2 = di.d;
612 }
613
614 if (arg1 > arg2) {
615 result = insertBits(result, hiIndex, loIndex, arg1Bits);
616 } else {
617 result = insertBits(result, hiIndex, loIndex, arg2Bits);
618 }
619 }
620 FpDestReg.uqw = result;
621 '''
622
623 class Mmini(MediaOp):
624 code = '''
625
626 assert(srcSize == destSize);
627 int size = srcSize;
628 int sizeBits = size * 8;
629 int items = numItems(size);
630 uint64_t result = FpDestReg.uqw;
631
632 for (int i = 0; i < items; i++) {
633 int hiIndex = (i + 1) * sizeBits - 1;
634 int loIndex = (i + 0) * sizeBits;
635 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
636 int64_t arg1 = arg1Bits |
637 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
638 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
639 int64_t arg2 = arg2Bits |
640 (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
641 uint64_t resBits;
642
643 if (signedOp()) {
644 if (arg1 < arg2) {
645 resBits = arg1Bits;
646 } else {
647 resBits = arg2Bits;
648 }
649 } else {
650 if (arg1Bits < arg2Bits) {
651 resBits = arg1Bits;
652 } else {
653 resBits = arg2Bits;
654 }
655 }
656 result = insertBits(result, hiIndex, loIndex, resBits);
657 }
658 FpDestReg.uqw = result;
659 '''
660
661 class Mmaxi(MediaOp):
662 code = '''
663
664 assert(srcSize == destSize);
665 int size = srcSize;
666 int sizeBits = size * 8;
667 int items = numItems(size);
668 uint64_t result = FpDestReg.uqw;
669
670 for (int i = 0; i < items; i++) {
671 int hiIndex = (i + 1) * sizeBits - 1;
672 int loIndex = (i + 0) * sizeBits;
673 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
674 int64_t arg1 = arg1Bits |
675 (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
676 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
677 int64_t arg2 = arg2Bits |
678 (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
679 uint64_t resBits;
680
681 if (signedOp()) {
682 if (arg1 > arg2) {
683 resBits = arg1Bits;
684 } else {
685 resBits = arg2Bits;
686 }
687 } else {
688 if (arg1Bits > arg2Bits) {
689 resBits = arg1Bits;
690 } else {
691 resBits = arg2Bits;
692 }
693 }
694 result = insertBits(result, hiIndex, loIndex, resBits);
695 }
696 FpDestReg.uqw = result;
697 '''
698
699 class Msqrt(MediaOp):
700 def __init__(self, dest, src, \
701 size = None, destSize = None, srcSize = None, ext = None):
702 super(Msqrt, self).__init__(dest, src,\
703 "InstRegIndex(0)", size, destSize, srcSize, ext)
704 code = '''
705 union floatInt
706 {
707 float f;
708 uint32_t i;
709 };
710 union doubleInt
711 {
712 double d;
713 uint64_t i;
714 };
715
716 assert(srcSize == destSize);
717 int size = srcSize;
718 int sizeBits = size * 8;
719 assert(srcSize == 4 || srcSize == 8);
720 int items = numItems(size);
721 uint64_t result = FpDestReg.uqw;
722
723 for (int i = 0; i < items; i++) {
724 int hiIndex = (i + 1) * sizeBits - 1;
725 int loIndex = (i + 0) * sizeBits;
726 uint64_t argBits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
727
728 if (size == 4) {
729 floatInt fi;
730 fi.i = argBits;
731 fi.f = sqrt(fi.f);
732 argBits = fi.i;
733 } else {
734 doubleInt di;
735 di.i = argBits;
736 di.d = sqrt(di.d);
737 argBits = di.i;
738 }
739 result = insertBits(result, hiIndex, loIndex, argBits);
740 }
741 FpDestReg.uqw = result;
742 '''
743
744 class Maddf(MediaOp):
745 code = '''
746 union floatInt
747 {
748 float f;
749 uint32_t i;
750 };
751 union doubleInt
752 {
753 double d;
754 uint64_t i;
755 };
756
757 assert(srcSize == destSize);
758 int size = srcSize;
759 int sizeBits = size * 8;
760 assert(srcSize == 4 || srcSize == 8);
761 int items = numItems(size);
762 uint64_t result = FpDestReg.uqw;
763
764 for (int i = 0; i < items; i++) {
765 int hiIndex = (i + 1) * sizeBits - 1;
766 int loIndex = (i + 0) * sizeBits;
767 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
768 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
769 uint64_t resBits;
770
771 if (size == 4) {
772 floatInt arg1, arg2, res;
773 arg1.i = arg1Bits;
774 arg2.i = arg2Bits;
775 res.f = arg1.f + arg2.f;
776 resBits = res.i;
777 } else {
778 doubleInt arg1, arg2, res;
779 arg1.i = arg1Bits;
780 arg2.i = arg2Bits;
781 res.d = arg1.d + arg2.d;
782 resBits = res.i;
783 }
784
785 result = insertBits(result, hiIndex, loIndex, resBits);
786 }
787 FpDestReg.uqw = result;
788 '''
789
790 class Msubf(MediaOp):
791 code = '''
792 union floatInt
793 {
794 float f;
795 uint32_t i;
796 };
797 union doubleInt
798 {
799 double d;
800 uint64_t i;
801 };
802
803 assert(srcSize == destSize);
804 int size = srcSize;
805 int sizeBits = size * 8;
806 assert(srcSize == 4 || srcSize == 8);
807 int items = numItems(size);
808 uint64_t result = FpDestReg.uqw;
809
810 for (int i = 0; i < items; i++) {
811 int hiIndex = (i + 1) * sizeBits - 1;
812 int loIndex = (i + 0) * sizeBits;
813 uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
814 uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
815 uint64_t resBits;
816
817 if (size == 4) {
818 floatInt arg1, arg2, res;
819 arg1.i = arg1Bits;
820 arg2.i = arg2Bits;
821 res.f = arg1.f - arg2.f;
822 resBits = res.i;
823 } else {
824 doubleInt arg1, arg2, res;
825 arg1.i = arg1Bits;
826 arg2.i = arg2Bits;
827 res.d = arg1.d - arg2.d;
828 resBits = res.i;
829 }
830
831 result = insertBits(result, hiIndex, loIndex, resBits);
832 }
833 FpDestReg.uqw = result;
834 '''
835
class Mmulf(MediaOp):
    # Packed floating point multiplication. For every lane reported by
    # numItems(), the matching lanes of the two sources are multiplied and
    # the product replaces that lane of the destination. Lanes are 4 bytes
    # (float) or 8 bytes (double).
    code = '''
        // overlays for viewing raw lane bits as floating point values
        union SingleView
        {
            float num;
            uint32_t raw;
        };
        union DoubleView
        {
            double num;
            uint64_t raw;
        };

        assert(srcSize == destSize);
        assert(srcSize == 4 || srcSize == 8);
        const int laneBytes = srcSize;
        const int laneBits = laneBytes * 8;
        const int laneCount = numItems(laneBytes);
        uint64_t packed = FpDestReg.uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneBits;
            const int msb = lsb + laneBits - 1;
            const uint64_t lhsBits = bits(FpSrcReg1.uqw, msb, lsb);
            const uint64_t rhsBits = bits(FpSrcReg2.uqw, msb, lsb);
            uint64_t outBits;

            if (laneBytes != 4) {
                DoubleView lhs, rhs, out;
                lhs.raw = lhsBits;
                rhs.raw = rhsBits;
                out.num = lhs.num * rhs.num;
                outBits = out.raw;
            } else {
                SingleView lhs, rhs, out;
                lhs.raw = lhsBits;
                rhs.raw = rhsBits;
                out.num = lhs.num * rhs.num;
                outBits = out.raw;
            }

            packed = insertBits(packed, msb, lsb, outBits);
        }
        FpDestReg.uqw = packed;
    '''
881
class Mdivf(MediaOp):
    # Packed floating point division. For every lane reported by
    # numItems(), the lane of the first source is divided by the matching
    # lane of the second source and the quotient replaces that lane of the
    # destination. Lanes are 4 bytes (float) or 8 bytes (double).
    code = '''
        // overlays for viewing raw lane bits as floating point values
        union SingleView
        {
            float num;
            uint32_t raw;
        };
        union DoubleView
        {
            double num;
            uint64_t raw;
        };

        assert(srcSize == destSize);
        assert(srcSize == 4 || srcSize == 8);
        const int laneBytes = srcSize;
        const int laneBits = laneBytes * 8;
        const int laneCount = numItems(laneBytes);
        uint64_t packed = FpDestReg.uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneBits;
            const int msb = lsb + laneBits - 1;
            const uint64_t lhsBits = bits(FpSrcReg1.uqw, msb, lsb);
            const uint64_t rhsBits = bits(FpSrcReg2.uqw, msb, lsb);
            uint64_t outBits;

            if (laneBytes != 4) {
                DoubleView lhs, rhs, out;
                lhs.raw = lhsBits;
                rhs.raw = rhsBits;
                out.num = lhs.num / rhs.num;
                outBits = out.raw;
            } else {
                SingleView lhs, rhs, out;
                lhs.raw = lhsBits;
                rhs.raw = rhsBits;
                out.num = lhs.num / rhs.num;
                outBits = out.raw;
            }

            packed = insertBits(packed, msb, lsb, outBits);
        }
        FpDestReg.uqw = packed;
    '''
927
class Maddi(MediaOp):
    # Packed integer addition. Matching lanes of the two sources are added
    # and the sum replaces that lane of the destination. When bit 1 of ext
    # is set the sum saturates instead of wrapping: signed operations clamp
    # to the most negative / most positive lane value, unsigned operations
    # clamp to all ones.
    code = '''
        assert(srcSize == destSize);
        const int laneBits = srcSize * 8;
        const int laneCount = numItems(srcSize);
        const bool saturate = (ext & 0x2) != 0;
        uint64_t packed = FpDestReg.uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneBits;
            const int msb = lsb + laneBits - 1;
            const uint64_t lhs = bits(FpSrcReg1.uqw, msb, lsb);
            const uint64_t rhs = bits(FpSrcReg2.uqw, msb, lsb);
            uint64_t sum = lhs + rhs;

            if (saturate && signedOp()) {
                const int lhsNeg = bits(lhs, laneBits - 1);
                const int rhsNeg = bits(rhs, laneBits - 1);
                const int sumNeg = bits(sum, laneBits - 1);
                // overflow: like-signed inputs produced the other sign
                if (lhsNeg == rhsNeg && lhsNeg != sumNeg) {
                    sum = sumNeg ? mask(laneBits - 1) :
                                   (ULL(1) << (laneBits - 1));
                }
            } else if (saturate) {
                // a carry out of the lane means the unsigned sum overflowed
                if (findCarry(laneBits, sum, lhs, rhs))
                    sum = mask(laneBits);
            }

            packed = insertBits(packed, msb, lsb, sum);
        }
        FpDestReg.uqw = packed;
    '''
964
class Msubi(MediaOp):
    # Packed integer subtraction. The lane of the second source is
    # subtracted from the matching lane of the first source and the
    # difference replaces that lane of the destination. When bit 1 of ext
    # is set the difference saturates: signed operations clamp to the most
    # negative / most positive lane value, unsigned operations clamp to
    # zero when the second lane is larger than the first.
    code = '''
        assert(srcSize == destSize);
        const int laneBits = srcSize * 8;
        const int laneCount = numItems(srcSize);
        uint64_t packed = FpDestReg.uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneBits;
            const int msb = lsb + laneBits - 1;
            const uint64_t lhs = bits(FpSrcReg1.uqw, msb, lsb);
            const uint64_t rhs = bits(FpSrcReg2.uqw, msb, lsb);
            uint64_t diff = lhs - rhs;

            if (ext & 0x2) {
                if (signedOp()) {
                    const int lhsNeg = bits(lhs, laneBits - 1);
                    // subtracting flips the effective sign of the
                    // second operand
                    const int rhsNeg = !bits(rhs, laneBits - 1);
                    const int diffNeg = bits(diff, laneBits - 1);
                    if (lhsNeg == rhsNeg && lhsNeg != diffNeg) {
                        diff = diffNeg ? mask(laneBits - 1) :
                                         (ULL(1) << (laneBits - 1));
                    }
                } else if (rhs > lhs) {
                    diff = 0;
                } else if (!findCarry(laneBits, diff, lhs, ~rhs)) {
                    // NOTE(review): this looks unreachable once the
                    // check above has ruled out a borrow; confirm
                    // against the findCarry definition.
                    diff = mask(laneBits);
                }
            }

            packed = insertBits(packed, msb, lsb, diff);
        }
        FpDestReg.uqw = packed;
    '''
1005
class Mmuli(MediaOp):
    # Packed integer multiplication with a destination lane at least as
    # wide as the source lane. Source lanes are sign extended when
    # signedOp() is true. ext & 16 enables an extra per-lane source
    # offset (with ext & 32 choosing between its two forms), ext & 0x4
    # adds 1 << (destBits - 1) to the product, and multHi() keeps the
    # part of the product above destBits.
    code = '''
        const int srcBits = srcSize * 8;
        const int destBits = destSize * 8;
        assert(destBits <= 64);
        assert(destSize >= srcSize);
        const int laneCount = numItems(destSize);
        const int widening = destBits - srcBits;
        const uint64_t srcSignBit = ULL(1) << (srcBits - 1);
        uint64_t packed = FpDestReg.uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            // optional displacement of the source lane
            int shift = 0;
            if (ext & 16) {
                shift = lane * widening;
                if (!(ext & 32))
                    shift += srcBits;
            }
            const int srcLsb = lane * srcBits + shift;
            const int srcMsb = srcLsb + srcBits - 1;
            const uint64_t lhsBits = bits(FpSrcReg1.uqw, srcMsb, srcLsb);
            const uint64_t rhsBits = bits(FpSrcReg2.uqw, srcMsb, srcLsb);
            uint64_t product;

            if (signedOp()) {
                // sign extend both factors to 64 bits before multiplying
                int64_t lhs = lhsBits | (0 - (lhsBits & srcSignBit));
                int64_t rhs = rhsBits | (0 - (rhsBits & srcSignBit));
                product = (uint64_t)(lhs * rhs);
            } else {
                product = lhsBits * rhsBits;
            }

            if (ext & 0x4)
                product += (ULL(1) << (destBits - 1));

            if (multHi())
                product >>= destBits;

            const int destLsb = lane * destBits;
            const int destMsb = destLsb + destBits - 1;
            packed = insertBits(packed, destMsb, destLsb, product);
        }
        FpDestReg.uqw = packed;
    '''
1051
class Mavg(MediaOp):
    # Packed rounded average. Each destination lane becomes
    # (first source lane + second source lane + 1) / 2, computed on the
    # zero extended lane values.
    code = '''
        assert(srcSize == destSize);
        const int laneBits = srcSize * 8;
        const int laneCount = numItems(srcSize);
        uint64_t packed = FpDestReg.uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneBits;
            const int msb = lsb + laneBits - 1;
            const uint64_t lhs = bits(FpSrcReg1.uqw, msb, lsb);
            const uint64_t rhs = bits(FpSrcReg2.uqw, msb, lsb);
            // the extra one rounds halves upward
            const uint64_t mean = (lhs + rhs + 1) / 2;

            packed = insertBits(packed, msb, lsb, mean);
        }
        FpDestReg.uqw = packed;
    '''
1071
class Msad(MediaOp):
    # Sum of absolute differences. The absolute difference of every pair
    # of matching source lanes is accumulated, and the total, truncated to
    # destSize bytes, is written to the destination. The lane count is
    # derived from sizeof(FloatRegBits) rather than numItems().
    code = '''
        const int laneBits = srcSize * 8;
        const int laneCount = sizeof(FloatRegBits) / srcSize;

        uint64_t total = 0;
        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneBits;
            const int msb = lsb + laneBits - 1;
            const uint64_t lhs = bits(FpSrcReg1.uqw, msb, lsb);
            const uint64_t rhs = bits(FpSrcReg2.uqw, msb, lsb);
            // signed view of the difference so it can be made positive
            const int64_t delta = lhs - rhs;
            total += (delta < 0) ? -delta : delta;
        }
        FpDestReg.uqw = total & mask(destSize * 8);
    '''
1090
class Msrl(MediaOp):
    # Packed logical shift right. Every lane of the first source is
    # shifted right by the amount held in op2, filling with zeros; a shift
    # amount of at least the lane width clears the lane.
    code = '''

        assert(srcSize == destSize);
        const int laneBits = srcSize * 8;
        const int laneCount = numItems(srcSize);
        const uint64_t shiftAmt = op2.uqw;
        uint64_t packed = FpDestReg.uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneBits;
            const int msb = lsb + laneBits - 1;
            const uint64_t src = bits(FpSrcReg1.uqw, msb, lsb);
            uint64_t shifted = 0;
            if (!(shiftAmt >= laneBits)) {
                // keep only the bits that remain inside the lane
                shifted = (src >> shiftAmt) & mask(laneBits - shiftAmt);
            }

            packed = insertBits(packed, msb, lsb, shifted);
        }
        FpDestReg.uqw = packed;
    '''
1117
class Msra(MediaOp):
    # Packed arithmetic shift right. Every lane of the first source is
    # shifted right by the amount held in op2 with its top bit replicated
    # into the vacated positions; a shift amount of at least the lane
    # width fills the lane with copies of that top bit.
    code = '''

        assert(srcSize == destSize);
        const int laneBits = srcSize * 8;
        const int laneCount = numItems(srcSize);
        const uint64_t shiftAmt = op2.uqw;
        uint64_t packed = FpDestReg.uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneBits;
            const int msb = lsb + laneBits - 1;
            const uint64_t src = bits(FpSrcReg1.uqw, msb, lsb);
            uint64_t shifted;
            if (shiftAmt >= laneBits) {
                shifted = bits(src, laneBits - 1) ? mask(laneBits) : 0;
            } else {
                shifted = (src >> shiftAmt);
                // spread the relocated sign bit through the upper bits
                const uint64_t movedSign =
                    ULL(1) << (laneBits - 1 - shiftAmt);
                shifted = shifted | (0 - (shifted & movedSign));
            }

            packed = insertBits(packed, msb, lsb, shifted);
        }
        FpDestReg.uqw = packed;
    '''
1148
class Msll(MediaOp):
    # Packed logical shift left. Every lane of the first source is
    # shifted left by the amount held in op2; a shift amount of at least
    # the lane width clears the lane. Bits pushed past the lane are
    # dropped when the lane is inserted into the result.
    code = '''

        assert(srcSize == destSize);
        const int laneBits = srcSize * 8;
        const int laneCount = numItems(srcSize);
        const uint64_t shiftAmt = op2.uqw;
        uint64_t packed = FpDestReg.uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneBits;
            const int msb = lsb + laneBits - 1;
            const uint64_t src = bits(FpSrcReg1.uqw, msb, lsb);
            uint64_t shifted = 0;
            if (!(shiftAmt >= laneBits))
                shifted = (src << shiftAmt);

            packed = insertBits(packed, msb, lsb, shifted);
        }
        FpDestReg.uqw = packed;
    '''
1174
class Cvtf2i(MediaOp):
    # Packed floating point to signed integer conversion.
    #
    # Source lanes are 4 byte floats or 8 byte doubles, destination lanes
    # are 4 or 8 byte integers. When one side is twice as wide as the
    # other, the lane count follows the wider side and bit 1 of ext moves
    # the narrower side to its upper group of lanes. Bit 2 of ext rounds
    # half away from zero before the conversion; otherwise the value is
    # truncated toward zero.
    #
    # The value is converted through the signed integer type so negative
    # inputs yield their two's complement pattern (casting a negative
    # double straight to an unsigned type is undefined behaviour and
    # host dependent). NaNs and values outside the signed destination
    # range produce the x86 "integer indefinite" pattern, i.e. only the
    # top bit of the destination lane set.
    def __init__(self, dest, src,
            size = None, destSize = None, srcSize = None, ext = None):
        # Single source operation: the second source slot is a dummy.
        super(Cvtf2i, self).__init__(dest, src,
                "InstRegIndex(0)", size, destSize, srcSize, ext)
    code = '''
        // overlays for viewing raw lane bits as floating point values
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(destSize == 4 || destSize == 8);
        assert(srcSize == 4 || srcSize == 8);
        int srcSizeBits = srcSize * 8;
        int destSizeBits = destSize * 8;
        int items;
        int srcStart = 0;
        int destStart = 0;
        if (srcSize == 2 * destSize) {
            items = numItems(srcSize);
            if (ext & 0x2)
                destStart = destSizeBits * items;
        } else if (destSize == 2 * srcSize) {
            items = numItems(destSize);
            if (ext & 0x2)
                srcStart = srcSizeBits * items;
        } else {
            items = numItems(destSize);
        }
        uint64_t result = FpDestReg.uqw;

        // pattern returned for unrepresentable inputs, and the matching
        // exclusive magnitude bound (2^(bits - 1), exact as a double)
        const uint64_t indefinite = ULL(1) << (destSizeBits - 1);
        const double limit = (double)indefinite;

        for (int i = 0; i < items; i++) {
            int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
            int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
            uint64_t argBits = bits(FpSrcReg1.uqw, srcHiIndex, srcLoIndex);
            double arg;

            if (srcSize == 4) {
                floatInt fi;
                fi.i = argBits;
                arg = fi.f;
            } else {
                doubleInt di;
                di.i = argBits;
                arg = di.d;
            }

            if (ext & 0x4) {
                if (arg >= 0)
                    arg += 0.5;
                else
                    arg -= 0.5;
            }

            // the comparisons are false for NaN, which therefore takes
            // the indefinite path as well
            if (arg >= -limit && arg < limit) {
                if (destSize == 4) {
                    argBits = (uint32_t)(int32_t)arg;
                } else {
                    argBits = (uint64_t)(int64_t)arg;
                }
            } else {
                argBits = indefinite;
            }
            int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
            int destLoIndex = destStart + (i + 0) * destSizeBits;
            result = insertBits(result, destHiIndex, destLoIndex, argBits);
        }
        FpDestReg.uqw = result;
    '''
1246
class Cvti2f(MediaOp):
    # Packed signed integer to floating point conversion. Source lanes
    # are sign extended 4 or 8 byte integers, destination lanes are
    # 4 byte floats or 8 byte doubles. When one side is twice as wide as
    # the other, the lane count follows the wider side and bit 1 of ext
    # moves the narrower side to its upper group of lanes.
    def __init__(self, dest, src,
            size = None, destSize = None, srcSize = None, ext = None):
        # Single source operation: the second source slot is a dummy.
        super(Cvti2f, self).__init__(dest, src,
                "InstRegIndex(0)", size, destSize, srcSize, ext)
    code = '''
        // overlays for viewing floating point values as raw lane bits
        union SingleView
        {
            float num;
            uint32_t raw;
        };
        union DoubleView
        {
            double num;
            uint64_t raw;
        };

        assert(destSize == 4 || destSize == 8);
        assert(srcSize == 4 || srcSize == 8);
        const int srcLaneBits = srcSize * 8;
        const int destLaneBits = destSize * 8;
        const bool narrowing = (srcSize == 2 * destSize);
        const bool widening = (destSize == 2 * srcSize);
        const int laneCount = numItems(narrowing ? srcSize : destSize);
        int srcOrigin = 0;
        int destOrigin = 0;
        if (ext & 0x2) {
            if (narrowing)
                destOrigin = destLaneBits * laneCount;
            else if (widening)
                srcOrigin = srcLaneBits * laneCount;
        }
        const uint64_t srcSignBit = ULL(1) << (srcLaneBits - 1);
        uint64_t packed = FpDestReg.uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int srcLsb = srcOrigin + lane * srcLaneBits;
            const int srcMsb = srcLsb + srcLaneBits - 1;
            const uint64_t inBits = bits(FpSrcReg1.uqw, srcMsb, srcLsb);

            // sign extend the lane, then widen to double
            const int64_t signedIn = inBits | (0 - (inBits & srcSignBit));
            const double value = signedIn;

            uint64_t outBits;
            if (destSize != 4) {
                DoubleView wide;
                wide.num = value;
                outBits = wide.raw;
            } else {
                SingleView narrow;
                narrow.num = value;
                outBits = narrow.raw;
            }
            const int destLsb = destOrigin + lane * destLaneBits;
            const int destMsb = destLsb + destLaneBits - 1;
            packed = insertBits(packed, destMsb, destLsb, outBits);
        }
        FpDestReg.uqw = packed;
    '''
1308
class Cvtf2f(MediaOp):
    # Packed floating point format conversion between 4 byte floats and
    # 8 byte doubles. When one side is twice as wide as the other, the
    # lane count follows the wider side and bit 1 of ext moves the
    # narrower side to its upper group of lanes.
    def __init__(self, dest, src,
            size = None, destSize = None, srcSize = None, ext = None):
        # Single source operation: the second source slot is a dummy.
        super(Cvtf2f, self).__init__(dest, src,
                "InstRegIndex(0)", size, destSize, srcSize, ext)
    code = '''
        // overlays for moving between raw lane bits and floating point
        union SingleView
        {
            float num;
            uint32_t raw;
        };
        union DoubleView
        {
            double num;
            uint64_t raw;
        };

        assert(destSize == 4 || destSize == 8);
        assert(srcSize == 4 || srcSize == 8);
        const int srcLaneBits = srcSize * 8;
        const int destLaneBits = destSize * 8;
        const bool narrowing = (srcSize == 2 * destSize);
        const bool widening = (destSize == 2 * srcSize);
        const int laneCount = numItems(narrowing ? srcSize : destSize);
        int srcOrigin = 0;
        int destOrigin = 0;
        if (ext & 0x2) {
            if (narrowing)
                destOrigin = destLaneBits * laneCount;
            else if (widening)
                srcOrigin = srcLaneBits * laneCount;
        }
        uint64_t packed = FpDestReg.uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int srcLsb = srcOrigin + lane * srcLaneBits;
            const int srcMsb = srcLsb + srcLaneBits - 1;
            const uint64_t inBits = bits(FpSrcReg1.uqw, srcMsb, srcLsb);

            // decode the source lane into a double
            double value;
            if (srcSize != 4) {
                DoubleView wide;
                wide.raw = inBits;
                value = wide.num;
            } else {
                SingleView narrow;
                narrow.raw = inBits;
                value = narrow.num;
            }

            // encode it in the destination format
            uint64_t outBits;
            if (destSize != 4) {
                DoubleView wide;
                wide.num = value;
                outBits = wide.raw;
            } else {
                SingleView narrow;
                narrow.num = value;
                outBits = narrow.raw;
            }
            const int destLsb = destOrigin + lane * destLaneBits;
            const int destMsb = destLsb + destLaneBits - 1;
            packed = insertBits(packed, destMsb, destLsb, outBits);
        }
        FpDestReg.uqw = packed;
    '''
1376
class Mcmpi2r(MediaOp):
    # Packed signed integer compare writing a per-lane mask. Both source
    # lanes are sign extended; the destination lane becomes all ones when
    # the selected relation holds and zero otherwise. Bit 1 of ext picks
    # the relation: clear tests for equality, set tests for the first
    # lane being greater than the second.
    #
    # The float/double overlay unions that used to be declared here were
    # never referenced by this integer-only operation and were removed.
    code = '''
        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = numItems(size);
        uint64_t result = FpDestReg.uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            // sign extend each lane to 64 bits for the signed compare
            uint64_t arg1Bits = bits(FpSrcReg1.uqw, hiIndex, loIndex);
            int64_t arg1 = arg1Bits |
                (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
            uint64_t arg2Bits = bits(FpSrcReg2.uqw, hiIndex, loIndex);
            int64_t arg2 = arg2Bits |
                (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));

            uint64_t resBits = 0;
            if (((ext & 0x2) == 0 && arg1 == arg2) ||
                ((ext & 0x2) == 0x2 && arg1 > arg2))
                resBits = mask(sizeBits);

            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg.uqw = result;
    '''
1415
class Mcmpf2r(MediaOp):
    # Packed floating point compare writing a per-lane mask. The low
    # three bits of ext select one of eight predicates; the destination
    # lane becomes all ones when the predicate holds and zero otherwise.
    # Predicates 0-2 (equal, less, less-or-equal) are false when either
    # input is a NaN, 3 is true only for NaN inputs, and 4-7 are the
    # negations of 0-3.
    code = '''
        // overlays for viewing raw lane bits as floating point values
        union SingleView
        {
            float num;
            uint32_t raw;
        };
        union DoubleView
        {
            double num;
            uint64_t raw;
        };

        assert(srcSize == destSize);
        const int laneBytes = srcSize;
        const int laneBits = laneBytes * 8;
        const int laneCount = numItems(laneBytes);
        uint64_t packed = FpDestReg.uqw;

        for (int lane = 0; lane < laneCount; lane++) {
            const int lsb = lane * laneBits;
            const int msb = lsb + laneBits - 1;
            const uint64_t lhsBits = bits(FpSrcReg1.uqw, msb, lsb);
            const uint64_t rhsBits = bits(FpSrcReg2.uqw, msb, lsb);
            double lhs, rhs;

            if (laneBytes != 4) {
                DoubleView wide;
                wide.raw = lhsBits;
                lhs = wide.num;
                wide.raw = rhsBits;
                rhs = wide.num;
            } else {
                SingleView narrow;
                narrow.raw = lhsBits;
                lhs = narrow.num;
                narrow.raw = rhsBits;
                rhs = narrow.num;
            }

            const bool unordered = isnan(lhs) || isnan(rhs);
            bool holds = false;
            switch (ext & mask(3)) {
              case 0:
                // equal
                holds = (lhs == rhs && !unordered);
                break;
              case 1:
                // less than
                holds = (lhs < rhs && !unordered);
                break;
              case 2:
                // less than or equal
                holds = (lhs <= rhs && !unordered);
                break;
              case 3:
                // unordered
                holds = unordered;
                break;
              case 4:
                // not equal
                holds = (lhs != rhs || unordered);
                break;
              case 5:
                // not less than
                holds = (!(lhs < rhs) || unordered);
                break;
              case 6:
                // not less than or equal
                holds = (!(lhs <= rhs) || unordered);
                break;
              case 7:
                // ordered
                holds = !unordered;
                break;
            }
            const uint64_t verdict = holds ? mask(laneBits) : 0;

            packed = insertBits(packed, msb, lsb, verdict);
        }
        FpDestReg.uqw = packed;
    '''
1497
class Mcmpf2rf(MediaOp):
    # Scalar floating point compare that reports its outcome in the
    # condition flags instead of a register. Only the lowest 4 or 8 byte
    # lane of each source is compared. OF, SF and AF are always cleared;
    # ZF, PF and CF encode the relation as tabulated in the code below.
    def __init__(self, src1, src2,
            size = None, destSize = None, srcSize = None, ext = None):
        # No register destination: the destination slot is a dummy.
        super(Mcmpf2rf, self).__init__("InstRegIndex(0)", src1,
                src2, size, destSize, srcSize, ext)
    code = '''
        // overlays for viewing raw lane bits as floating point values
        union SingleView
        {
            float num;
            uint32_t raw;
        };
        union DoubleView
        {
            double num;
            uint64_t raw;
        };

        assert(srcSize == destSize);
        assert(srcSize == 4 || srcSize == 8);
        const int laneBytes = srcSize;
        const int laneBits = laneBytes * 8;

        const uint64_t lhsBits = bits(FpSrcReg1.uqw, laneBits - 1, 0);
        const uint64_t rhsBits = bits(FpSrcReg2.uqw, laneBits - 1, 0);
        double lhs, rhs;
        if (laneBytes != 4) {
            DoubleView wide;
            wide.raw = lhsBits;
            lhs = wide.num;
            wide.raw = rhsBits;
            rhs = wide.num;
        } else {
            SingleView narrow;
            narrow.raw = lhsBits;
            lhs = narrow.num;
            narrow.raw = rhsBits;
            rhs = narrow.num;
        }

        //               ZF PF CF
        // Unordered      1  1  1
        // Greater than   0  0  0
        // Less than      0  0  1
        // Equal          1  0  0
        // OF = SF = AF = 0
        uint64_t outcome = 0;
        if (isnan(lhs) || isnan(rhs))
            outcome = (ZFBit | PFBit | CFBit);
        else if (lhs < rhs)
            outcome = CFBit;
        else if (lhs == rhs)
            outcome = ZFBit;
        ccFlagBits = (ccFlagBits & ~(OFBit | SFBit | AFBit |
                                     ZFBit | PFBit | CFBit)) | outcome;
    '''
1552}};