# multiply_and_divide.py revision 6514:1802d70f4092
# Copyright (c) 2007 The Hewlett-Packard Development Company
# All rights reserved.
#
# Redistribution and use of this software in source and binary forms,
# with or without modification, are permitted provided that the
# following conditions are met:
#
# The software must be used only for Non-Commercial Use which means any
# use which is NOT directed to receiving any direct monetary
# compensation for, or commercial advantage from such use.  Illustrative
# examples of non-commercial use are academic research, personal study,
# teaching, education and corporate research & development.
# Illustrative examples of commercial use are distributing products for
# commercial advantage and providing services using the software for
# commercial advantage.
#
# If you wish to use this software or functionality therein that may be
# covered by patents for commercial use, please contact:
#     Director of Intellectual Property Licensing
#     Office of Strategy and Technology
#     Hewlett-Packard Company
#     1501 Page Mill Road
#     Palo Alto, California  94304
#
# Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.  Redistributions
# in binary form must reproduce the above copyright notice, this list of
# conditions and the following disclaimer in the documentation and/or
# other materials provided with the distribution.  Neither the name of
# the COPYRIGHT HOLDER(s), HEWLETT-PACKARD COMPANY, nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.  No right of
# sublicense is granted herewith.  Derivatives of the software and
# output created using the software may be prepared, but only for
# Non-Commercial Uses.  Derivatives of the software may be shared with
# others provided: (i) the others agree to abide by the list of
# conditions herein which includes the Non-Commercial Use restrictions;
# and (ii) such Derivatives of the software include the above copyright
# notice to acknowledge the contribution from this software where
# applicable, this list of conditions and the disclaimer below.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Gabe Black

# Microcode for the integer multiply instructions, kept as one string of
# macroop definitions.  Most instructions appear in three operand forms:
#   _R  the source operand is already in a register,
#   _M  the source operand is first loaded with sib addressing,
#   _P  the source operand is first loaded rip-relative; these forms run
#       "rdip t7" before the load.
# Every macroop follows the same three steps: a mul1u/mul1s microop that
# takes both factors and updates OF and CF, then mulel and muleh to move
# the two result halves into their destinations.  The forms that only keep
# one result register send muleh to t0.
microcode = '''

#
# Byte version of one operand unsigned multiply.
#

def macroop MUL_B_R
{
    mul1u rax, reg, flags=(OF,CF)
    mulel rax
    muleh ah
};

def macroop MUL_B_M
{
    ld t1, seg, sib, disp
    mul1u rax, t1, flags=(OF,CF)
    mulel rax
    muleh ah
};

def macroop MUL_B_P
{
    rdip t7
    ld t1, seg, riprel, disp
    mul1u rax, t1, flags=(OF,CF)
    mulel rax
    muleh ah
};

#
# One operand unsigned multiply.
#

def macroop MUL_R
{
    mul1u rax, reg, flags=(OF,CF)
    mulel rax
    muleh rdx
};

def macroop MUL_M
{
    ld t1, seg, sib, disp
    mul1u rax, t1, flags=(OF,CF)
    mulel rax
    muleh rdx
};

def macroop MUL_P
{
    rdip t7
    ld t1, seg, riprel, disp
    mul1u rax, t1, flags=(OF,CF)
    mulel rax
    muleh rdx
};

#
# Byte version of one operand signed multiply.
#

def macroop IMUL_B_R
{
    mul1s rax, reg, flags=(OF,CF)
    mulel rax
    muleh ah
};

def macroop IMUL_B_M
{
    ld t1, seg, sib, disp
    mul1s rax, t1, flags=(OF,CF)
    mulel rax
    muleh ah
};

def macroop IMUL_B_P
{
    rdip t7
    ld t1, seg, riprel, disp
    mul1s rax, t1, flags=(OF,CF)
    mulel rax
    muleh ah
};

#
# One operand signed multiply.
#

def macroop IMUL_R
{
    mul1s rax, reg, flags=(OF,CF)
    mulel rax
    muleh rdx
};

def macroop IMUL_M
{
    ld t1, seg, sib, disp
    mul1s rax, t1, flags=(OF,CF)
    mulel rax
    muleh rdx
};

def macroop IMUL_P
{
    rdip t7
    ld t1, seg, riprel, disp
    mul1s rax, t1, flags=(OF,CF)
    mulel rax
    muleh rdx
};

#
# Two operand signed multiply.
#

def macroop IMUL_R_R
{
    mul1s reg, regm, flags=(OF,CF)
    mulel reg
    muleh t0
};

def macroop IMUL_R_M
{
    ld t1, seg, sib, disp
    mul1s reg, t1, flags=(CF,OF)
    mulel reg
    muleh t0
};

def macroop IMUL_R_P
{
    rdip t7
    ld t1, seg, riprel, disp
    mul1s reg, t1, flags=(CF,OF)
    mulel reg
    muleh t0
};

#
# Three operand signed multiply.
#

def macroop IMUL_R_R_I
{
    limm t1, imm
    mul1s regm, t1, flags=(OF,CF)
    mulel reg
    muleh t0
};

def macroop IMUL_R_M_I
{
    limm t1, imm
    ld t2, seg, sib, disp
    mul1s t2, t1, flags=(OF,CF)
    mulel reg
    muleh t0
};

def macroop IMUL_R_P_I
{
    rdip t7
    limm t1, imm
    # Pass disp like every other rip-relative load in this file.
    ld t2, seg, riprel, disp
    mul1s t2, t1, flags=(OF,CF)
    mulel reg
    muleh t0
};
'''

# Microcode snippet that reads a rip-relative memory operand.  The single
# %s slot is filled with the name of the register that receives the loaded
# value; "rdip t7" runs before the load.
pcRel = '''
    rdip t7
    ld %s, seg, riprel, disp
'''
# Microcode snippet that reads a sib-addressed memory operand.  The single
# %s slot is filled with the name of the register that receives the loaded
# value.
sibRel = '''
    ld %s, seg, sib, disp
'''

#
# One byte version of unsigned division
#
# divcode is a %-format template that is expanded once per operand form.
# It expects three keys:
#   suffix   text appended to the macroop name,
#   readOp1  microcode that fetches the divisor (may be empty),
#   op1      name of the register that holds the divisor afterwards.
# The template is run through the % operator, so comments inside it must
# not contain a percent sign.
#

divcode = '''
def macroop DIV_B_%(suffix)s
{
    %(readOp1)s
    # Do the initial part of the division
    div1 ah, %(op1)s, dataSize=1

    #These are split out so we can initialize the number of bits in the
    #second register
    div2i t1, rax, 8, dataSize=1
    div2 t1, rax, t1, dataSize=1

    #Loop until we're out of bits to shift in
divLoopTop:
    div2 t1, rax, t1, dataSize=1
    div2 t1, rax, t1, flags=(EZF,), dataSize=1
    br label("divLoopTop"), flags=(nCEZF,)

    #Unload the answer
    divq rax, dataSize=1
    divr ah, dataSize=1
};
'''

#
# Unsigned division
#
# Same template keys as the byte version (suffix, readOp1, op1).  The
# dividend halves are rdx and rax, and the bit count handed to div2i is
# the quoted expression "env.dataSize * 8" instead of a literal 8.
#

divcode += '''
def macroop DIV_%(suffix)s
{
    %(readOp1)s
    # Do the initial part of the division
    div1 rdx, %(op1)s

    #These are split out so we can initialize the number of bits in the
    #second register
    div2i t1, rax, "env.dataSize * 8"
    div2 t1, rax, t1

    #Loop until we're out of bits to shift in
    #The amount of unrolling here could stand some tuning
divLoopTop:
    div2 t1, rax, t1
    div2 t1, rax, t1
    div2 t1, rax, t1
    div2 t1, rax, t1, flags=(EZF,)
    br label("divLoopTop"), flags=(nCEZF,)

    #Unload the answer
    divq rax
    divr rdx
};
'''

#
# One byte version of signed division
#
# Same template keys as the unsigned versions (suffix, readOp1, op1).
# The macroop divides the absolute values of dividend and divisor and
# fixes up the signs of quotient and remainder afterwards.
#
# Register use inside the template:
#   t1  low half of the dividend (negated, or the original if positive)
#   t5  high half of the dividend until div1 has consumed it, then the
#       quotient
#   t3  absolute value of the divisor
#   t4  borrow out of the low-half negation, then the div2 bit counter,
#       then scratch for negated results
#   t6  remainder
# t2 is deliberately left alone: the memory operand forms load the
# divisor into t2 through readOp1, and it is read again for the final
# sign check at the very end of the macroop.
#

divcode += '''
def macroop IDIV_B_%(suffix)s
{
    # Negate dividend
    sub t1, t0, rax, flags=(ECF,), dataSize=1
    ruflag t4, 3
    # The high half goes into t5 rather than t2 so that a divisor loaded
    # into t2 below is never overwritten.
    sub t5, t0, ah, dataSize=1
    sub t5, t5, t4

    %(readOp1)s

    #Find the sign of the divisor
    slli t0, %(op1)s, 1, flags=(ECF,), dataSize=1

    # Negate divisor
    sub t3, t0, %(op1)s, dataSize=1
    # Put the divisor's absolute value into t3
    mov t3, t3, %(op1)s, flags=(nCECF,), dataSize=1

    #Find the sign of the dividend
    slli t0, ah, 1, flags=(ECF,), dataSize=1

    # Put the dividend's absolute value into t1 and t5
    mov t1, t1, rax, flags=(nCECF,), dataSize=1
    mov t5, t5, ah, flags=(nCECF,), dataSize=1

    # Do the initial part of the division
    div1 t5, t3, dataSize=1

    #These are split out so we can initialize the number of bits in the
    #second register
    div2i t4, t1, 8, dataSize=1
    div2 t4, t1, t4, dataSize=1

    #Loop until we're out of bits to shift in
divLoopTop:
    div2 t4, t1, t4, dataSize=1
    div2 t4, t1, t4, flags=(EZF,), dataSize=1
    br label("divLoopTop"), flags=(nCEZF,)

    #Unload the answer
    divq t5, dataSize=1
    divr t6, dataSize=1

    # Fix up signs. The sign of the dividend is still lying around in ECF.
    # The sign of the remainder, ah, is the same as the dividend. The sign
    # of the quotient is negated if the signs of the divisor and dividend
    # were different.

    # Negate the remainder
    sub t4, t0, t6, dataSize=1
    # If the dividend was negative, put the negated remainder in ah.
    mov ah, ah, t4, (CECF,), dataSize=1
    # Otherwise put the regular remainder in ah.
    mov ah, ah, t6, (nCECF,), dataSize=1

    # Negate the quotient.
    sub t4, t0, t5, dataSize=1
    # If the dividend was negative, start using the negated quotient
    mov t5, t5, t4, (CECF,), dataSize=1

    # Check the sign of the divisor
    slli t0, %(op1)s, 1, flags=(ECF,), dataSize=1

    # Negate the (possibly already negated) quotient
    sub t4, t0, t5, dataSize=1
    # If the divisor was negative, put the negated quotient in rax.
    mov rax, rax, t4, (CECF,), dataSize=1
    # Otherwise put the one that wasn't negated (at least here) in rax.
    mov rax, rax, t5, (nCECF,), dataSize=1
};
'''

#
# Signed division
#
# Same template keys as the unsigned versions (suffix, readOp1, op1).
# The macroop divides the absolute values of dividend and divisor and
# fixes up the signs of quotient and remainder afterwards.
#
# Register use inside the template:
#   t1  low half of the dividend (negated, or the original if positive)
#   t5  high half of the dividend until div1 has consumed it, then the
#       quotient
#   t3  absolute value of the divisor
#   t4  borrow out of the low-half negation, then the div2 bit counter,
#       then scratch for negated results
#   t6  remainder
# t2 is deliberately left alone: the memory operand forms load the
# divisor into t2 through readOp1, and it is read again for the final
# sign check at the very end of the macroop.
#

divcode += '''
def macroop IDIV_%(suffix)s
{
    # Negate dividend
    sub t1, t0, rax, flags=(ECF,)
    ruflag t4, 3
    # The high half goes into t5 rather than t2 so that a divisor loaded
    # into t2 below is never overwritten.
    sub t5, t0, rdx
    sub t5, t5, t4

    %(readOp1)s

    #Find the sign of the divisor
    slli t0, %(op1)s, 1, flags=(ECF,)

    # Negate divisor
    sub t3, t0, %(op1)s
    # Put the divisor's absolute value into t3
    mov t3, t3, %(op1)s, flags=(nCECF,)

    #Find the sign of the dividend
    slli t0, rdx, 1, flags=(ECF,)

    # Put the dividend's absolute value into t1 and t5
    mov t1, t1, rax, flags=(nCECF,)
    mov t5, t5, rdx, flags=(nCECF,)

    # Do the initial part of the division
    div1 t5, t3

    #These are split out so we can initialize the number of bits in the
    #second register
    div2i t4, t1, "env.dataSize * 8"
    div2 t4, t1, t4

    #Loop until we're out of bits to shift in
divLoopTop:
    div2 t4, t1, t4
    div2 t4, t1, t4
    div2 t4, t1, t4
    div2 t4, t1, t4, flags=(EZF,)
    br label("divLoopTop"), flags=(nCEZF,)

    #Unload the answer
    divq t5
    divr t6

    # Fix up signs. The sign of the dividend is still lying around in ECF.
    # The sign of the remainder, rdx, is the same as the dividend. The sign
    # of the quotient is negated if the signs of the divisor and dividend
    # were different.

    # Negate the remainder
    sub t4, t0, t6
    # If the dividend was negative, put the negated remainder in rdx.
    mov rdx, rdx, t4, (CECF,)
    # Otherwise put the regular remainder in rdx.
    mov rdx, rdx, t6, (nCECF,)

    # Negate the quotient.
    sub t4, t0, t5
    # If the dividend was negative, start using the negated quotient
    mov t5, t5, t4, (CECF,)

    # Check the sign of the divisor
    slli t0, %(op1)s, 1, flags=(ECF,)

    # Negate the (possibly already negated) quotient
    sub t4, t0, t5
    # If the divisor was negative, put the negated quotient in rax.
    mov rax, rax, t4, (CECF,)
    # Otherwise put the one that wasn't negated (at least here) in rax.
    mov rax, rax, t5, (nCECF,)
};
'''

# Expand the division templates once per operand form and append the
# result to the multiply microcode:
#   R  the divisor is already in "reg", so nothing has to be read,
#   M  the divisor is loaded into t2 with sib addressing,
#   P  the divisor is loaded into t2 rip-relative.
# For M and P the divisor lives in t2 from readOp1 onwards, so a template
# must not keep one of its own temporaries in t2 across that point.
microcode += divcode % {"suffix": "R",
                        "readOp1": "", "op1": "reg"}
microcode += divcode % {"suffix": "M",
                        "readOp1": sibRel % "t2", "op1": "t2"}
microcode += divcode % {"suffix": "P",
                        "readOp1": pcRel % "t2", "op1": "t2"}
455