Update x86-64 mpn routines from GMP 5.0.1.
This commit is contained in:
parent
ece2984070
commit
0959ffc97b
20
ChangeLog
20
ChangeLog
@ -1,12 +1,22 @@
|
||||
2010-09-02 Ulrich Drepper <drepper@redhat.com>
|
||||
|
||||
* sysdeps/x86_64/add_n.S: Update from GMP 5.0.1.
|
||||
* sysdeps/x86_64/addmul_1.S: Likewise.
|
||||
* sysdeps/x86_64/lshift.S: Likewise.
|
||||
* sysdeps/x86_64/mul_1.S: Likewise.
|
||||
* sysdeps/x86_64/rshift.S: Likewise.
|
||||
* sysdeps/x86_64/sub_n.S: Likewise.
|
||||
* sysdeps/x86_64/submul_1.S: Likewise.
|
||||
|
||||
2010-09-01 Samuel Thibault <samuel.thibault@ens-lyon.org>
|
||||
|
||||
This aligns bits/sched.h onto sysdeps/unix/sysv/linux/bits/sched.h:
|
||||
Define __sched_param instead of SCHED_* and sched_param when
|
||||
This aligns bits/sched.h onto sysdeps/unix/sysv/linux/bits/sched.h:
|
||||
Define __sched_param instead of SCHED_* and sched_param when
|
||||
<bits/sched.h> is included with __need_schedparam defined.
|
||||
* bits/sched.h [__need_schedparam]
|
||||
* bits/sched.h [__need_schedparam]
|
||||
(SCHED_OTHER, SCHED_FIFO, SCHED_RR, sched_param): Do not define.
|
||||
[!__defined_schedparam && (__need_schedparam || _SCHED_H)]
|
||||
(__defined_schedparam): Define to 1.
|
||||
[!__defined_schedparam && (__need_schedparam || _SCHED_H)]
|
||||
(__defined_schedparam): Define to 1.
|
||||
(__sched_param): New structure, identical to sched_param.
|
||||
(__need_schedparam): Undefine.
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* Add two limb vectors of the same length > 0 and store sum in a third
|
||||
limb vector.
|
||||
Copyright (C) 2004 Free Software Foundation, Inc.
|
||||
/* x86-64 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
|
||||
sum in a third limb vector.
|
||||
Copyright (C) 2006, 2007 Free Software Foundation, Inc.
|
||||
This file is part of the GNU MP Library.
|
||||
|
||||
The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
@ -21,22 +21,81 @@
|
||||
#include "sysdep.h"
|
||||
#include "asm-syntax.h"
|
||||
|
||||
#define rp %rdi
|
||||
#define up %rsi
|
||||
#define vp %rdx
|
||||
#define n %rcx
|
||||
#define cy %r8
|
||||
|
||||
#ifndef func
|
||||
# define func __mpn_add_n
|
||||
# define ADCSBB adc
|
||||
#endif
|
||||
|
||||
.text
|
||||
ENTRY (__mpn_add_n)
|
||||
leaq (%rsi,%rcx,8), %rsi
|
||||
leaq (%rdi,%rcx,8), %rdi
|
||||
leaq (%rdx,%rcx,8), %rdx
|
||||
negq %rcx
|
||||
xorl %eax, %eax # clear cy
|
||||
.p2align 2
|
||||
L(loop):
|
||||
movq (%rsi,%rcx,8), %rax
|
||||
movq (%rdx,%rcx,8), %r10
|
||||
adcq %r10, %rax
|
||||
movq %rax, (%rdi,%rcx,8)
|
||||
incq %rcx
|
||||
jne L(loop)
|
||||
movq %rcx, %rax # zero %rax
|
||||
adcq %rax, %rax
|
||||
ENTRY (func)
|
||||
xor %r8, %r8
|
||||
mov (up), %r10
|
||||
mov (vp), %r11
|
||||
|
||||
lea -8(up,n,8), up
|
||||
lea -8(vp,n,8), vp
|
||||
lea -16(rp,n,8), rp
|
||||
mov %ecx, %eax
|
||||
neg n
|
||||
and $3, %eax
|
||||
je L(b00)
|
||||
add %rax, n /* clear low rcx bits for jrcxz */
|
||||
cmp $2, %eax
|
||||
jl L(b01)
|
||||
je L(b10)
|
||||
|
||||
L(b11): shr %r8 /* set cy */
|
||||
jmp L(e11)
|
||||
|
||||
L(b00): shr %r8 /* set cy */
|
||||
mov %r10, %r8
|
||||
mov %r11, %r9
|
||||
lea 4(n), n
|
||||
jmp L(e00)
|
||||
|
||||
L(b01): shr %r8 /* set cy */
|
||||
jmp L(e01)
|
||||
|
||||
L(b10): shr %r8 /* set cy */
|
||||
mov %r10, %r8
|
||||
mov %r11, %r9
|
||||
jmp L(e10)
|
||||
|
||||
L(end): ADCSBB %r11, %r10
|
||||
mov %r10, 8(rp)
|
||||
mov %ecx, %eax /* clear eax, ecx contains 0 */
|
||||
adc %eax, %eax
|
||||
ret
|
||||
END (__mpn_add_n)
|
||||
|
||||
.p2align 4
|
||||
L(top):
|
||||
mov -24(up,n,8), %r8
|
||||
mov -24(vp,n,8), %r9
|
||||
ADCSBB %r11, %r10
|
||||
mov %r10, -24(rp,n,8)
|
||||
L(e00):
|
||||
mov -16(up,n,8), %r10
|
||||
mov -16(vp,n,8), %r11
|
||||
ADCSBB %r9, %r8
|
||||
mov %r8, -16(rp,n,8)
|
||||
L(e11):
|
||||
mov -8(up,n,8), %r8
|
||||
mov -8(vp,n,8), %r9
|
||||
ADCSBB %r11, %r10
|
||||
mov %r10, -8(rp,n,8)
|
||||
L(e10):
|
||||
mov (up,n,8), %r10
|
||||
mov (vp,n,8), %r11
|
||||
ADCSBB %r9, %r8
|
||||
mov %r8, (rp,n,8)
|
||||
L(e01):
|
||||
jrcxz L(end)
|
||||
lea 4(n), n
|
||||
jmp L(top)
|
||||
END (func)
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* AMD64 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
|
||||
/* x86-64 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
|
||||
the result to a second limb vector.
|
||||
Copyright (C) 2004 Free Software Foundation, Inc.
|
||||
Copyright (C) 2003,2004,2005,2007,2008,2009 Free Software Foundation, Inc.
|
||||
This file is part of the GNU MP Library.
|
||||
|
||||
The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
@ -21,26 +21,95 @@
|
||||
#include "sysdep.h"
|
||||
#include "asm-syntax.h"
|
||||
|
||||
#define rp %rdi
|
||||
#define up %rsi
|
||||
#define n %rdx
|
||||
#define v0 %rcx
|
||||
|
||||
#ifndef func
|
||||
# define func __mpn_addmul_1
|
||||
# define ADDSUB add
|
||||
#endif
|
||||
|
||||
.text
|
||||
ENTRY (__mpn_addmul_1)
|
||||
movq %rdx, %r11
|
||||
leaq (%rsi,%rdx,8), %rsi
|
||||
leaq (%rdi,%rdx,8), %rdi
|
||||
negq %r11
|
||||
xorl %r8d, %r8d
|
||||
xorl %r10d, %r10d
|
||||
.p2align 2
|
||||
L(loop):
|
||||
movq (%rsi,%r11,8), %rax
|
||||
mulq %rcx
|
||||
addq (%rdi,%r11,8), %rax
|
||||
adcq %r10, %rdx
|
||||
addq %r8, %rax
|
||||
movq %r10, %r8
|
||||
movq %rax, (%rdi,%r11,8)
|
||||
adcq %rdx, %r8
|
||||
incq %r11
|
||||
jne L(loop)
|
||||
movq %r8, %rax
|
||||
ENTRY (func)
|
||||
push %rbx
|
||||
push %rbp
|
||||
lea (%rdx), %rbx
|
||||
neg %rbx
|
||||
|
||||
mov (up), %rax
|
||||
mov (rp), %r10
|
||||
|
||||
lea -16(rp,%rdx,8), rp
|
||||
lea (up,%rdx,8), up
|
||||
mul %rcx
|
||||
|
||||
bt $0, %ebx
|
||||
jc L(odd)
|
||||
|
||||
lea (%rax), %r11
|
||||
mov 8(up,%rbx,8), %rax
|
||||
lea (%rdx), %rbp
|
||||
mul %rcx
|
||||
add $2, %rbx
|
||||
jns L(n2)
|
||||
|
||||
lea (%rax), %r8
|
||||
mov (up,%rbx,8), %rax
|
||||
lea (%rdx), %r9
|
||||
jmp L(mid)
|
||||
|
||||
L(odd): add $1, %rbx
|
||||
jns L(n1)
|
||||
|
||||
lea (%rax), %r8
|
||||
mov (up,%rbx,8), %rax
|
||||
lea (%rdx), %r9
|
||||
mul %rcx
|
||||
lea (%rax), %r11
|
||||
mov 8(up,%rbx,8), %rax
|
||||
lea (%rdx), %rbp
|
||||
jmp L(e)
|
||||
|
||||
.p2align 4
|
||||
L(top): mul %rcx
|
||||
ADDSUB %r8, %r10
|
||||
lea (%rax), %r8
|
||||
mov (up,%rbx,8), %rax
|
||||
adc %r9, %r11
|
||||
mov %r10, -8(rp,%rbx,8)
|
||||
mov (rp,%rbx,8), %r10
|
||||
lea (%rdx), %r9
|
||||
adc $0, %rbp
|
||||
L(mid): mul %rcx
|
||||
ADDSUB %r11, %r10
|
||||
lea (%rax), %r11
|
||||
mov 8(up,%rbx,8), %rax
|
||||
adc %rbp, %r8
|
||||
mov %r10, (rp,%rbx,8)
|
||||
mov 8(rp,%rbx,8), %r10
|
||||
lea (%rdx), %rbp
|
||||
adc $0, %r9
|
||||
L(e): add $2, %rbx
|
||||
js L(top)
|
||||
|
||||
mul %rcx
|
||||
ADDSUB %r8, %r10
|
||||
adc %r9, %r11
|
||||
mov %r10, -8(rp)
|
||||
adc $0, %rbp
|
||||
L(n2): mov (rp), %r10
|
||||
ADDSUB %r11, %r10
|
||||
adc %rbp, %rax
|
||||
mov %r10, (rp)
|
||||
adc $0, %rdx
|
||||
L(n1): mov 8(rp), %r10
|
||||
ADDSUB %rax, %r10
|
||||
mov %r10, 8(rp)
|
||||
mov %ebx, %eax /* zero rax */
|
||||
adc %rdx, %rax
|
||||
pop %rbp
|
||||
pop %rbx
|
||||
ret
|
||||
END (__mpn_addmul_1)
|
||||
END (func)
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* AMD64 __mpn_lshift --
|
||||
Copyright 2004, 2006 Free Software Foundation, Inc.
|
||||
/* x86-64 __mpn_lshift -- Shift a limb vector left by cnt bits.
|
||||
Copyright (C) 2007, 2009 Free Software Foundation, Inc.
|
||||
This file is part of the GNU MP Library.
|
||||
|
||||
The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
@ -20,41 +20,98 @@
|
||||
#include "sysdep.h"
|
||||
#include "asm-syntax.h"
|
||||
|
||||
#define rp %rdi
|
||||
#define up %rsi
|
||||
#define n %rdx
|
||||
#define cnt %cl
|
||||
|
||||
.text
|
||||
ENTRY (__mpn_lshift)
|
||||
movq -8(%rsi,%rdx,8), %mm7
|
||||
movd %ecx, %mm1
|
||||
movl $64, %eax
|
||||
subl %ecx, %eax
|
||||
movd %eax, %mm0
|
||||
movq %mm7, %mm3
|
||||
psrlq %mm0, %mm7
|
||||
movd %mm7, %rax
|
||||
subq $2, %rdx
|
||||
jl L(endo)
|
||||
.p2align 2
|
||||
L(loop):
|
||||
movq (%rsi,%rdx,8), %mm6
|
||||
movq %mm6, %mm2
|
||||
psrlq %mm0, %mm6
|
||||
psllq %mm1, %mm3
|
||||
por %mm6, %mm3
|
||||
movq %mm3, 8(%rdi,%rdx,8)
|
||||
je L(ende)
|
||||
movq -8(%rsi,%rdx,8), %mm7
|
||||
movq %mm7, %mm3
|
||||
psrlq %mm0, %mm7
|
||||
psllq %mm1, %mm2
|
||||
por %mm7, %mm2
|
||||
movq %mm2, (%rdi,%rdx,8)
|
||||
subq $2, %rdx
|
||||
jge L(loop)
|
||||
L(endo):
|
||||
movq %mm3, %mm2
|
||||
L(ende):
|
||||
psllq %mm1, %mm2
|
||||
movq %mm2, (%rdi)
|
||||
emms
|
||||
lea -8(rp,n,8), rp
|
||||
lea -8(up,n,8), up
|
||||
|
||||
mov %edx, %eax
|
||||
and $3, %eax
|
||||
jne L(nb00)
|
||||
L(b00): /* n = 4, 8, 12, ... */
|
||||
mov (up), %r10
|
||||
mov -8(up), %r11
|
||||
xor %eax, %eax
|
||||
shld %cl, %r10, %rax
|
||||
mov -16(up), %r8
|
||||
lea 24(rp), rp
|
||||
sub $4, n
|
||||
jmp L(00)
|
||||
|
||||
L(nb00):/* n = 1, 5, 9, ... */
|
||||
cmp $2, %eax
|
||||
jae L(nb01)
|
||||
L(b01): mov (up), %r9
|
||||
xor %eax, %eax
|
||||
shld %cl, %r9, %rax
|
||||
sub $2, n
|
||||
jb L(le1)
|
||||
mov -8(up), %r10
|
||||
mov -16(up), %r11
|
||||
lea -8(up), up
|
||||
lea 16(rp), rp
|
||||
jmp L(01)
|
||||
L(le1): shl %cl, %r9
|
||||
mov %r9, (rp)
|
||||
ret
|
||||
|
||||
L(nb01):/* n = 2, 6, 10, ... */
|
||||
jne L(b11)
|
||||
L(b10): mov (up), %r8
|
||||
mov -8(up), %r9
|
||||
xor %eax, %eax
|
||||
shld %cl, %r8, %rax
|
||||
sub $3, n
|
||||
jb L(le2)
|
||||
mov -16(up), %r10
|
||||
lea -16(up), up
|
||||
lea 8(rp), rp
|
||||
jmp L(10)
|
||||
L(le2): shld %cl, %r9, %r8
|
||||
mov %r8, (rp)
|
||||
shl %cl, %r9
|
||||
mov %r9, -8(rp)
|
||||
ret
|
||||
|
||||
.p2align 4 /* performance critical! */
|
||||
L(b11): /* n = 3, 7, 11, ... */
|
||||
mov (up), %r11
|
||||
mov -8(up), %r8
|
||||
xor %eax, %eax
|
||||
shld %cl, %r11, %rax
|
||||
mov -16(up), %r9
|
||||
lea -24(up), up
|
||||
sub $4, n
|
||||
jb L(end)
|
||||
|
||||
.p2align 4
|
||||
L(top): shld %cl, %r8, %r11
|
||||
mov (up), %r10
|
||||
mov %r11, (rp)
|
||||
L(10): shld %cl, %r9, %r8
|
||||
mov -8(up), %r11
|
||||
mov %r8, -8(rp)
|
||||
L(01): shld %cl, %r10, %r9
|
||||
mov -16(up), %r8
|
||||
mov %r9, -16(rp)
|
||||
L(00): shld %cl, %r11, %r10
|
||||
mov -24(up), %r9
|
||||
mov %r10, -24(rp)
|
||||
add $-32, up
|
||||
lea -32(rp), rp
|
||||
sub $4, n
|
||||
jnc L(top)
|
||||
|
||||
L(end): shld %cl, %r8, %r11
|
||||
mov %r11, (rp)
|
||||
shld %cl, %r9, %r8
|
||||
mov %r8, -8(rp)
|
||||
shl %cl, %r9
|
||||
mov %r9, -16(rp)
|
||||
ret
|
||||
END (__mpn_lshift)
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* x86-64 __mpn_mul_1 -- Multiply a limb vector with a limb and store
|
||||
the result in a second limb vector.
|
||||
Copyright (C) 2004 Free Software Foundation, Inc.
|
||||
Copyright (C) 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
|
||||
This file is part of the GNU MP Library.
|
||||
|
||||
The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
@ -21,22 +21,109 @@
|
||||
#include <sysdep.h>
|
||||
#include "asm-syntax.h"
|
||||
|
||||
#define rp %rdi
|
||||
#define up %rsi
|
||||
#define n_param %rdx
|
||||
#define vl %rcx
|
||||
|
||||
#define n %r11
|
||||
|
||||
.text
|
||||
ENTRY (__mpn_mul_1)
|
||||
movq %rdx, %r11
|
||||
leaq (%rsi,%rdx,8), %rsi
|
||||
leaq (%rdi,%rdx,8), %rdi
|
||||
negq %r11
|
||||
xorl %r8d, %r8d
|
||||
L(loop):
|
||||
movq (%rsi,%r11,8), %rax
|
||||
mulq %rcx
|
||||
addq %r8, %rax
|
||||
movl $0, %r8d
|
||||
adcq %rdx, %r8
|
||||
movq %rax, (%rdi,%r11,8)
|
||||
incq %r11
|
||||
jne L(loop)
|
||||
movq %r8, %rax
|
||||
push %rbx
|
||||
cfi_adjust_cfa_offset (8)
|
||||
cfi_rel_offset (%rbx, 0)
|
||||
xor %r10, %r10
|
||||
mov (up), %rax /* read first u limb early */
|
||||
mov n_param, %rbx /* move away n from rdx, mul uses it */
|
||||
mul vl
|
||||
mov %rbx, %r11
|
||||
|
||||
add %r10, %rax
|
||||
adc $0, %rdx
|
||||
|
||||
and $3, %ebx
|
||||
jz L(b0)
|
||||
cmp $2, %ebx
|
||||
jz L(b2)
|
||||
jg L(b3)
|
||||
|
||||
L(b1): dec n
|
||||
jne L(gt1)
|
||||
mov %rax, (rp)
|
||||
jmp L(ret)
|
||||
L(gt1): lea 8(up,n,8), up
|
||||
lea -8(rp,n,8), rp
|
||||
neg n
|
||||
xor %r10, %r10
|
||||
xor %ebx, %ebx
|
||||
mov %rax, %r9
|
||||
mov (up,n,8), %rax
|
||||
mov %rdx, %r8
|
||||
jmp L(L1)
|
||||
|
||||
L(b0): lea (up,n,8), up
|
||||
lea -16(rp,n,8), rp
|
||||
neg n
|
||||
xor %r10, %r10
|
||||
mov %rax, %r8
|
||||
mov %rdx, %rbx
|
||||
jmp L(L0)
|
||||
|
||||
L(b3): lea -8(up,n,8), up
|
||||
lea -24(rp,n,8), rp
|
||||
neg n
|
||||
mov %rax, %rbx
|
||||
mov %rdx, %r10
|
||||
jmp L(L3)
|
||||
|
||||
L(b2): lea -16(up,n,8), up
|
||||
lea -32(rp,n,8), rp
|
||||
neg n
|
||||
xor %r8, %r8
|
||||
xor %ebx, %ebx
|
||||
mov %rax, %r10
|
||||
mov 24(up,n,8), %rax
|
||||
mov %rdx, %r9
|
||||
jmp L(L2)
|
||||
|
||||
.p2align 4
|
||||
L(top): mov %r10, (rp,n,8)
|
||||
add %rax, %r9
|
||||
mov (up,n,8), %rax
|
||||
adc %rdx, %r8
|
||||
mov $0, %r10d
|
||||
L(L1): mul vl
|
||||
mov %r9, 8(rp,n,8)
|
||||
add %rax, %r8
|
||||
adc %rdx, %rbx
|
||||
L(L0): mov 8(up,n,8), %rax
|
||||
mul vl
|
||||
mov %r8, 16(rp,n,8)
|
||||
add %rax, %rbx
|
||||
adc %rdx, %r10
|
||||
L(L3): mov 16(up,n,8), %rax
|
||||
mul vl
|
||||
mov %rbx, 24(rp,n,8)
|
||||
mov $0, %r8d # zero
|
||||
mov %r8, %rbx # zero
|
||||
add %rax, %r10
|
||||
mov 24(up,n,8), %rax
|
||||
mov %r8, %r9 # zero
|
||||
adc %rdx, %r9
|
||||
L(L2): mul vl
|
||||
add $4, n
|
||||
js L(top)
|
||||
|
||||
mov %r10, (rp,n,8)
|
||||
add %rax, %r9
|
||||
adc %r8, %rdx
|
||||
mov %r9, 8(rp,n,8)
|
||||
add %r8, %rdx
|
||||
L(ret): mov %rdx, %rax
|
||||
|
||||
pop %rbx
|
||||
cfi_adjust_cfa_offset (-8)
|
||||
cfi_restore (%rbx)
|
||||
ret
|
||||
END (__mpn_mul_1)
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* AMD64 __mpn_rshift --
|
||||
Copyright (C) 2004, 2006 Free Software Foundation, Inc.
|
||||
/* x86-64 __mpn_rshift -- Shift a limb vector right by cnt bits.
|
||||
Copyright (C) 2007, 2009 Free Software Foundation, Inc.
|
||||
This file is part of the GNU MP Library.
|
||||
|
||||
The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
@ -20,43 +20,96 @@
|
||||
#include "sysdep.h"
|
||||
#include "asm-syntax.h"
|
||||
|
||||
#define rp %rdi
|
||||
#define up %rsi
|
||||
#define n %rdx
|
||||
#define cnt %cl
|
||||
|
||||
.text
|
||||
ENTRY (__mpn_rshift)
|
||||
movq (%rsi), %mm7
|
||||
movd %ecx, %mm1
|
||||
movl $64, %eax
|
||||
subl %ecx, %eax
|
||||
movd %eax, %mm0
|
||||
movq %mm7, %mm3
|
||||
psllq %mm0, %mm7
|
||||
movd %mm7, %rax
|
||||
leaq (%rsi,%rdx,8), %rsi
|
||||
leaq (%rdi,%rdx,8), %rdi
|
||||
negq %rdx
|
||||
addq $2, %rdx
|
||||
jg L(endo)
|
||||
.p2align 2
|
||||
L(loop):
|
||||
movq -8(%rsi,%rdx,8), %mm6
|
||||
movq %mm6, %mm2
|
||||
psllq %mm0, %mm6
|
||||
psrlq %mm1, %mm3
|
||||
por %mm6, %mm3
|
||||
movq %mm3, -16(%rdi,%rdx,8)
|
||||
je L(ende)
|
||||
movq (%rsi,%rdx,8), %mm7
|
||||
movq %mm7, %mm3
|
||||
psllq %mm0, %mm7
|
||||
psrlq %mm1, %mm2
|
||||
por %mm7, %mm2
|
||||
movq %mm2, -8(%rdi,%rdx,8)
|
||||
addq $2, %rdx
|
||||
jle L(loop)
|
||||
L(endo):
|
||||
movq %mm3, %mm2
|
||||
L(ende):
|
||||
psrlq %mm1, %mm2
|
||||
movq %mm2, -8(%rdi)
|
||||
emms
|
||||
mov %edx, %eax
|
||||
and $3, %eax
|
||||
jne L(nb00)
|
||||
L(b00): /* n = 4, 8, 12, ... */
|
||||
mov (up), %r10
|
||||
mov 8(up), %r11
|
||||
xor %eax, %eax
|
||||
shrd %cl, %r10, %rax
|
||||
mov 16(up), %r8
|
||||
lea 8(up), up
|
||||
lea -24(rp), rp
|
||||
sub $4, n
|
||||
jmp L(00)
|
||||
|
||||
L(nb00):/* n = 1, 5, 9, ... */
|
||||
cmp $2, %eax
|
||||
jae L(nb01)
|
||||
L(b01): mov (up), %r9
|
||||
xor %eax, %eax
|
||||
shrd %cl, %r9, %rax
|
||||
sub $2, n
|
||||
jb L(le1)
|
||||
mov 8(up), %r10
|
||||
mov 16(up), %r11
|
||||
lea 16(up), up
|
||||
lea -16(rp), rp
|
||||
jmp L(01)
|
||||
L(le1): shr %cl, %r9
|
||||
mov %r9, (rp)
|
||||
ret
|
||||
|
||||
L(nb01):/* n = 2, 6, 10, ... */
|
||||
jne L(b11)
|
||||
L(b10): mov (up), %r8
|
||||
mov 8(up), %r9
|
||||
xor %eax, %eax
|
||||
shrd %cl, %r8, %rax
|
||||
sub $3, n
|
||||
jb L(le2)
|
||||
mov 16(up), %r10
|
||||
lea 24(up), up
|
||||
lea -8(rp), rp
|
||||
jmp L(10)
|
||||
L(le2): shrd %cl, %r9, %r8
|
||||
mov %r8, (rp)
|
||||
shr %cl, %r9
|
||||
mov %r9, 8(rp)
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(b11): /* n = 3, 7, 11, ... */
|
||||
mov (up), %r11
|
||||
mov 8(up), %r8
|
||||
xor %eax, %eax
|
||||
shrd %cl, %r11, %rax
|
||||
mov 16(up), %r9
|
||||
lea 32(up), up
|
||||
sub $4, n
|
||||
jb L(end)
|
||||
|
||||
.p2align 4
|
||||
L(top): shrd %cl, %r8, %r11
|
||||
mov -8(up), %r10
|
||||
mov %r11, (rp)
|
||||
L(10): shrd %cl, %r9, %r8
|
||||
mov (up), %r11
|
||||
mov %r8, 8(rp)
|
||||
L(01): shrd %cl, %r10, %r9
|
||||
mov 8(up), %r8
|
||||
mov %r9, 16(rp)
|
||||
L(00): shrd %cl, %r11, %r10
|
||||
mov 16(up), %r9
|
||||
mov %r10, 24(rp)
|
||||
add $32, up
|
||||
lea 32(rp), rp
|
||||
sub $4, n
|
||||
jnc L(top)
|
||||
|
||||
L(end): shrd %cl, %r8, %r11
|
||||
mov %r11, (rp)
|
||||
shrd %cl, %r9, %r8
|
||||
mov %r8, 8(rp)
|
||||
shr %cl, %r9
|
||||
mov %r9, 16(rp)
|
||||
ret
|
||||
END (__mpn_rshift)
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* AMD64 __mpn_sub_n -- Add two limb vectors of the same length > 0 and store
|
||||
/* x86-64 __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
|
||||
store difference in a third limb vector.
|
||||
Copyright (C) 2004 Free Software Foundation, Inc.
|
||||
Copyright (C) 2006, 2007 Free Software Foundation, Inc.
|
||||
This file is part of the GNU MP Library.
|
||||
|
||||
The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
@ -18,25 +18,7 @@
|
||||
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
MA 02111-1307, USA. */
|
||||
|
||||
#include "sysdep.h"
|
||||
#include "asm-syntax.h"
|
||||
#define func __mpn_sub_n
|
||||
#define ADCSBB sbb
|
||||
|
||||
.text
|
||||
ENTRY (__mpn_sub_n)
|
||||
leaq (%rsi,%rcx,8), %rsi
|
||||
leaq (%rdi,%rcx,8), %rdi
|
||||
leaq (%rdx,%rcx,8), %rdx
|
||||
negq %rcx
|
||||
xorl %eax, %eax # clear cy
|
||||
.p2align 2
|
||||
L(loop):
|
||||
movq (%rsi,%rcx,8), %rax
|
||||
movq (%rdx,%rcx,8), %r10
|
||||
sbbq %r10, %rax
|
||||
movq %rax, (%rdi,%rcx,8)
|
||||
incq %rcx
|
||||
jne L(loop)
|
||||
movq %rcx, %rax # zero %rax
|
||||
adcq %rax, %rax
|
||||
ret
|
||||
END (__mpn_sub_n)
|
||||
#include "add_n.S"
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* AMD64 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
|
||||
/* x86-64 __mpn_submul_1 -- Multiply a limb vector with a limb and subtract
|
||||
the result from a second limb vector.
|
||||
Copyright (C) 2004 Free Software Foundation, Inc.
|
||||
Copyright (C) 2003,2004,2005,2007,2008,2009 Free Software Foundation, Inc.
|
||||
This file is part of the GNU MP Library.
|
||||
|
||||
The GNU MP Library is free software; you can redistribute it and/or modify
|
||||
@ -18,29 +18,7 @@
|
||||
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
|
||||
MA 02111-1307, USA. */
|
||||
|
||||
#include "sysdep.h"
|
||||
#include "asm-syntax.h"
|
||||
#define func __mpn_submul_1
|
||||
#define ADDSUB sub
|
||||
|
||||
.text
|
||||
ENTRY (__mpn_submul_1)
|
||||
movq %rdx, %r11
|
||||
leaq (%rsi,%r11,8), %rsi
|
||||
leaq (%rdi,%r11,8), %rdi
|
||||
negq %r11
|
||||
xorl %r8d, %r8d
|
||||
.p2align 3
|
||||
L(loop):
|
||||
movq (%rsi,%r11,8), %rax
|
||||
movq (%rdi,%r11,8), %r10
|
||||
mulq %rcx
|
||||
subq %r8, %r10
|
||||
movl $0, %r8d
|
||||
adcl %r8d, %r8d
|
||||
subq %rax, %r10
|
||||
adcq %rdx, %r8
|
||||
movq %r10, (%rdi,%r11,8)
|
||||
incq %r11
|
||||
jne L(loop)
|
||||
movq %r8, %rax
|
||||
ret
|
||||
END (__mpn_submul_1)
|
||||
#include "addmul_1.S"
|
||||
|
Loading…
x
Reference in New Issue
Block a user