1996-03-01 18:45:35 +00:00
|
|
|
! SPARC __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
|
1995-02-18 01:27:10 +00:00
|
|
|
! store difference in a third limb vector.
|
|
|
|
|
1996-03-01 18:45:35 +00:00
|
|
|
! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
|
1995-02-18 01:27:10 +00:00
|
|
|
|
|
|
|
! This file is part of the GNU MP Library.
|
|
|
|
|
|
|
|
! The GNU MP Library is free software; you can redistribute it and/or modify
|
|
|
|
! it under the terms of the GNU Library General Public License as published by
|
|
|
|
! the Free Software Foundation; either version 2 of the License, or (at your
|
|
|
|
! option) any later version.
|
|
|
|
|
|
|
|
! The GNU MP Library is distributed in the hope that it will be useful, but
|
|
|
|
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
|
|
! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
|
|
|
|
! License for more details.
|
|
|
|
|
|
|
|
! You should have received a copy of the GNU Library General Public License
|
|
|
|
! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
|
|
|
! the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
|
|
|
|
|
|
|
|
|
|
! INPUT PARAMETERS
|
1995-12-04 18:37:56 +00:00
|
|
|
/* Symbolic names for the four argument registers used by __mpn_sub_n.
   The code below stores the difference through res_ptr, reads the minuend
   through s1_ptr and the subtrahend through s2_ptr, and uses size as the
   limb count that is counted down while the vectors are processed. */
#define res_ptr %o0
|
|
|
|
#define s1_ptr %o1
|
|
|
|
#define s2_ptr %o2
|
|
|
|
#define size %o3
|
1995-02-18 01:27:10 +00:00
|
|
|
|
|
|
|
#include "sysdep.h"
|
|
|
|
|
|
|
|
/* __mpn_sub_n: entry point, alignment dispatch, and variant V1a.

   Subtracts the limb vector at s2_ptr from the limb vector at s1_ptr,
   one 32-bit limb at a time starting with the least significant limb,
   stores the difference at res_ptr and returns the final borrow in %o0.

   Registers (the aliases come from the #define lines earlier in the file):
     res_ptr  %o0  difference is stored here
     s1_ptr   %o1  minuend limbs
     s2_ptr   %o2  subtrahend limbs
     size     %o3  limb count; biased and counted down as the loop counter
   Written as scratch: %g1, %g2, %g3, %g4, %o4, %o5 and the condition codes.
   The routine returns with retl and issues no save instruction.

   Carry protocol: cmp, addcc and andcc overwrite the carry flag, so the
   running borrow is parked in %o4 with "addx %g0,%g0,%o4" ahead of them
   and recreated with "subcc %g0,%o4,%g0", which borrows exactly when %o4
   is 1.  The restore normally sits in a branch delay slot, so it executes
   on both the taken and the fall-through path. */
.text
|
|
|
|
.align 4
|
|
|
|
.global C_SYMBOL_NAME(__mpn_sub_n)
|
|
|
|
C_SYMBOL_NAME(__mpn_sub_n):
|
1995-12-04 18:37:56 +00:00
|
|
|
/* Bit 2 of a pointer tells whether it lies on an 8-byte boundary, which is
   what ldd and std require (this presumes the pointers are 4-byte aligned
   limb addresses).  If s2_ptr and res_ptr disagree in that bit, go and try
   the s1_ptr/res_ptr pairing at L1. */
xor s2_ptr,res_ptr,%g1
|
|
|
|
andcc %g1,4,%g0
|
|
|
|
bne L1 ! branch if alignment differs
|
|
|
|
nop
|
|
|
|
! ** V1a **
/* V1a: s2_ptr and res_ptr agree in bit 2.  s2 is read with ldd and the
   result is written with std; s1 is read one word at a time with ld. */
|
|
|
|
andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0
|
1996-03-01 18:45:35 +00:00
|
|
|
be L_v1 ! if no, branch
|
1995-12-04 18:37:56 +00:00
|
|
|
nop
|
|
|
|
/* Subtract least significant limb separately to align res_ptr and s2_ptr */
|
|
|
|
ld [s1_ptr],%g4
|
|
|
|
add s1_ptr,4,s1_ptr
|
|
|
|
ld [s2_ptr],%g2
|
|
|
|
add s2_ptr,4,s2_ptr
|
|
|
|
add size,-1,size
|
|
|
|
/* Plain subcc: this is the first limb, so there is no borrow coming in. */
subcc %g4,%g2,%o4
|
|
|
|
st %o4,[res_ptr]
|
|
|
|
add res_ptr,4,res_ptr
|
|
|
|
L_v1: addx %g0,%g0,%o4 ! save cy in register
|
|
|
|
cmp size,2 ! if size < 2 ...
|
|
|
|
bl Lend2 ! ... branch to tail code
|
|
|
|
subcc %g0,%o4,%g0 ! restore cy
|
|
|
|
|
|
|
|
/* Preload the first limb pair: s1 limbs go to %g4/%g1, s2 limbs to %g2/%g3.
   size is biased by -10 because one pass of the 8-limb loop also preloads
   the pair that follows the 8 limbs it stores. */
ld [s1_ptr+0],%g4
|
|
|
|
addcc size,-10,size
|
|
|
|
ld [s1_ptr+4],%g1
|
|
|
|
ldd [s2_ptr+0],%g2
|
|
|
|
blt Lfin1
|
|
|
|
subcc %g0,%o4,%g0 ! restore cy
|
|
|
|
/* Subtract blocks of 8 limbs while at least 10 limbs remain (8 to store
   plus the 2-limb look-ahead).  Each subxcc pair consumes the limbs that
   are already in registers while the loads fetch the next pair. */
|
|
|
|
Loop1: subxcc %g4,%g2,%o4
|
|
|
|
ld [s1_ptr+8],%g4
|
|
|
|
subxcc %g1,%g3,%o5
|
|
|
|
ld [s1_ptr+12],%g1
|
|
|
|
ldd [s2_ptr+8],%g2
|
|
|
|
std %o4,[res_ptr+0]
|
|
|
|
subxcc %g4,%g2,%o4
|
|
|
|
ld [s1_ptr+16],%g4
|
|
|
|
subxcc %g1,%g3,%o5
|
|
|
|
ld [s1_ptr+20],%g1
|
|
|
|
ldd [s2_ptr+16],%g2
|
|
|
|
std %o4,[res_ptr+8]
|
|
|
|
subxcc %g4,%g2,%o4
|
|
|
|
ld [s1_ptr+24],%g4
|
|
|
|
subxcc %g1,%g3,%o5
|
|
|
|
ld [s1_ptr+28],%g1
|
|
|
|
ldd [s2_ptr+24],%g2
|
|
|
|
std %o4,[res_ptr+16]
|
|
|
|
subxcc %g4,%g2,%o4
|
|
|
|
/* Offsets 32 and 36 belong to the next block: this is the look-ahead pair. */
ld [s1_ptr+32],%g4
|
|
|
|
subxcc %g1,%g3,%o5
|
|
|
|
ld [s1_ptr+36],%g1
|
|
|
|
ldd [s2_ptr+32],%g2
|
|
|
|
std %o4,[res_ptr+24]
|
|
|
|
addx %g0,%g0,%o4 ! save cy in register
|
|
|
|
addcc size,-8,size
|
|
|
|
add s1_ptr,32,s1_ptr
|
|
|
|
add s2_ptr,32,s2_ptr
|
|
|
|
add res_ptr,32,res_ptr
|
|
|
|
bge Loop1
|
|
|
|
subcc %g0,%o4,%g0 ! restore cy
|
|
|
|
|
|
|
|
/* Here size is (limbs left, counting the preloaded pair) - 10.  Adding 8-2
   rebiases it to (limbs left) - 4 for the 2-limb loop. */
Lfin1: addcc size,8-2,size
|
|
|
|
blt Lend1
|
|
|
|
subcc %g0,%o4,%g0 ! restore cy
|
|
|
|
/* Subtract blocks of 2 limbs while at least 4 limbs remain (2 to store
   plus the 2-limb look-ahead) */
|
|
|
|
Loope1: subxcc %g4,%g2,%o4
|
|
|
|
ld [s1_ptr+8],%g4
|
|
|
|
subxcc %g1,%g3,%o5
|
|
|
|
ld [s1_ptr+12],%g1
|
|
|
|
ldd [s2_ptr+8],%g2
|
|
|
|
std %o4,[res_ptr+0]
|
|
|
|
addx %g0,%g0,%o4 ! save cy in register
|
|
|
|
addcc size,-2,size
|
|
|
|
add s1_ptr,8,s1_ptr
|
|
|
|
add s2_ptr,8,s2_ptr
|
|
|
|
add res_ptr,8,res_ptr
|
|
|
|
bge Loope1
|
|
|
|
subcc %g0,%o4,%g0 ! restore cy
|
|
|
|
/* Finish the limb pair that is already in registers. */
Lend1: subxcc %g4,%g2,%o4
|
|
|
|
subxcc %g1,%g3,%o5
|
|
|
|
std %o4,[res_ptr+0]
|
|
|
|
addx %g0,%g0,%o4 ! save cy in register
|
|
|
|
|
|
|
|
/* Only even amounts were added to size since L_v1, so bit 0 still tells
   whether one odd limb is left over. */
andcc size,1,%g0
|
|
|
|
be Lret1
|
|
|
|
subcc %g0,%o4,%g0 ! restore cy
|
|
|
|
/* Subtract last limb.  It is at offset 8 because the pointers were not
   advanced past the pair stored at Lend1. */
|
|
|
|
ld [s1_ptr+8],%g4
|
|
|
|
ld [s2_ptr+8],%g2
|
|
|
|
subxcc %g4,%g2,%o4
|
|
|
|
st %o4,[res_ptr+8]
|
|
|
|
|
|
|
|
/* The addx in the retl delay slot turns the carry flag, which holds the
   borrow of the last subtraction, into the 0 or 1 returned in %o0. */
Lret1: retl
|
|
|
|
addx %g0,%g0,%o0 ! return carry-out from most sign. limb
|
|
|
|
|
|
|
|
/* L1 / variant V1b of __mpn_sub_n.

   Reached from the entry code when s2_ptr and res_ptr differ in bit 2.
   If s1_ptr and res_ptr differ there as well, control moves on to L2.
   Otherwise s1 (the minuend) is read with ldd and the result is written
   with std, while s2 (the subtrahend) is read one word at a time with ld.

   Register use inside the loops: s1 limbs are in %g2/%g3, s2 limbs are in
   %g4/%g1, so every "subxcc %g2,%g4" / "subxcc %g3,%g1" still computes
   s1 - s2.  The running borrow is parked in %o4 with "addx %g0,%g0,%o4"
   before cmp/addcc/andcc overwrite the carry flag and is recreated with
   "subcc %g0,%o4,%g0", normally in a branch delay slot. */
L1: xor s1_ptr,res_ptr,%g1
|
|
|
|
andcc %g1,4,%g0
|
|
|
|
bne L2
|
|
|
|
nop
|
|
|
|
! ** V1b **
|
|
|
|
andcc res_ptr,4,%g0 ! res_ptr unaligned? Side effect: cy=0
|
1996-03-01 18:45:35 +00:00
|
|
|
be L_v1b ! if no, branch
|
1995-12-04 18:37:56 +00:00
|
|
|
nop
|
1996-03-01 18:45:35 +00:00
|
|
|
/* Subtract least significant limb separately to align res_ptr and s1_ptr */
|
1995-12-04 18:37:56 +00:00
|
|
|
ld [s2_ptr],%g4
|
|
|
|
add s2_ptr,4,s2_ptr
|
|
|
|
ld [s1_ptr],%g2
|
|
|
|
add s1_ptr,4,s1_ptr
|
|
|
|
add size,-1,size
|
|
|
|
/* Plain subcc: this is the first limb, so there is no borrow coming in.
   %g2 holds the s1 limb and %g4 the s2 limb. */
subcc %g2,%g4,%o4
|
|
|
|
st %o4,[res_ptr]
|
|
|
|
add res_ptr,4,res_ptr
|
|
|
|
L_v1b: addx %g0,%g0,%o4 ! save cy in register
|
|
|
|
cmp size,2 ! if size < 2 ...
|
|
|
|
bl Lend2 ! ... branch to tail code
|
|
|
|
subcc %g0,%o4,%g0 ! restore cy
|
|
|
|
|
|
|
|
/* Preload the first limb pair: s2 limbs go to %g4/%g1, s1 limbs to %g2/%g3.
   size is biased by -10 because one pass of the 8-limb loop also preloads
   the pair that follows the 8 limbs it stores. */
ld [s2_ptr+0],%g4
|
|
|
|
addcc size,-10,size
|
|
|
|
ld [s2_ptr+4],%g1
|
|
|
|
ldd [s1_ptr+0],%g2
|
|
|
|
blt Lfin1b
|
|
|
|
subcc %g0,%o4,%g0 ! restore cy
|
|
|
|
/* Subtract blocks of 8 limbs while at least 10 limbs remain (8 to store
   plus the 2-limb look-ahead).  Each subxcc pair consumes the limbs that
   are already in registers while the loads fetch the next pair. */
|
|
|
|
Loop1b: subxcc %g2,%g4,%o4
|
|
|
|
ld [s2_ptr+8],%g4
|
|
|
|
subxcc %g3,%g1,%o5
|
|
|
|
ld [s2_ptr+12],%g1
|
|
|
|
ldd [s1_ptr+8],%g2
|
|
|
|
std %o4,[res_ptr+0]
|
|
|
|
subxcc %g2,%g4,%o4
|
|
|
|
ld [s2_ptr+16],%g4
|
|
|
|
subxcc %g3,%g1,%o5
|
|
|
|
ld [s2_ptr+20],%g1
|
|
|
|
ldd [s1_ptr+16],%g2
|
|
|
|
std %o4,[res_ptr+8]
|
|
|
|
subxcc %g2,%g4,%o4
|
|
|
|
ld [s2_ptr+24],%g4
|
|
|
|
subxcc %g3,%g1,%o5
|
|
|
|
ld [s2_ptr+28],%g1
|
|
|
|
ldd [s1_ptr+24],%g2
|
|
|
|
std %o4,[res_ptr+16]
|
|
|
|
subxcc %g2,%g4,%o4
|
|
|
|
/* Offsets 32 and 36 belong to the next block: this is the look-ahead pair. */
ld [s2_ptr+32],%g4
|
|
|
|
subxcc %g3,%g1,%o5
|
|
|
|
ld [s2_ptr+36],%g1
|
|
|
|
ldd [s1_ptr+32],%g2
|
|
|
|
std %o4,[res_ptr+24]
|
|
|
|
addx %g0,%g0,%o4 ! save cy in register
|
|
|
|
addcc size,-8,size
|
|
|
|
add s1_ptr,32,s1_ptr
|
|
|
|
add s2_ptr,32,s2_ptr
|
|
|
|
add res_ptr,32,res_ptr
|
|
|
|
bge Loop1b
|
|
|
|
subcc %g0,%o4,%g0 ! restore cy
|
|
|
|
|
|
|
|
/* Here size is (limbs left, counting the preloaded pair) - 10.  Adding 8-2
   rebiases it to (limbs left) - 4 for the 2-limb loop. */
Lfin1b: addcc size,8-2,size
|
|
|
|
blt Lend1b
|
|
|
|
subcc %g0,%o4,%g0 ! restore cy
|
|
|
|
/* Subtract blocks of 2 limbs while at least 4 limbs remain (2 to store
   plus the 2-limb look-ahead) */
|
|
|
|
Loope1b:subxcc %g2,%g4,%o4
|
|
|
|
ld [s2_ptr+8],%g4
|
|
|
|
subxcc %g3,%g1,%o5
|
|
|
|
ld [s2_ptr+12],%g1
|
|
|
|
ldd [s1_ptr+8],%g2
|
|
|
|
std %o4,[res_ptr+0]
|
|
|
|
addx %g0,%g0,%o4 ! save cy in register
|
|
|
|
addcc size,-2,size
|
|
|
|
add s1_ptr,8,s1_ptr
|
|
|
|
add s2_ptr,8,s2_ptr
|
|
|
|
add res_ptr,8,res_ptr
|
|
|
|
bge Loope1b
|
|
|
|
subcc %g0,%o4,%g0 ! restore cy
|
|
|
|
/* Finish the limb pair that is already in registers. */
Lend1b: subxcc %g2,%g4,%o4
|
|
|
|
subxcc %g3,%g1,%o5
|
|
|
|
std %o4,[res_ptr+0]
|
|
|
|
addx %g0,%g0,%o4 ! save cy in register
|
|
|
|
|
|
|
|
/* Only even amounts were added to size since L_v1b, so bit 0 still tells
   whether one odd limb is left over. */
andcc size,1,%g0
|
|
|
|
be Lret1b
|
|
|
|
subcc %g0,%o4,%g0 ! restore cy
|
|
|
|
/* Subtract last limb.  It is at offset 8 because the pointers were not
   advanced past the pair stored at Lend1b. */
|
|
|
|
ld [s2_ptr+8],%g4
|
|
|
|
ld [s1_ptr+8],%g2
|
|
|
|
subxcc %g2,%g4,%o4
|
|
|
|
st %o4,[res_ptr+8]
|
|
|
|
|
|
|
|
/* The addx in the retl delay slot turns the carry flag, which holds the
   borrow of the last subtraction, into the 0 or 1 returned in %o0. */
Lret1b: retl
|
|
|
|
addx %g0,%g0,%o0 ! return carry-out from most sign. limb
|
|
|
|
|
|
|
|
! ** V2 **
|
|
|
|
/* If we come here, the alignment of s1_ptr and res_ptr as well as the
|
|
|
|
alignment of s2_ptr and res_ptr differ. Since there are only two ways
|
|
|
|
things can be aligned (that we care about) we now know that the alignment
|
|
|
|
of s1_ptr and s2_ptr are the same. */
|
|
|
|
|
|
|
|
/* V2 reads both sources with ldd and writes the result with two plain st
   instructions per pair, since res_ptr does not share the alignment of
   the sources here.  Nothing is preloaded, so the loop counters are biased
   by exactly the block size (8, then 2).  s1 limbs are in %g2/%g3, s2
   limbs in %o4/%o5; %o4 is reused for the saved borrow once a block is
   done.  Lend2, Ljone and Lret2 are also the tail of the V1a and V1b code
   paths when fewer than 2 limbs remain after their alignment step. */
/* A single limb goes straight to Ljone.  cmp leaves the carry flag clear
   when size equals 1, which is the borrow-in that the subxcc at Ljone
   consumes. */
L2: cmp size,1
|
|
|
|
be Ljone
|
|
|
|
nop
|
|
|
|
andcc s1_ptr,4,%g0 ! s1_ptr unaligned? Side effect: cy=0
|
1996-03-01 18:45:35 +00:00
|
|
|
be L_v2 ! if no, branch
|
1995-12-04 18:37:56 +00:00
|
|
|
nop
|
|
|
|
/* Subtract least significant limb separately to align s1_ptr and s2_ptr */
|
|
|
|
ld [s1_ptr],%g4
|
|
|
|
add s1_ptr,4,s1_ptr
|
|
|
|
ld [s2_ptr],%g2
|
|
|
|
add s2_ptr,4,s2_ptr
|
|
|
|
add size,-1,size
|
|
|
|
/* Plain subcc: this is the first limb, so there is no borrow coming in. */
subcc %g4,%g2,%o4
|
|
|
|
st %o4,[res_ptr]
|
|
|
|
add res_ptr,4,res_ptr
|
|
|
|
|
|
|
|
L_v2: addx %g0,%g0,%o4 ! save cy in register
|
|
|
|
addcc size,-8,size
|
|
|
|
blt Lfin2
|
|
|
|
subcc %g0,%o4,%g0 ! restore cy
|
|
|
|
/* Subtract blocks of 8 limbs until less than 8 limbs remain */
|
|
|
|
Loop2: ldd [s1_ptr+0],%g2
|
|
|
|
/* This load overwrites the borrow saved in %o4; it has already been put
   back into the carry flag by the delay slot of the branch that got here. */
ldd [s2_ptr+0],%o4
|
|
|
|
subxcc %g2,%o4,%g2
|
|
|
|
st %g2,[res_ptr+0]
|
|
|
|
subxcc %g3,%o5,%g3
|
|
|
|
st %g3,[res_ptr+4]
|
|
|
|
ldd [s1_ptr+8],%g2
|
|
|
|
ldd [s2_ptr+8],%o4
|
|
|
|
subxcc %g2,%o4,%g2
|
|
|
|
st %g2,[res_ptr+8]
|
|
|
|
subxcc %g3,%o5,%g3
|
|
|
|
st %g3,[res_ptr+12]
|
|
|
|
ldd [s1_ptr+16],%g2
|
|
|
|
ldd [s2_ptr+16],%o4
|
|
|
|
subxcc %g2,%o4,%g2
|
|
|
|
st %g2,[res_ptr+16]
|
|
|
|
subxcc %g3,%o5,%g3
|
|
|
|
st %g3,[res_ptr+20]
|
|
|
|
ldd [s1_ptr+24],%g2
|
|
|
|
ldd [s2_ptr+24],%o4
|
|
|
|
subxcc %g2,%o4,%g2
|
|
|
|
st %g2,[res_ptr+24]
|
|
|
|
subxcc %g3,%o5,%g3
|
|
|
|
st %g3,[res_ptr+28]
|
|
|
|
addx %g0,%g0,%o4 ! save cy in register
|
|
|
|
addcc size,-8,size
|
|
|
|
add s1_ptr,32,s1_ptr
|
|
|
|
add s2_ptr,32,s2_ptr
|
|
|
|
add res_ptr,32,res_ptr
|
|
|
|
bge Loop2
|
|
|
|
subcc %g0,%o4,%g0 ! restore cy
|
|
|
|
|
|
|
|
/* Here size is (limbs left) - 8.  Adding 8-2 rebiases it to
   (limbs left) - 2 for the 2-limb loop. */
Lfin2: addcc size,8-2,size
|
|
|
|
blt Lend2
|
|
|
|
subcc %g0,%o4,%g0 ! restore cy
|
|
|
|
/* Subtract blocks of 2 limbs until less than 2 limbs remain */
Loope2: ldd [s1_ptr+0],%g2
|
|
|
|
ldd [s2_ptr+0],%o4
|
|
|
|
subxcc %g2,%o4,%g2
|
|
|
|
st %g2,[res_ptr+0]
|
|
|
|
subxcc %g3,%o5,%g3
|
|
|
|
st %g3,[res_ptr+4]
|
|
|
|
addx %g0,%g0,%o4 ! save cy in register
|
|
|
|
addcc size,-2,size
|
|
|
|
add s1_ptr,8,s1_ptr
|
|
|
|
add s2_ptr,8,s2_ptr
|
|
|
|
add res_ptr,8,res_ptr
|
|
|
|
bge Loope2
|
|
|
|
subcc %g0,%o4,%g0 ! restore cy
|
|
|
|
/* Shared tail.  On every path that reaches Lend2 the borrow is in %o4 and
   bit 0 of size tells whether one limb is still to be processed (only even
   amounts were added to size after the alignment step). */
Lend2: andcc size,1,%g0
|
|
|
|
be Lret2
|
|
|
|
subcc %g0,%o4,%g0 ! restore cy
|
|
|
|
/* Subtract last limb */
|
|
|
|
Ljone: ld [s1_ptr],%g4
|
|
|
|
ld [s2_ptr],%g2
|
|
|
|
subxcc %g4,%g2,%o4
|
|
|
|
st %o4,[res_ptr]
|
|
|
|
|
|
|
|
/* The addx in the retl delay slot turns the carry flag, which holds the
   borrow of the last subtraction, into the 0 or 1 returned in %o0. */
Lret2: retl
|
|
|
|
addx %g0,%g0,%o0 ! return carry-out from most sign. limb
|