Use mvcle for big blocks (> 64K) and a mvc loop for small blocks.
This commit is contained in:
parent
a6d214835f
commit
3c204435e9
@ -34,12 +34,14 @@ ENTRY(__bcopy)
|
|||||||
jnl .L0
|
jnl .L0
|
||||||
alr %r1,%r2
|
alr %r1,%r2
|
||||||
clr %r1,%r3
|
clr %r1,%r3
|
||||||
jh .L5
|
jh .L7
|
||||||
.L0: ahi %r4,-1 # length - 1
|
.L0: ahi %r4,-1 # length - 1
|
||||||
lr %r1,%r4
|
lr %r1,%r4
|
||||||
srl %r1,8
|
srl %r1,8
|
||||||
ltr %r1,%r1
|
ltr %r1,%r1 # < 256 bytes to move ?
|
||||||
jz .L2
|
jz .L2
|
||||||
|
chi %r1,255 # > 64K to move ?
|
||||||
|
jh .L5
|
||||||
.L1: mvc 0(256,%r3),0(%r2) # move in 256 byte chunks
|
.L1: mvc 0(256,%r3),0(%r2) # move in 256 byte chunks
|
||||||
la %r2,256(%r2)
|
la %r2,256(%r2)
|
||||||
la %r3,256(%r3)
|
la %r3,256(%r3)
|
||||||
@ -49,22 +51,31 @@ ENTRY(__bcopy)
|
|||||||
.L3: ex %r4,0(%r1) # execute mvc with length ((%r4)&255)+1
|
.L3: ex %r4,0(%r1) # execute mvc with length ((%r4)&255)+1
|
||||||
.L4: br %r14
|
.L4: br %r14
|
||||||
|
|
||||||
.L5: # destructive overlay, can not use mvcle
|
# data copies > 64K are faster with mvcle.
|
||||||
|
.L5: ahi %r4,1 # length + 1
|
||||||
|
lr %r5,%r4 # source length
|
||||||
|
lr %r4,%r2 # source address
|
||||||
|
lr %r2,%r3 # set destination
|
||||||
|
lr %r3,%r5 # destination length = source length
|
||||||
|
.L6: mvcle %r2,%r4,0 # thats it, MVCLE is your friend
|
||||||
|
jo .L6
|
||||||
|
br %r14
|
||||||
|
.L7: # destructive overlay, can not use mvcle
|
||||||
lr %r1,%r2 # bcopy is called with source,dest
|
lr %r1,%r2 # bcopy is called with source,dest
|
||||||
lr %r2,%r3 # memmove with dest,source! Oh, well...
|
lr %r2,%r3 # memmove with dest,source! Oh, well...
|
||||||
lr %r3,%r1
|
lr %r3,%r1
|
||||||
basr %r1,0
|
basr %r1,0
|
||||||
.L6:
|
.L8:
|
||||||
#ifdef PIC
|
#ifdef PIC
|
||||||
al %r1,.L7-.L6(%r1) # get address of global offset table
|
al %r1,.L9-.L8(%r1) # get address of global offset table
|
||||||
# load address of memmove
|
# load address of memmove
|
||||||
l %r1,memmove@GOT12(%r1)
|
l %r1,memmove@GOT12(%r1)
|
||||||
br %r1
|
br %r1
|
||||||
.L7: .long _GLOBAL_OFFSET_TABLE_-.L6
|
.L9: .long _GLOBAL_OFFSET_TABLE_-.L8
|
||||||
#else
|
#else
|
||||||
al %r1,.L7-.L6(%r1) # load address of memmove
|
al %r1,.L9-.L8(%r1) # load address of memmove
|
||||||
br %r1 # jump to memmove
|
br %r1 # jump to memmove
|
||||||
.L7: .long memmove-.L6
|
.L9: .long memmove-.L8
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
END(__bcopy)
|
END(__bcopy)
|
||||||
|
@ -33,8 +33,11 @@ ENTRY(memcpy)
|
|||||||
ahi %r4,-1 # length - 1
|
ahi %r4,-1 # length - 1
|
||||||
lr %r1,%r2 # copy destination address
|
lr %r1,%r2 # copy destination address
|
||||||
lr %r5,%r4
|
lr %r5,%r4
|
||||||
sra %r5,8
|
srl %r5,8
|
||||||
|
ltr %r5,%r5 # < 256 bytes to move ?
|
||||||
jz .L1
|
jz .L1
|
||||||
|
chi %r5,255 # > 64K to move ?
|
||||||
|
jh .L4
|
||||||
.L0: mvc 0(256,%r1),0(%r3) # move in 256 byte chunks
|
.L0: mvc 0(256,%r1),0(%r3) # move in 256 byte chunks
|
||||||
la %r1,256(%r1)
|
la %r1,256(%r1)
|
||||||
la %r3,256(%r3)
|
la %r3,256(%r3)
|
||||||
@ -43,5 +46,15 @@ ENTRY(memcpy)
|
|||||||
mvc 0(1,%r1),0(%r3) # instruction for execute
|
mvc 0(1,%r1),0(%r3) # instruction for execute
|
||||||
.L2: ex %r4,0(%r5) # execute mvc with length ((%r4)&255)+1
|
.L2: ex %r4,0(%r5) # execute mvc with length ((%r4)&255)+1
|
||||||
.L3: br %r14
|
.L3: br %r14
|
||||||
|
# data copies > 64K are faster with mvcle.
|
||||||
|
.L4: ahi %r4,1 # length + 1
|
||||||
|
lr %r5,%r4 # source length
|
||||||
|
lr %r4,%r2 # source address -- NOTE(review): in memcpy %r2 is the destination and %r3 the source; verify operand setup
|
||||||
|
lr %r2,%r3 # set destination
|
||||||
|
lr %r3,%r5 # destination length = source length
|
||||||
|
.L5: mvcle %r2,%r4,0 # thats it, MVCLE is your friend
|
||||||
|
jo .L5
|
||||||
|
lr %r2,%r1 # return destination address
|
||||||
|
br %r14
|
||||||
END(memcpy)
|
END(memcpy)
|
||||||
|
|
||||||
|
@ -34,11 +34,13 @@ ENTRY(__bcopy)
|
|||||||
jnl .L0
|
jnl .L0
|
||||||
algr %r1,%r2
|
algr %r1,%r2
|
||||||
clgr %r1,%r3
|
clgr %r1,%r3
|
||||||
jh .L5
|
jh .L7
|
||||||
.L0: aghi %r4,-1 # length - 1
|
.L0: aghi %r4,-1 # length - 1
|
||||||
srlg %r1,%r4,8
|
srlg %r1,%r4,8
|
||||||
ltgr %r1,%r1
|
ltgr %r1,%r1 # < 256 bytes to move ?
|
||||||
jz .L2
|
jz .L2
|
||||||
|
cghi %r1,255 # > 64K to move ?
|
||||||
|
jh .L5
|
||||||
.L1: mvc 0(256,%r3),0(%r2) # move in 256 byte chunks
|
.L1: mvc 0(256,%r3),0(%r2) # move in 256 byte chunks
|
||||||
la %r2,256(%r2)
|
la %r2,256(%r2)
|
||||||
la %r3,256(%r3)
|
la %r3,256(%r3)
|
||||||
@ -47,8 +49,16 @@ ENTRY(__bcopy)
|
|||||||
mvc 0(1,%r3),0(%r2) # instruction for execute
|
mvc 0(1,%r3),0(%r2) # instruction for execute
|
||||||
.L3: ex %r4,0(%r1) # execute mvc with length ((%r4)&255)+1
|
.L3: ex %r4,0(%r1) # execute mvc with length ((%r4)&255)+1
|
||||||
.L4: br %r14
|
.L4: br %r14
|
||||||
|
# data copies > 64K are faster with mvcle.
|
||||||
.L5: # destructive overlay, can not use mvcle
|
.L5: aghi %r4,1 # length + 1
|
||||||
|
lgr %r5,%r4 # source length
|
||||||
|
lgr %r4,%r2 # source address
|
||||||
|
lgr %r2,%r3 # set destination
|
||||||
|
lgr %r3,%r5 # destination length = source length
|
||||||
|
.L6: mvcle %r2,%r4,0 # thats it, MVCLE is your friend
|
||||||
|
jo .L6
|
||||||
|
br %r14
|
||||||
|
.L7: # destructive overlay, can not use mvcle
|
||||||
lgr %r1,%r2 # bcopy is called with source,dest
|
lgr %r1,%r2 # bcopy is called with source,dest
|
||||||
lgr %r2,%r3 # memmove with dest,source! Oh, well...
|
lgr %r2,%r3 # memmove with dest,source! Oh, well...
|
||||||
lgr %r3,%r1
|
lgr %r3,%r1
|
||||||
|
@ -32,8 +32,11 @@ ENTRY(memcpy)
|
|||||||
jz .L3
|
jz .L3
|
||||||
aghi %r4,-1 # length - 1
|
aghi %r4,-1 # length - 1
|
||||||
lgr %r1,%r2 # copy destination address
|
lgr %r1,%r2 # copy destination address
|
||||||
srag %r5,%r4,8
|
srlg %r5,%r4,8
|
||||||
|
ltgr %r5,%r5 # < 256 bytes to move ?
|
||||||
jz .L1
|
jz .L1
|
||||||
|
chi %r6,255 # > 64K to move ? NOTE(review): the 256-byte block count is in %r5, not %r6
|
||||||
|
jh .L4
|
||||||
.L0: mvc 0(256,%r1),0(%r3) # move in 256 byte chunks
|
.L0: mvc 0(256,%r1),0(%r3) # move in 256 byte chunks
|
||||||
la %r1,256(%r1)
|
la %r1,256(%r1)
|
||||||
la %r3,256(%r3)
|
la %r3,256(%r3)
|
||||||
@ -42,5 +45,15 @@ ENTRY(memcpy)
|
|||||||
mvc 0(1,%r1),0(%r3) # instruction for execute
|
mvc 0(1,%r1),0(%r3) # instruction for execute
|
||||||
.L2: ex %r4,0(%r5) # execute mvc with length ((%r4)&255)+1
|
.L2: ex %r4,0(%r5) # execute mvc with length ((%r4)&255)+1
|
||||||
.L3: br %r14
|
.L3: br %r14
|
||||||
|
# data copies > 64K are faster with mvcle.
|
||||||
|
.L4: aghi %r4,1 # length + 1
|
||||||
|
lgr %r5,%r4 # source length
|
||||||
|
lgr %r4,%r2 # source address -- NOTE(review): in memcpy %r2 is the destination and %r3 the source; verify operand setup
|
||||||
|
lgr %r2,%r3 # set destination
|
||||||
|
lgr %r3,%r5 # destination length = source length
|
||||||
|
.L5: mvcle %r2,%r4,0 # thats it, MVCLE is your friend
|
||||||
|
jo .L5
|
||||||
|
lgr %r2,%r1 # return destination address
|
||||||
|
br %r14
|
||||||
END(memcpy)
|
END(memcpy)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user