glibc/sysdeps/i386/i686/multiarch/strcmp-ssse3.S

2222 lines
36 KiB
ArmAsm

/* strcmp with SSSE3
Copyright (C) 2010 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#ifndef NOT_IN_libc
#include <sysdep.h>
#include "asm-syntax.h"
#define CFI_PUSH(REG) \
cfi_adjust_cfa_offset (4); \
cfi_rel_offset (REG, 0)
#define CFI_POP(REG) \
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)
#ifndef USE_AS_STRNCMP
# ifndef STRCMP
# define STRCMP __strcmp_ssse3
# endif
# define STR1 4
# define STR2 STR1+4
# define RETURN ret; .p2align 4
# define UPDATE_STRNCMP_COUNTER
#else
# ifndef STRCMP
# define STRCMP __strncmp_ssse3
# endif
# define STR1 8
# define STR2 STR1+4
# define CNT STR2+4
# define RETURN POP (%ebp); ret; .p2align 4; CFI_PUSH (%ebp)
# define UPDATE_STRNCMP_COUNTER \
/* calculate left number to compare */ \
mov $16, %esi; \
sub %ecx, %esi; \
cmp %esi, %ebp; \
jbe L(more8byteseq); \
sub %esi, %ebp
#endif
.section .text.ssse3,"ax",@progbits
ENTRY (STRCMP)
#ifdef USE_AS_STRNCMP
PUSH (%ebp)
#endif
movl STR1(%esp), %edx
movl STR2(%esp), %eax
#ifdef USE_AS_STRNCMP
movl CNT(%esp), %ebp
cmp $16, %ebp
jb L(less16bytes_sncmp)
jmp L(more16bytes)
#endif
movzbl (%eax), %ecx
cmpb %cl, (%edx)
jne L(neq)
cmpl $0, %ecx
je L(eq)
movzbl 1(%eax), %ecx
cmpb %cl, 1(%edx)
jne L(neq)
cmpl $0, %ecx
je L(eq)
movzbl 2(%eax), %ecx
cmpb %cl, 2(%edx)
jne L(neq)
cmpl $0, %ecx
je L(eq)
movzbl 3(%eax), %ecx
cmpb %cl, 3(%edx)
jne L(neq)
cmpl $0, %ecx
je L(eq)
movzbl 4(%eax), %ecx
cmpb %cl, 4(%edx)
jne L(neq)
cmpl $0, %ecx
je L(eq)
movzbl 5(%eax), %ecx
cmpb %cl, 5(%edx)
jne L(neq)
cmpl $0, %ecx
je L(eq)
movzbl 6(%eax), %ecx
cmpb %cl, 6(%edx)
jne L(neq)
cmpl $0, %ecx
je L(eq)
movzbl 7(%eax), %ecx
cmpb %cl, 7(%edx)
jne L(neq)
cmpl $0, %ecx
je L(eq)
add $8, %edx
add $8, %eax
#ifdef USE_AS_STRNCMP
cmp $8, %ebp
lea -8(%ebp), %ebp
je L(eq)
L(more16bytes):
#endif
movl %edx, %ecx
and $0xfff, %ecx
cmp $0xff0, %ecx
ja L(crosspage)
mov %eax, %ecx
and $0xfff, %ecx
cmp $0xff0, %ecx
ja L(crosspage)
pxor %xmm0, %xmm0
movlpd (%eax), %xmm1
movlpd (%edx), %xmm2
movhpd 8(%eax), %xmm1
movhpd 8(%edx), %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %ecx
sub $0xffff, %ecx
jnz L(less16bytes)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(eq)
#endif
add $16, %eax
add $16, %edx
L(crosspage):
PUSH (%ebx)
PUSH (%edi)
PUSH (%esi)
#ifdef USE_AS_STRNCMP
cfi_remember_state
#endif
movl %edx, %edi
movl %eax, %ecx
and $0xf, %ecx
and $0xf, %edi
xor %ecx, %eax
xor %edi, %edx
xor %ebx, %ebx
cmp %edi, %ecx
je L(ashr_0)
ja L(bigger)
or $0x20, %ebx
xchg %edx, %eax
xchg %ecx, %edi
L(bigger):
lea 15(%edi), %edi
sub %ecx, %edi
cmp $8, %edi
jle L(ashr_less_8)
cmp $14, %edi
je L(ashr_15)
cmp $13, %edi
je L(ashr_14)
cmp $12, %edi
je L(ashr_13)
cmp $11, %edi
je L(ashr_12)
cmp $10, %edi
je L(ashr_11)
cmp $9, %edi
je L(ashr_10)
L(ashr_less_8):
je L(ashr_9)
cmp $7, %edi
je L(ashr_8)
cmp $6, %edi
je L(ashr_7)
cmp $5, %edi
je L(ashr_6)
cmp $4, %edi
je L(ashr_5)
cmp $3, %edi
je L(ashr_4)
cmp $2, %edi
je L(ashr_3)
cmp $1, %edi
je L(ashr_2)
cmp $0, %edi
je L(ashr_1)
/*
* The following cases will be handled by ashr_0
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(0~15) n(0~15) 15(15+ n-n) ashr_0
*/
.p2align 4
L(ashr_0):
mov $0xffff, %esi
movdqa (%eax), %xmm1
pxor %xmm0, %xmm0
pcmpeqb %xmm1, %xmm0
pcmpeqb (%edx), %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %edi
shr %cl, %esi
shr %cl, %edi
sub %edi, %esi
mov %ecx, %edi
jne L(less32bytes)
UPDATE_STRNCMP_COUNTER
mov $0x10, %ebx
mov $0x10, %ecx
pxor %xmm0, %xmm0
.p2align 4
L(loop_ashr_0):
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
jmp L(loop_ashr_0)
/*
* The following cases will be handled by ashr_1
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(15) n -15 0(15 +(n-15) - n) ashr_1
*/
.p2align 4
L(ashr_1):
mov $0xffff, %esi
pxor %xmm0, %xmm0
movdqa (%edx), %xmm2
movdqa (%eax), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $15, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %edi
shr %cl, %esi
shr %cl, %edi
sub %edi, %esi
lea -15(%ecx), %edi
jnz L(less32bytes)
UPDATE_STRNCMP_COUNTER
movdqa (%edx), %xmm3
pxor %xmm0, %xmm0
mov $16, %ecx
or $1, %ebx
lea 1(%edx), %edi
and $0xfff, %edi
sub $0x1000, %edi
.p2align 4
L(loop_ashr_1):
add $16, %edi
jg L(nibble_ashr_1)
L(gobble_ashr_1):
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $1, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
add $16, %edi
jg L(nibble_ashr_1)
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $1, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
jmp L(loop_ashr_1)
.p2align 4
L(nibble_ashr_1):
pcmpeqb %xmm3, %xmm0
pmovmskb %xmm0, %esi
test $0xfffe, %esi
jnz L(ashr_1_exittail)
#ifdef USE_AS_STRNCMP
cmp $15, %ebp
jbe L(ashr_1_exittail)
#endif
pxor %xmm0, %xmm0
sub $0x1000, %edi
jmp L(gobble_ashr_1)
.p2align 4
L(ashr_1_exittail):
movdqa (%eax, %ecx), %xmm1
psrldq $1, %xmm0
psrldq $1, %xmm3
jmp L(aftertail)
/*
* The following cases will be handled by ashr_2
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(14~15) n -14 1(15 +(n-14) - n) ashr_2
*/
.p2align 4
L(ashr_2):
mov $0xffff, %esi
pxor %xmm0, %xmm0
movdqa (%edx), %xmm2
movdqa (%eax), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $14, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %edi
shr %cl, %esi
shr %cl, %edi
sub %edi, %esi
lea -14(%ecx), %edi
jnz L(less32bytes)
UPDATE_STRNCMP_COUNTER
movdqa (%edx), %xmm3
pxor %xmm0, %xmm0
mov $16, %ecx
or $2, %ebx
lea 2(%edx), %edi
and $0xfff, %edi
sub $0x1000, %edi
.p2align 4
L(loop_ashr_2):
add $16, %edi
jg L(nibble_ashr_2)
L(gobble_ashr_2):
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $2, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
add $16, %edi
jg L(nibble_ashr_2)
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $2, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
jmp L(loop_ashr_2)
.p2align 4
L(nibble_ashr_2):
pcmpeqb %xmm3, %xmm0
pmovmskb %xmm0, %esi
test $0xfffc, %esi
jnz L(ashr_2_exittail)
#ifdef USE_AS_STRNCMP
cmp $14, %ebp
jbe L(ashr_2_exittail)
#endif
pxor %xmm0, %xmm0
sub $0x1000, %edi
jmp L(gobble_ashr_2)
.p2align 4
L(ashr_2_exittail):
movdqa (%eax, %ecx), %xmm1
psrldq $2, %xmm0
psrldq $2, %xmm3
jmp L(aftertail)
/*
* The following cases will be handled by ashr_3
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(13~15) n -13 2(15 +(n-13) - n) ashr_3
*/
.p2align 4
L(ashr_3):
mov $0xffff, %esi
pxor %xmm0, %xmm0
movdqa (%edx), %xmm2
movdqa (%eax), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $13, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %edi
shr %cl, %esi
shr %cl, %edi
sub %edi, %esi
lea -13(%ecx), %edi
jnz L(less32bytes)
UPDATE_STRNCMP_COUNTER
movdqa (%edx), %xmm3
pxor %xmm0, %xmm0
mov $16, %ecx
or $3, %ebx
lea 3(%edx), %edi
and $0xfff, %edi
sub $0x1000, %edi
.p2align 4
L(loop_ashr_3):
add $16, %edi
jg L(nibble_ashr_3)
L(gobble_ashr_3):
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $3, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
add $16, %edi
jg L(nibble_ashr_3)
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $3, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
jmp L(loop_ashr_3)
.p2align 4
L(nibble_ashr_3):
pcmpeqb %xmm3, %xmm0
pmovmskb %xmm0, %esi
test $0xfff8, %esi
jnz L(ashr_3_exittail)
#ifdef USE_AS_STRNCMP
cmp $13, %ebp
jbe L(ashr_3_exittail)
#endif
pxor %xmm0, %xmm0
sub $0x1000, %edi
jmp L(gobble_ashr_3)
.p2align 4
L(ashr_3_exittail):
movdqa (%eax, %ecx), %xmm1
psrldq $3, %xmm0
psrldq $3, %xmm3
jmp L(aftertail)
/*
* The following cases will be handled by ashr_4
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(12~15) n -12 3(15 +(n-12) - n) ashr_4
*/
.p2align 4
L(ashr_4):
mov $0xffff, %esi
pxor %xmm0, %xmm0
movdqa (%edx), %xmm2
movdqa (%eax), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $12, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %edi
shr %cl, %esi
shr %cl, %edi
sub %edi, %esi
lea -12(%ecx), %edi
jnz L(less32bytes)
UPDATE_STRNCMP_COUNTER
movdqa (%edx), %xmm3
pxor %xmm0, %xmm0
mov $16, %ecx
or $4, %ebx
lea 4(%edx), %edi
and $0xfff, %edi
sub $0x1000, %edi
.p2align 4
L(loop_ashr_4):
add $16, %edi
jg L(nibble_ashr_4)
L(gobble_ashr_4):
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $4, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
add $16, %edi
jg L(nibble_ashr_4)
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $4, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
jmp L(loop_ashr_4)
.p2align 4
L(nibble_ashr_4):
pcmpeqb %xmm3, %xmm0
pmovmskb %xmm0, %esi
test $0xfff0, %esi
jnz L(ashr_4_exittail)
#ifdef USE_AS_STRNCMP
cmp $12, %ebp
jbe L(ashr_4_exittail)
#endif
pxor %xmm0, %xmm0
sub $0x1000, %edi
jmp L(gobble_ashr_4)
.p2align 4
L(ashr_4_exittail):
movdqa (%eax, %ecx), %xmm1
psrldq $4, %xmm0
psrldq $4, %xmm3
jmp L(aftertail)
/*
* The following cases will be handled by ashr_5
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(11~15) n -11 4(15 +(n-11) - n) ashr_5
*/
.p2align 4
L(ashr_5):
mov $0xffff, %esi
pxor %xmm0, %xmm0
movdqa (%edx), %xmm2
movdqa (%eax), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $11, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %edi
shr %cl, %esi
shr %cl, %edi
sub %edi, %esi
lea -11(%ecx), %edi
jnz L(less32bytes)
UPDATE_STRNCMP_COUNTER
movdqa (%edx), %xmm3
pxor %xmm0, %xmm0
mov $16, %ecx
or $5, %ebx
lea 5(%edx), %edi
and $0xfff, %edi
sub $0x1000, %edi
.p2align 4
L(loop_ashr_5):
add $16, %edi
jg L(nibble_ashr_5)
L(gobble_ashr_5):
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $5, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
add $16, %edi
jg L(nibble_ashr_5)
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $5, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
jmp L(loop_ashr_5)
.p2align 4
L(nibble_ashr_5):
pcmpeqb %xmm3, %xmm0
pmovmskb %xmm0, %esi
test $0xffe0, %esi
jnz L(ashr_5_exittail)
#ifdef USE_AS_STRNCMP
cmp $11, %ebp
jbe L(ashr_5_exittail)
#endif
pxor %xmm0, %xmm0
sub $0x1000, %edi
jmp L(gobble_ashr_5)
.p2align 4
L(ashr_5_exittail):
movdqa (%eax, %ecx), %xmm1
psrldq $5, %xmm0
psrldq $5, %xmm3
jmp L(aftertail)
/*
* The following cases will be handled by ashr_6
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(10~15) n -10 5(15 +(n-10) - n) ashr_6
*/
.p2align 4
L(ashr_6):
mov $0xffff, %esi
pxor %xmm0, %xmm0
movdqa (%edx), %xmm2
movdqa (%eax), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $10, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %edi
shr %cl, %esi
shr %cl, %edi
sub %edi, %esi
lea -10(%ecx), %edi
jnz L(less32bytes)
UPDATE_STRNCMP_COUNTER
movdqa (%edx), %xmm3
pxor %xmm0, %xmm0
mov $16, %ecx
or $6, %ebx
lea 6(%edx), %edi
and $0xfff, %edi
sub $0x1000, %edi
.p2align 4
L(loop_ashr_6):
add $16, %edi
jg L(nibble_ashr_6)
L(gobble_ashr_6):
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $6, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
add $16, %edi
jg L(nibble_ashr_6)
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $6, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
jmp L(loop_ashr_6)
.p2align 4
L(nibble_ashr_6):
pcmpeqb %xmm3, %xmm0
pmovmskb %xmm0, %esi
test $0xffc0, %esi
jnz L(ashr_6_exittail)
#ifdef USE_AS_STRNCMP
cmp $10, %ebp
jbe L(ashr_6_exittail)
#endif
pxor %xmm0, %xmm0
sub $0x1000, %edi
jmp L(gobble_ashr_6)
.p2align 4
L(ashr_6_exittail):
movdqa (%eax, %ecx), %xmm1
psrldq $6, %xmm0
psrldq $6, %xmm3
jmp L(aftertail)
/*
* The following cases will be handled by ashr_7
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(9~15) n - 9 6(15 +(n-9) - n) ashr_7
*/
.p2align 4
L(ashr_7):
mov $0xffff, %esi
pxor %xmm0, %xmm0
movdqa (%edx), %xmm2
movdqa (%eax), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $9, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %edi
shr %cl, %esi
shr %cl, %edi
sub %edi, %esi
lea -9(%ecx), %edi
jnz L(less32bytes)
UPDATE_STRNCMP_COUNTER
movdqa (%edx), %xmm3
pxor %xmm0, %xmm0
mov $16, %ecx
or $7, %ebx
lea 8(%edx), %edi
and $0xfff, %edi
sub $0x1000, %edi
.p2align 4
L(loop_ashr_7):
add $16, %edi
jg L(nibble_ashr_7)
L(gobble_ashr_7):
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $7, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
add $16, %edi
jg L(nibble_ashr_7)
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $7, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
jmp L(loop_ashr_7)
.p2align 4
L(nibble_ashr_7):
pcmpeqb %xmm3, %xmm0
pmovmskb %xmm0, %esi
test $0xff80, %esi
jnz L(ashr_7_exittail)
#ifdef USE_AS_STRNCMP
cmp $9, %ebp
jbe L(ashr_7_exittail)
#endif
pxor %xmm0, %xmm0
pxor %xmm0, %xmm0
sub $0x1000, %edi
jmp L(gobble_ashr_7)
.p2align 4
L(ashr_7_exittail):
movdqa (%eax, %ecx), %xmm1
psrldq $7, %xmm0
psrldq $7, %xmm3
jmp L(aftertail)
/*
* The following cases will be handled by ashr_8
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(8~15) n - 8 7(15 +(n-8) - n) ashr_8
*/
.p2align 4
L(ashr_8):
mov $0xffff, %esi
pxor %xmm0, %xmm0
movdqa (%edx), %xmm2
movdqa (%eax), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $8, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %edi
shr %cl, %esi
shr %cl, %edi
sub %edi, %esi
lea -8(%ecx), %edi
jnz L(less32bytes)
UPDATE_STRNCMP_COUNTER
movdqa (%edx), %xmm3
pxor %xmm0, %xmm0
mov $16, %ecx
or $8, %ebx
lea 8(%edx), %edi
and $0xfff, %edi
sub $0x1000, %edi
.p2align 4
L(loop_ashr_8):
add $16, %edi
jg L(nibble_ashr_8)
L(gobble_ashr_8):
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $8, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
add $16, %edi
jg L(nibble_ashr_8)
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $8, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
jmp L(loop_ashr_8)
.p2align 4
L(nibble_ashr_8):
pcmpeqb %xmm3, %xmm0
pmovmskb %xmm0, %esi
test $0xff00, %esi
jnz L(ashr_8_exittail)
#ifdef USE_AS_STRNCMP
cmp $8, %ebp
jbe L(ashr_8_exittail)
#endif
pxor %xmm0, %xmm0
pxor %xmm0, %xmm0
sub $0x1000, %edi
jmp L(gobble_ashr_8)
.p2align 4
L(ashr_8_exittail):
movdqa (%eax, %ecx), %xmm1
psrldq $8, %xmm0
psrldq $8, %xmm3
jmp L(aftertail)
/*
* The following cases will be handled by ashr_9
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(7~15) n - 7 8(15 +(n-7) - n) ashr_9
*/
.p2align 4
L(ashr_9):
mov $0xffff, %esi
pxor %xmm0, %xmm0
movdqa (%edx), %xmm2
movdqa (%eax), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $7, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %edi
shr %cl, %esi
shr %cl, %edi
sub %edi, %esi
lea -7(%ecx), %edi
jnz L(less32bytes)
UPDATE_STRNCMP_COUNTER
movdqa (%edx), %xmm3
pxor %xmm0, %xmm0
mov $16, %ecx
or $9, %ebx
lea 9(%edx), %edi
and $0xfff, %edi
sub $0x1000, %edi
.p2align 4
L(loop_ashr_9):
add $16, %edi
jg L(nibble_ashr_9)
L(gobble_ashr_9):
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $9, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
add $16, %edi
jg L(nibble_ashr_9)
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $9, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
jmp L(loop_ashr_9)
.p2align 4
L(nibble_ashr_9):
pcmpeqb %xmm3, %xmm0
pmovmskb %xmm0, %esi
test $0xfe00, %esi
jnz L(ashr_9_exittail)
#ifdef USE_AS_STRNCMP
cmp $7, %ebp
jbe L(ashr_9_exittail)
#endif
pxor %xmm0, %xmm0
sub $0x1000, %edi
jmp L(gobble_ashr_9)
.p2align 4
L(ashr_9_exittail):
movdqa (%eax, %ecx), %xmm1
psrldq $9, %xmm0
psrldq $9, %xmm3
jmp L(aftertail)
/*
* The following cases will be handled by ashr_10
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(6~15) n - 6 9(15 +(n-6) - n) ashr_10
*/
.p2align 4
L(ashr_10):
mov $0xffff, %esi
pxor %xmm0, %xmm0
movdqa (%edx), %xmm2
movdqa (%eax), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $6, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %edi
shr %cl, %esi
shr %cl, %edi
sub %edi, %esi
lea -6(%ecx), %edi
jnz L(less32bytes)
UPDATE_STRNCMP_COUNTER
movdqa (%edx), %xmm3
pxor %xmm0, %xmm0
mov $16, %ecx
or $10, %ebx
lea 10(%edx), %edi
and $0xfff, %edi
sub $0x1000, %edi
.p2align 4
L(loop_ashr_10):
add $16, %edi
jg L(nibble_ashr_10)
L(gobble_ashr_10):
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $10, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
add $16, %edi
jg L(nibble_ashr_10)
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $10, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
jmp L(loop_ashr_10)
.p2align 4
L(nibble_ashr_10):
pcmpeqb %xmm3, %xmm0
pmovmskb %xmm0, %esi
test $0xfc00, %esi
jnz L(ashr_10_exittail)
#ifdef USE_AS_STRNCMP
cmp $6, %ebp
jbe L(ashr_10_exittail)
#endif
pxor %xmm0, %xmm0
sub $0x1000, %edi
jmp L(gobble_ashr_10)
.p2align 4
L(ashr_10_exittail):
movdqa (%eax, %ecx), %xmm1
psrldq $10, %xmm0
psrldq $10, %xmm3
jmp L(aftertail)
/*
* The following cases will be handled by ashr_11
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(5~15) n - 5 10(15 +(n-5) - n) ashr_11
*/
.p2align 4
L(ashr_11):
mov $0xffff, %esi
pxor %xmm0, %xmm0
movdqa (%edx), %xmm2
movdqa (%eax), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $5, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %edi
shr %cl, %esi
shr %cl, %edi
sub %edi, %esi
lea -5(%ecx), %edi
jnz L(less32bytes)
UPDATE_STRNCMP_COUNTER
movdqa (%edx), %xmm3
pxor %xmm0, %xmm0
mov $16, %ecx
or $11, %ebx
lea 11(%edx), %edi
and $0xfff, %edi
sub $0x1000, %edi
.p2align 4
L(loop_ashr_11):
add $16, %edi
jg L(nibble_ashr_11)
L(gobble_ashr_11):
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $11, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
add $16, %edi
jg L(nibble_ashr_11)
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $11, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
jmp L(loop_ashr_11)
.p2align 4
L(nibble_ashr_11):
pcmpeqb %xmm3, %xmm0
pmovmskb %xmm0, %esi
test $0xf800, %esi
jnz L(ashr_11_exittail)
#ifdef USE_AS_STRNCMP
cmp $5, %ebp
jbe L(ashr_11_exittail)
#endif
pxor %xmm0, %xmm0
sub $0x1000, %edi
jmp L(gobble_ashr_11)
.p2align 4
L(ashr_11_exittail):
movdqa (%eax, %ecx), %xmm1
psrldq $11, %xmm0
psrldq $11, %xmm3
jmp L(aftertail)
/*
* The following cases will be handled by ashr_12
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(4~15) n - 4 11(15 +(n-4) - n) ashr_12
*/
.p2align 4
L(ashr_12):
mov $0xffff, %esi
pxor %xmm0, %xmm0
movdqa (%edx), %xmm2
movdqa (%eax), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $4, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %edi
shr %cl, %esi
shr %cl, %edi
sub %edi, %esi
lea -4(%ecx), %edi
jnz L(less32bytes)
UPDATE_STRNCMP_COUNTER
movdqa (%edx), %xmm3
pxor %xmm0, %xmm0
mov $16, %ecx
or $12, %ebx
lea 12(%edx), %edi
and $0xfff, %edi
sub $0x1000, %edi
.p2align 4
L(loop_ashr_12):
add $16, %edi
jg L(nibble_ashr_12)
L(gobble_ashr_12):
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $12, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
add $16, %edi
jg L(nibble_ashr_12)
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $12, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
jmp L(loop_ashr_12)
.p2align 4
L(nibble_ashr_12):
pcmpeqb %xmm3, %xmm0
pmovmskb %xmm0, %esi
test $0xf000, %esi
jnz L(ashr_12_exittail)
#ifdef USE_AS_STRNCMP
cmp $4, %ebp
jbe L(ashr_12_exittail)
#endif
pxor %xmm0, %xmm0
sub $0x1000, %edi
jmp L(gobble_ashr_12)
.p2align 4
L(ashr_12_exittail):
movdqa (%eax, %ecx), %xmm1
psrldq $12, %xmm0
psrldq $12, %xmm3
jmp L(aftertail)
/*
* The following cases will be handled by ashr_13
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(3~15) n - 3 12(15 +(n-3) - n) ashr_13
*/
.p2align 4
L(ashr_13):
mov $0xffff, %esi
pxor %xmm0, %xmm0
movdqa (%edx), %xmm2
movdqa (%eax), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $3, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %edi
shr %cl, %esi
shr %cl, %edi
sub %edi, %esi
lea -3(%ecx), %edi
jnz L(less32bytes)
UPDATE_STRNCMP_COUNTER
movdqa (%edx), %xmm3
pxor %xmm0, %xmm0
mov $16, %ecx
or $13, %ebx
lea 13(%edx), %edi
and $0xfff, %edi
sub $0x1000, %edi
.p2align 4
L(loop_ashr_13):
add $16, %edi
jg L(nibble_ashr_13)
L(gobble_ashr_13):
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $13, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
add $16, %edi
jg L(nibble_ashr_13)
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $13, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
jmp L(loop_ashr_13)
.p2align 4
L(nibble_ashr_13):
pcmpeqb %xmm3, %xmm0
pmovmskb %xmm0, %esi
test $0xe000, %esi
jnz L(ashr_13_exittail)
#ifdef USE_AS_STRNCMP
cmp $3, %ebp
jbe L(ashr_13_exittail)
#endif
pxor %xmm0, %xmm0
sub $0x1000, %edi
jmp L(gobble_ashr_13)
.p2align 4
L(ashr_13_exittail):
movdqa (%eax, %ecx), %xmm1
psrldq $13, %xmm0
psrldq $13, %xmm3
jmp L(aftertail)
/*
* The following cases will be handled by ashr_14
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(2~15) n - 2 13(15 +(n-2) - n) ashr_14
*/
.p2align 4
L(ashr_14):
mov $0xffff, %esi
pxor %xmm0, %xmm0
movdqa (%edx), %xmm2
movdqa (%eax), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $2, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %edi
shr %cl, %esi
shr %cl, %edi
sub %edi, %esi
lea -2(%ecx), %edi
jnz L(less32bytes)
UPDATE_STRNCMP_COUNTER
movdqa (%edx), %xmm3
pxor %xmm0, %xmm0
mov $16, %ecx
or $14, %ebx
lea 14(%edx), %edi
and $0xfff, %edi
sub $0x1000, %edi
.p2align 4
L(loop_ashr_14):
add $16, %edi
jg L(nibble_ashr_14)
L(gobble_ashr_14):
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $14, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
add $16, %edi
jg L(nibble_ashr_14)
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $14, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
jmp L(loop_ashr_14)
.p2align 4
L(nibble_ashr_14):
pcmpeqb %xmm3, %xmm0
pmovmskb %xmm0, %esi
test $0xc000, %esi
jnz L(ashr_14_exittail)
#ifdef USE_AS_STRNCMP
cmp $2, %ebp
jbe L(ashr_14_exittail)
#endif
pxor %xmm0, %xmm0
sub $0x1000, %edi
jmp L(gobble_ashr_14)
.p2align 4
L(ashr_14_exittail):
movdqa (%eax, %ecx), %xmm1
psrldq $14, %xmm0
psrldq $14, %xmm3
jmp L(aftertail)
/*
* The following cases will be handled by ashr_14
* ecx(offset of esi) eax(offset of edi) relative offset corresponding case
* n(1~15) n - 1 14(15 +(n-1) - n) ashr_15
*/
.p2align 4
L(ashr_15):
mov $0xffff, %esi
pxor %xmm0, %xmm0
movdqa (%edx), %xmm2
movdqa (%eax), %xmm1
pcmpeqb %xmm1, %xmm0
pslldq $1, %xmm2
pcmpeqb %xmm1, %xmm2
psubb %xmm0, %xmm2
pmovmskb %xmm2, %edi
shr %cl, %esi
shr %cl, %edi
sub %edi, %esi
lea -1(%ecx), %edi
jnz L(less32bytes)
UPDATE_STRNCMP_COUNTER
movdqa (%edx), %xmm3
pxor %xmm0, %xmm0
mov $16, %ecx
or $15, %ebx
lea 15(%edx), %edi
and $0xfff, %edi
sub $0x1000, %edi
.p2align 4
L(loop_ashr_15):
add $16, %edi
jg L(nibble_ashr_15)
L(gobble_ashr_15):
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $15, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
add $16, %edi
jg L(nibble_ashr_15)
movdqa (%eax, %ecx), %xmm1
movdqa (%edx, %ecx), %xmm2
movdqa %xmm2, %xmm4
palignr $15, %xmm3, %xmm2
pcmpeqb %xmm1, %xmm0
pcmpeqb %xmm2, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
sub $0xffff, %esi
jnz L(exit)
#ifdef USE_AS_STRNCMP
cmp $16, %ebp
lea -16(%ebp), %ebp
jbe L(more8byteseq)
#endif
add $16, %ecx
movdqa %xmm4, %xmm3
jmp L(loop_ashr_15)
.p2align 4
L(nibble_ashr_15):
pcmpeqb %xmm3, %xmm0
pmovmskb %xmm0, %esi
test $0x8000, %esi
jnz L(ashr_15_exittail)
#ifdef USE_AS_STRNCMP
cmp $1, %ebp
jbe L(ashr_15_exittail)
#endif
pxor %xmm0, %xmm0
sub $0x1000, %edi
jmp L(gobble_ashr_15)
.p2align 4
L(ashr_15_exittail):
movdqa (%eax, %ecx), %xmm1
psrldq $15, %xmm0
psrldq $15, %xmm3
jmp L(aftertail)
.p2align 4
L(aftertail):
pcmpeqb %xmm3, %xmm1
psubb %xmm0, %xmm1
pmovmskb %xmm1, %esi
not %esi
L(exit):
mov %ebx, %edi
and $0x1f, %edi
lea -16(%edi, %ecx), %edi
L(less32bytes):
add %edi, %edx
add %ecx, %eax
test $0x20, %ebx
jz L(ret2)
xchg %eax, %edx
.p2align 4
L(ret2):
mov %esi, %ecx
POP (%esi)
POP (%edi)
POP (%ebx)
L(less16bytes):
test %cl, %cl
jz L(2next_8_bytes)
test $0x01, %cl
jnz L(Byte0)
test $0x02, %cl
jnz L(Byte1)
test $0x04, %cl
jnz L(Byte2)
test $0x08, %cl
jnz L(Byte3)
test $0x10, %cl
jnz L(Byte4)
test $0x20, %cl
jnz L(Byte5)
test $0x40, %cl
jnz L(Byte6)
#ifdef USE_AS_STRNCMP
cmp $7, %ebp
jbe L(eq)
#endif
movzx 7(%eax), %ecx
movzx 7(%edx), %eax
sub %ecx, %eax
RETURN
L(Byte0):
#ifdef USE_AS_STRNCMP
cmp $0, %ebp
jbe L(eq)
#endif
movzx (%eax), %ecx
movzx (%edx), %eax
sub %ecx, %eax
RETURN
L(Byte1):
#ifdef USE_AS_STRNCMP
cmp $1, %ebp
jbe L(eq)
#endif
movzx 1(%eax), %ecx
movzx 1(%edx), %eax
sub %ecx, %eax
RETURN
L(Byte2):
#ifdef USE_AS_STRNCMP
cmp $2, %ebp
jbe L(eq)
#endif
movzx 2(%eax), %ecx
movzx 2(%edx), %eax
sub %ecx, %eax
RETURN
L(Byte3):
#ifdef USE_AS_STRNCMP
cmp $3, %ebp
jbe L(eq)
#endif
movzx 3(%eax), %ecx
movzx 3(%edx), %eax
sub %ecx, %eax
RETURN
L(Byte4):
#ifdef USE_AS_STRNCMP
cmp $4, %ebp
jbe L(eq)
#endif
movzx 4(%eax), %ecx
movzx 4(%edx), %eax
sub %ecx, %eax
RETURN
L(Byte5):
#ifdef USE_AS_STRNCMP
cmp $5, %ebp
jbe L(eq)
#endif
movzx 5(%eax), %ecx
movzx 5(%edx), %eax
sub %ecx, %eax
RETURN
L(Byte6):
#ifdef USE_AS_STRNCMP
cmp $6, %ebp
jbe L(eq)
#endif
movzx 6(%eax), %ecx
movzx 6(%edx), %eax
sub %ecx, %eax
RETURN
L(2next_8_bytes):
add $8, %eax
add $8, %edx
#ifdef USE_AS_STRNCMP
cmp $8, %ebp
lea -8(%ebp), %ebp
jbe L(eq)
#endif
test $0x01, %ch
jnz L(Byte0)
test $0x02, %ch
jnz L(Byte1)
test $0x04, %ch
jnz L(Byte2)
test $0x08, %ch
jnz L(Byte3)
test $0x10, %ch
jnz L(Byte4)
test $0x20, %ch
jnz L(Byte5)
test $0x40, %ch
jnz L(Byte6)
#ifdef USE_AS_STRNCMP
cmp $7, %ebp
jbe L(eq)
#endif
movzx 7(%eax), %ecx
movzx 7(%edx), %eax
sub %ecx, %eax
RETURN
L(neq):
mov $1, %eax
ja L(neq_bigger)
neg %eax
L(neq_bigger):
#ifdef USE_AS_STRNCMP
POP (%ebp)
#endif
ret
#ifdef USE_AS_STRNCMP
.p2align 4
cfi_restore_state
L(more8byteseq):
POP (%esi)
POP (%edi)
POP (%ebx)
#endif
L(eq):
#ifdef USE_AS_STRNCMP
POP (%ebp)
#endif
xorl %eax, %eax
ret
#ifdef USE_AS_STRNCMP
.p2align 4
CFI_PUSH (%ebp)
L(less16bytes_sncmp):
test %ebp, %ebp
jz L(eq)
movzbl (%eax), %ecx
cmpb %cl, (%edx)
jne L(neq)
test %cl, %cl
je L(eq)
cmp $1, %ebp
je L(eq)
movzbl 1(%eax), %ecx
cmpb %cl, 1(%edx)
jne L(neq)
test %cl, %cl
je L(eq)
cmp $2, %ebp
je L(eq)
movzbl 2(%eax), %ecx
cmpb %cl, 2(%edx)
jne L(neq)
test %cl, %cl
je L(eq)
cmp $3, %ebp
je L(eq)
movzbl 3(%eax), %ecx
cmpb %cl, 3(%edx)
jne L(neq)
test %cl, %cl
je L(eq)
cmp $4, %ebp
je L(eq)
movzbl 4(%eax), %ecx
cmpb %cl, 4(%edx)
jne L(neq)
test %cl, %cl
je L(eq)
cmp $5, %ebp
je L(eq)
movzbl 5(%eax), %ecx
cmpb %cl, 5(%edx)
jne L(neq)
test %cl, %cl
je L(eq)
cmp $6, %ebp
je L(eq)
movzbl 6(%eax), %ecx
cmpb %cl, 6(%edx)
jne L(neq)
test %cl, %cl
je L(eq)
cmp $7, %ebp
je L(eq)
movzbl 7(%eax), %ecx
cmpb %cl, 7(%edx)
jne L(neq)
test %cl, %cl
je L(eq)
cmp $8, %ebp
je L(eq)
movzbl 8(%eax), %ecx
cmpb %cl, 8(%edx)
jne L(neq)
test %cl, %cl
je L(eq)
cmp $9, %ebp
je L(eq)
movzbl 9(%eax), %ecx
cmpb %cl, 9(%edx)
jne L(neq)
test %cl, %cl
je L(eq)
cmp $10, %ebp
je L(eq)
movzbl 10(%eax), %ecx
cmpb %cl, 10(%edx)
jne L(neq)
test %cl, %cl
je L(eq)
cmp $11, %ebp
je L(eq)
movzbl 11(%eax), %ecx
cmpb %cl, 11(%edx)
jne L(neq)
test %cl, %cl
je L(eq)
cmp $12, %ebp
je L(eq)
movzbl 12(%eax), %ecx
cmpb %cl, 12(%edx)
jne L(neq)
test %cl, %cl
je L(eq)
cmp $13, %ebp
je L(eq)
movzbl 13(%eax), %ecx
cmpb %cl, 13(%edx)
jne L(neq)
test %cl, %cl
je L(eq)
cmp $14, %ebp
je L(eq)
movzbl 14(%eax), %ecx
cmpb %cl, 14(%edx)
jne L(neq)
test %cl, %cl
je L(eq)
cmp $15, %ebp
je L(eq)
movzbl 15(%eax), %ecx
cmpb %cl, 15(%edx)
jne L(neq)
test %cl, %cl
je L(eq)
POP (%ebp)
xor %eax, %eax
ret
#endif
END (STRCMP)
#endif