forked from ports/contrib
sdl_gfx: add the missing patch to go with it
This commit is contained in:
parent
46b3bfd2d9
commit
f84f53058d
@ -6,7 +6,7 @@
|
||||
|
||||
name=sdl_gfx
|
||||
version=2.0.13
|
||||
release=2
|
||||
release=1
|
||||
source=(http://www.ferzkopp.net/Software/SDL_gfx-${version%.*}/SDL_gfx-$version.tar.gz
|
||||
sdl_gfx-$version-imageFilter-asm.fix.patch)
|
||||
|
||||
|
548
sdl_gfx/sdl_gfx-2.0.13-imageFilter-asm.fix.patch
Normal file
548
sdl_gfx/sdl_gfx-2.0.13-imageFilter-asm.fix.patch
Normal file
@ -0,0 +1,548 @@
|
||||
--- SDL_gfx-2.0.13.orig/SDL_imageFilter.c 2004-11-29 20:53:35.000000000 +0100
|
||||
+++ SDL_gfx-2.0.13/SDL_imageFilter.c 2008-04-22 18:11:27.000000000 +0200
|
||||
@@ -81,13 +81,13 @@
|
||||
"mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx
|
||||
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
|
||||
".align 16 \n\t" // 16 byte allignment of the loop entry
|
||||
- ".L1010: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
|
||||
+ "1: movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
|
||||
"paddusb (%%ebx), %%mm1 \n\t" // mm1=Src1+Src2 (add 8 bytes with saturation)
|
||||
"movq %%mm1, (%%edi) \n\t" // store result in Dest
|
||||
"add $8, %%eax \n\t" // increase Src1, Src2 and Dest
|
||||
"add $8, %%ebx \n\t" // register pointers by 8
|
||||
"add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L1010 \n\t" // check loop termination, proceed if required
|
||||
+ "jnz 1b \n\t" // check loop termination, proceed if required
|
||||
"emms \n\t" // exit MMX state
|
||||
"popa \n\t":"=m" (Dest) // %0
|
||||
:"m"(Src2), // %1
|
||||
@@ -158,7 +158,7 @@
|
||||
"mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx
|
||||
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
|
||||
".align 16 \n\t" // 16 byte allignment of the loop entry
|
||||
- ".L21011: \n\t"
|
||||
+ "1: \n\t"
|
||||
"movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
|
||||
"movq (%%ebx), %%mm2 \n\t" // load 8 bytes from Src2 into mm2
|
||||
// --- Byte shift via Word shift ---
|
||||
@@ -174,7 +174,7 @@
|
||||
"add $8, %%ebx \n\t" // register pointers by 8
|
||||
"add $8, %%edi \n\t"
|
||||
"dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L21011 \n\t" // check loop termination, proceed if required
|
||||
+ "jnz 1b \n\t" // check loop termination, proceed if required
|
||||
"emms \n\t" // exit MMX state
|
||||
"popa \n\t":"=m" (Dest) // %0
|
||||
:"m"(Src2), // %1
|
||||
@@ -241,13 +241,13 @@
|
||||
"mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx
|
||||
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
|
||||
".align 16 \n\t" // 16 byte allignment of the loop entry
|
||||
- ".L1012: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
|
||||
+ "1: movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
|
||||
"psubusb (%%ebx), %%mm1 \n\t" // mm1=Src1-Src2 (sub 8 bytes with saturation)
|
||||
"movq %%mm1, (%%edi) \n\t" // store result in Dest
|
||||
"add $8, %%eax \n\t" // increase Src1, Src2 and Dest
|
||||
"add $8, %%ebx \n\t" // register pointers by 8
|
||||
"add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L1012 \n\t" // check loop termination, proceed if required
|
||||
+ "jnz 1b \n\t" // check loop termination, proceed if required
|
||||
"emms \n\t" // exit MMX state
|
||||
"popa \n\t":"=m" (Dest) // %0
|
||||
:"m"(Src2), // %1
|
||||
@@ -313,7 +313,7 @@
|
||||
"mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx
|
||||
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
|
||||
".align 16 \n\t" // 16 byte allignment of the loop entry
|
||||
- ".L1013: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
|
||||
+ "1: movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
|
||||
"movq (%%ebx), %%mm2 \n\t" // load 8 bytes from Src2 into mm2
|
||||
"psubusb (%%ebx), %%mm1 \n\t" // mm1=Src1-Src2 (sub 8 bytes with saturation)
|
||||
"psubusb (%%eax), %%mm2 \n\t" // mm2=Src2-Src1 (sub 8 bytes with saturation)
|
||||
@@ -322,7 +322,7 @@
|
||||
"add $8, %%eax \n\t" // increase Src1, Src2 and Dest
|
||||
"add $8, %%ebx \n\t" // register pointers by 8
|
||||
"add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L1013 \n\t" // check loop termination, proceed if required
|
||||
+ "jnz 1b \n\t" // check loop termination, proceed if required
|
||||
"emms \n\t" // exit MMX state
|
||||
"popa \n\t":"=m" (Dest) // %0
|
||||
:"m"(Src2), // %1
|
||||
@@ -388,7 +388,7 @@
|
||||
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
|
||||
"pxor %%mm0, %%mm0 \n\t" // zero mm0 register
|
||||
".align 16 \n\t" // 16 byte allignment of the loop entry
|
||||
- ".L1014: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
|
||||
+ "1: movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
|
||||
"movq (%%ebx), %%mm3 \n\t" // load 8 bytes from Src2 into mm3
|
||||
"movq %%mm1, %%mm2 \n\t" // copy mm1 into mm2
|
||||
"movq %%mm3, %%mm4 \n\t" // copy mm3 into mm4
|
||||
@@ -412,7 +412,7 @@
|
||||
"add $8, %%eax \n\t" // increase Src1, Src2 and Dest
|
||||
"add $8, %%ebx \n\t" // register pointers by 8
|
||||
"add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L1014 \n\t" // check loop termination, proceed if required
|
||||
+ "jnz 1b \n\t" // check loop termination, proceed if required
|
||||
"emms \n\t" // exit MMX state
|
||||
"popa \n\t":"=m" (Dest) // %0
|
||||
:"m"(Src2), // %1
|
||||
@@ -481,13 +481,13 @@
|
||||
"mov %0, %%edi \n\t" // load Dest address into edi
|
||||
"mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx
|
||||
".align 16 \n\t" // 16 byte allignment of the loop entry
|
||||
- ".L10141: \n\t" "mov (%%edx), %%al \n\t" // load a byte from Src1
|
||||
+ "1:mov (%%edx), %%al \n\t" // load a byte from Src1
|
||||
"mulb (%%esi) \n\t" // mul with a byte from Src2
|
||||
- ".L10142: \n\t" "mov %%al, (%%edi) \n\t" // move a byte result to Dest
|
||||
+ "mov %%al, (%%edi) \n\t" // move a byte result to Dest
|
||||
"inc %%edx \n\t" // increment Src1, Src2, Dest
|
||||
"inc %%esi \n\t" // pointer registers by one
|
||||
"inc %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L10141 \n\t" // check loop termination, proceed if required
|
||||
+ "jnz 1b \n\t" // check loop termination, proceed if required
|
||||
"popa \n\t":"=m" (Dest) // %0
|
||||
:"m"(Src2), // %1
|
||||
"m"(Src1), // %2
|
||||
@@ -549,7 +549,7 @@
|
||||
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
|
||||
"pxor %%mm0, %%mm0 \n\t" // zero mm0 register
|
||||
".align 16 \n\t" // 16 byte allignment of the loop entry
|
||||
- ".L1015: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
|
||||
+ "1: movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
|
||||
"movq (%%ebx), %%mm3 \n\t" // load 8 bytes from Src2 into mm3
|
||||
"movq %%mm1, %%mm2 \n\t" // copy mm1 into mm2
|
||||
"movq %%mm3, %%mm4 \n\t" // copy mm3 into mm4
|
||||
@@ -566,7 +566,7 @@
|
||||
"add $8, %%eax \n\t" // increase Src1, Src2 and Dest
|
||||
"add $8, %%ebx \n\t" // register pointers by 8
|
||||
"add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L1015 \n\t" // check loop termination, proceed if required
|
||||
+ "jnz 1b \n\t" // check loop termination, proceed if required
|
||||
"emms \n\t" // exit MMX state
|
||||
"popa \n\t":"=m" (Dest) // %0
|
||||
:"m"(Src2), // %1
|
||||
@@ -634,7 +634,7 @@
|
||||
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
|
||||
"pxor %%mm0, %%mm0 \n\t" // zero mm0 register
|
||||
".align 16 \n\t" // 16 byte allignment of the loop entry
|
||||
- ".L1016: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
|
||||
+ "1: movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
|
||||
"movq (%%ebx), %%mm3 \n\t" // load 8 bytes from Src2 into mm3
|
||||
"movq %%mm1, %%mm2 \n\t" // copy mm1 into mm2
|
||||
"movq %%mm3, %%mm4 \n\t" // copy mm3 into mm4
|
||||
@@ -653,7 +653,7 @@
|
||||
"add $8, %%eax \n\t" // increase Src1, Src2 and Dest
|
||||
"add $8, %%ebx \n\t" // register pointers by 8
|
||||
"add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L1016 \n\t" // check loop termination, proceed if required
|
||||
+ "jnz 1b \n\t" // check loop termination, proceed if required
|
||||
"emms \n\t" // exit MMX state
|
||||
"popa \n\t":"=m" (Dest) // %0
|
||||
:"m"(Src2), // %1
|
||||
@@ -720,13 +720,13 @@
|
||||
"mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx
|
||||
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
|
||||
".align 16 \n\t" // 16 byte allignment of the loop entry
|
||||
- ".L1017: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
|
||||
+ "1: movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
|
||||
"pand (%%ebx), %%mm1 \n\t" // mm1=Src1&Src2
|
||||
"movq %%mm1, (%%edi) \n\t" // store result in Dest
|
||||
"add $8, %%eax \n\t" // increase Src1, Src2 and Dest
|
||||
"add $8, %%ebx \n\t" // register pointers by 8
|
||||
"add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L1017 \n\t" // check loop termination, proceed if required
|
||||
+ "jnz 1b \n\t" // check loop termination, proceed if required
|
||||
"emms \n\t" // exit MMX state
|
||||
"popa \n\t":"=m" (Dest) // %0
|
||||
:"m"(Src2), // %1
|
||||
@@ -792,13 +792,13 @@
|
||||
"mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx
|
||||
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
|
||||
".align 16 \n\t" // 16 byte allignment of the loop entry
|
||||
- ".L91017: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
|
||||
+ "1: movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
|
||||
"por (%%ebx), %%mm1 \n\t" // mm1=Src1|Src2
|
||||
"movq %%mm1, (%%edi) \n\t" // store result in Dest
|
||||
"add $8, %%eax \n\t" // increase Src1, Src2 and Dest
|
||||
"add $8, %%ebx \n\t" // register pointers by 8
|
||||
"add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L91017 \n\t" // check loop termination, proceed if required
|
||||
+ "jnz 1b \n\t" // check loop termination, proceed if required
|
||||
"emms \n\t" // exit MMX state
|
||||
"popa \n\t":"=m" (Dest) // %0
|
||||
:"m"(Src2), // %1
|
||||
@@ -860,17 +860,17 @@
|
||||
"mov %0, %%edi \n\t" // load Dest address into edi
|
||||
"mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx
|
||||
".align 16 \n\t" // 16 byte allignment of the loop entry
|
||||
- ".L10191: \n\t" "mov (%%esi), %%bl \n\t" // load a byte from Src2
|
||||
+ "1: mov (%%esi), %%bl \n\t" // load a byte from Src2
|
||||
"cmp $0, %%bl \n\t" // check if it zero
|
||||
- "jnz .L10192 \n\t" "movb $255, (%%edi) \n\t" // division by zero = 255 !!!
|
||||
- "jmp .L10193 \n\t" ".L10192: \n\t" "xor %%ah, %%ah \n\t" // prepare AX, zero AH register
|
||||
+ "jnz 2f \n\t" "movb $255, (%%edi) \n\t" // division by zero = 255 !!!
|
||||
+ "jmp 3f \n\t" "2: \n\t" "xor %%ah, %%ah \n\t" // prepare AX, zero AH register
|
||||
"mov (%%edx), %%al \n\t" // load a byte from Src1 into AL
|
||||
"div %%bl \n\t" // divide AL by BL
|
||||
"mov %%al, (%%edi) \n\t" // move a byte result to Dest
|
||||
- ".L10193: \n\t" "inc %%edx \n\t" // increment Src1, Src2, Dest
|
||||
+ "3: inc %%edx \n\t" // increment Src1, Src2, Dest
|
||||
"inc %%esi \n\t" // pointer registers by one
|
||||
"inc %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L10191 \n\t" // check loop termination, proceed if required
|
||||
+ "jnz 1b \n\t" // check loop termination, proceed if required
|
||||
"popa \n\t":"=m" (Dest) // %0
|
||||
:"m"(Src2), // %1
|
||||
"m"(Src1), // %2
|
||||
@@ -907,12 +907,12 @@
|
||||
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
|
||||
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
|
||||
".align 16 \n\t" // 16 byte allignment of the loop entry
|
||||
- ".L91117: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from Src1 into mm1
|
||||
+ "1: movq (%%eax), %%mm0 \n\t" // load 8 bytes from Src1 into mm0
|
||||
"pxor %%mm1, %%mm0 \n\t" // negate mm0 by xoring with mm1
|
||||
"movq %%mm0, (%%edi) \n\t" // store result in Dest
|
||||
"add $8, %%eax \n\t" // increase Src1, Src2 and Dest
|
||||
"add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L91117 \n\t" // check loop termination, proceed if required
|
||||
+ "jnz 1b \n\t" // check loop termination, proceed if required
|
||||
"emms \n\t" // exit MMX state
|
||||
"popa \n\t":"=m" (Dest) // %0
|
||||
:"m"(Src1), // %1
|
||||
@@ -980,14 +980,14 @@
|
||||
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
|
||||
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
|
||||
".align 16 \n\t" // 16 byte allignment of the loop entry
|
||||
- ".L1021: \n\t"
|
||||
+ "1: \n\t"
|
||||
"movq (%%eax), %%mm0 \n\t" // load 8 bytes from Src1 into MM0
|
||||
"paddusb %%mm1, %%mm0 \n\t" // MM0=SrcDest+C (add 8 bytes with saturation)
|
||||
"movq %%mm0, (%%edi) \n\t" // store result in Dest
|
||||
"add $8, %%eax \n\t" // increase Dest register pointer by 8
|
||||
"add $8, %%edi \n\t" // increase Dest register pointer by 8
|
||||
"dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L1021 \n\t" // check loop termination, proceed if required
|
||||
+ "jnz 1b \n\t" // check loop termination, proceed if required
|
||||
"emms \n\t" // exit MMX state
|
||||
"popa \n\t":"=m" (Dest) // %0
|
||||
:"m"(Src1), // %1
|
||||
@@ -1059,14 +1059,14 @@
|
||||
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
|
||||
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
|
||||
".align 16 \n\t" // 16 byte allignment of the loop entry
|
||||
- ".L11023: \n\t"
|
||||
+ "1: \n\t"
|
||||
"movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
|
||||
"paddusb %%mm1, %%mm0 \n\t" // MM0=SrcDest+C (add 8 bytes with saturation)
|
||||
"movq %%mm0, (%%edi) \n\t" // store result in SrcDest
|
||||
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
|
||||
"add $8, %%edi \n\t" // increase Dest register pointer by 8
|
||||
"dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L11023 \n\t" // check loop termination, proceed if required
|
||||
+ "jnz 1b \n\t" // check loop termination, proceed if required
|
||||
"emms \n\t" // exit MMX state
|
||||
"popa \n\t":"=m" (Dest) // %0
|
||||
:"m"(Src1), // %1
|
||||
@@ -1154,7 +1154,7 @@
|
||||
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
|
||||
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
|
||||
".align 16 \n\t" // 16 byte allignment of the loop entry
|
||||
- ".L1022: \n\t"
|
||||
+ "1: \n\t"
|
||||
"movq (%%eax), %%mm2 \n\t" // load 8 bytes from Src1 into MM2
|
||||
"psrlw $1, %%mm2 \n\t" // shift 4 WORDS of MM2 1 bit to the right
|
||||
// "pand %%mm0, %%mm2 \n\t" // apply Mask to 8 BYTES of MM2
|
||||
@@ -1164,7 +1164,7 @@
|
||||
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
|
||||
"add $8, %%edi \n\t" // increase Dest register pointer by 8
|
||||
"dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L1022 \n\t" // check loop termination, proceed if required
|
||||
+ "jnz 1b \n\t" // check loop termination, proceed if required
|
||||
"emms \n\t" // exit MMX state
|
||||
"popa \n\t":"=m" (Dest) // %0
|
||||
:"m"(Src1), // %1
|
||||
@@ -1243,13 +1243,13 @@
|
||||
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
|
||||
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
|
||||
".align 16 \n\t" // 16 byte allignment of the loop entry
|
||||
- ".L1023: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
|
||||
+ "1: movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
|
||||
"psubusb %%mm1, %%mm0 \n\t" // MM0=SrcDest-C (sub 8 bytes with saturation)
|
||||
"movq %%mm0, (%%edi) \n\t" // store result in SrcDest
|
||||
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
|
||||
"add $8, %%edi \n\t" // increase Dest register pointer by 8
|
||||
"dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L1023 \n\t" // check loop termination, proceed if required
|
||||
+ "jnz 1b \n\t" // check loop termination, proceed if required
|
||||
"emms \n\t" // exit MMX state
|
||||
"popa \n\t":"=m" (Dest) // %0
|
||||
:"m"(Src1), // %1
|
||||
@@ -1322,13 +1322,13 @@
|
||||
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
|
||||
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
|
||||
".align 16 \n\t" // 16 byte allignment of the loop entry
|
||||
- ".L11024: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
|
||||
+ "1: movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
|
||||
"psubusb %%mm1, %%mm0 \n\t" // MM0=SrcDest-C (sub 8 bytes with saturation)
|
||||
"movq %%mm0, (%%edi) \n\t" // store result in SrcDest
|
||||
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
|
||||
"add $8, %%edi \n\t" // increase Dest register pointer by 8
|
||||
"dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L11024 \n\t" // check loop termination, proceed if required
|
||||
+ "jnz 1b \n\t" // check loop termination, proceed if required
|
||||
"emms \n\t" // exit MMX state
|
||||
"popa \n\t":"=m" (Dest) // %0
|
||||
:"m"(Src1), // %1
|
||||
@@ -1405,19 +1405,19 @@
|
||||
"mov %3, %%cl \n\t" // load loop counter (N) into CL
|
||||
"movd %%ecx, %%mm3 \n\t" // copy (N) into MM3
|
||||
"pcmpeqb %%mm1, %%mm1 \n\t" // generate all 1's in mm1
|
||||
- ".L10240: \n\t" // ** Prepare proper bit-Mask in MM1 **
|
||||
+ "1: \n\t" // ** Prepare proper bit-Mask in MM1 **
|
||||
"psrlw $1, %%mm1 \n\t" // shift 4 WORDS of MM1 1 bit to the right
|
||||
// "pand %%mm0, %%mm1 \n\t" // apply Mask to 8 BYTES of MM1
|
||||
".byte 0x0f, 0xdb, 0xc8 \n\t"
|
||||
"dec %%cl \n\t" // decrease loop counter
|
||||
- "jnz .L10240 \n\t" // check loop termination, proceed if required
|
||||
+ "jnz 1b \n\t" // check loop termination, proceed if required
|
||||
// ** Shift all bytes of the image **
|
||||
"mov %1, %%eax \n\t" // load Src1 address into eax
|
||||
"mov %0, %%edi \n\t" // load Dest address into edi
|
||||
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
|
||||
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
|
||||
".align 16 \n\t" // 16 byte allignment of the loop entry
|
||||
- ".L10241: \n\t"
|
||||
+ "2: \n\t"
|
||||
"movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
|
||||
"psrlw %%mm3, %%mm0 \n\t" // shift 4 WORDS of MM0 (N) bits to the right
|
||||
// "pand %%mm1, %%mm0 \n\t" // apply proper bit-Mask to 8 BYTES of MM0
|
||||
@@ -1426,7 +1426,7 @@
|
||||
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
|
||||
"add $8, %%edi \n\t" // increase Dest register pointer by 8
|
||||
"dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L10241 \n\t" // check loop termination, proceed if required
|
||||
+ "jnz 2b \n\t" // check loop termination, proceed if required
|
||||
"emms \n\t" // exit MMX state
|
||||
"popa \n\t":"=m" (Dest) // %0
|
||||
:"m"(Src1), // %1
|
||||
@@ -1495,13 +1495,13 @@
|
||||
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
|
||||
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
|
||||
".align 16 \n\t" // 16 byte allignment of the loop entry
|
||||
- ".L13023: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
|
||||
+ "1: movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
|
||||
"psrld %3, %%mm0 \n\t" // MM0=SrcDest+C (add 8 bytes with saturation)
|
||||
"movq %%mm0, (%%edi) \n\t" // store result in SrcDest
|
||||
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
|
||||
"add $8, %%edi \n\t" // increase Dest register pointer by 8
|
||||
"dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L13023 \n\t" // check loop termination, proceed if required
|
||||
+ "jnz 1b \n\t" // check loop termination, proceed if required
|
||||
"emms \n\t" // exit MMX state
|
||||
"popa \n\t":"=m" (Dest) // %0
|
||||
:"m"(Src1), // %1
|
||||
@@ -1581,8 +1581,8 @@
|
||||
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
|
||||
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
|
||||
"cmp $128, %%al \n\t" // if (C <= 128) execute more efficient code
|
||||
- "jg .L10251 \n\t" ".align 16 \n\t" // 16 byte allignment of the loop entry
|
||||
- ".L10250: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
|
||||
+ "jg 2f \n\t" ".align 16 \n\t" // 16 byte alignment of the loop entry
|
||||
+ "1: movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
|
||||
"movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4
|
||||
"punpcklbw %%mm0, %%mm3 \n\t" // unpack low bytes of SrcDest into words
|
||||
"punpckhbw %%mm0, %%mm4 \n\t" // unpack high bytes of SrcDest into words
|
||||
@@ -1593,9 +1593,9 @@
|
||||
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
|
||||
"add $8, %%edi \n\t" // increase Dest register pointer by 8
|
||||
"dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L10250 \n\t" // check loop termination, proceed if required
|
||||
- "jmp .L10252 \n\t" ".align 16 \n\t" // 16 byte allignment of the loop entry
|
||||
- ".L10251: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
|
||||
+ "jnz 1b \n\t" // check loop termination, proceed if required
|
||||
+ "jmp 3f \n\t" ".align 16 \n\t" // 16 byte alignment of the loop entry
|
||||
+ "2: movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
|
||||
"movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4
|
||||
"punpcklbw %%mm0, %%mm3 \n\t" // unpack low bytes of SrcDest into words
|
||||
"punpckhbw %%mm0, %%mm4 \n\t" // unpack high bytes of SrcDest into words
|
||||
@@ -1615,8 +1615,8 @@
|
||||
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
|
||||
"add $8, %%edi \n\t" // increase Dest register pointer by 8
|
||||
"dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L10251 \n\t" // check loop termination, proceed if required
|
||||
- ".L10252: \n\t" "emms \n\t" // exit MMX state
|
||||
+ "jnz 2b \n\t" // check loop termination, proceed if required
|
||||
+ "3: emms \n\t" // exit MMX state
|
||||
"popa \n\t":"=m" (Dest) // %0
|
||||
:"m"(Src1), // %1
|
||||
"m"(length), // %2
|
||||
@@ -1695,8 +1695,8 @@
|
||||
"mov %0, %%edi \n\t" // load Dest address into edi
|
||||
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
|
||||
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
|
||||
- ".align 16 \n\t" // 16 byte allignment of the loop entry
|
||||
- ".L1026: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
|
||||
+ ".align 16 \n\t" // 16 byte alignment of the loop entry
|
||||
+ "1: movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
|
||||
"movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4
|
||||
"punpcklbw %%mm0, %%mm3 \n\t" // unpack low bytes of SrcDest into words
|
||||
"punpckhbw %%mm0, %%mm4 \n\t" // unpack high bytes of SrcDest into words
|
||||
@@ -1709,7 +1709,7 @@
|
||||
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
|
||||
"add $8, %%edi \n\t" // increase Dest register pointer by 8
|
||||
"dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L1026 \n\t" // check loop termination, proceed if required
|
||||
+ "jnz 1b \n\t" // check loop termination, proceed if required
|
||||
"emms \n\t" // exit MMX state
|
||||
"popa \n\t":"=m" (Dest) // %0
|
||||
:"m"(Src1), // %1
|
||||
@@ -1784,25 +1784,25 @@
|
||||
"mov %3, %%cl \n\t" // load loop counter (N) into CL
|
||||
"movd %%ecx, %%mm3 \n\t" // copy (N) into MM3
|
||||
"pcmpeqb %%mm1, %%mm1 \n\t" // generate all 1's in mm1
|
||||
- ".L10270: \n\t" // ** Prepare proper bit-Mask in MM1 **
|
||||
+ "1: \n\t" // ** Prepare proper bit-Mask in MM1 **
|
||||
"psllw $1, %%mm1 \n\t" // shift 4 WORDS of MM1 1 bit to the left
|
||||
// "pand %%mm0, %%mm1 \n\t" // apply Mask to 8 BYTES of MM1
|
||||
".byte 0x0f, 0xdb, 0xc8 \n\t" "dec %%cl \n\t" // decrease loop counter
|
||||
- "jnz .L10270 \n\t" // check loop termination, proceed if required
|
||||
+ "jnz 1b \n\t" // check loop termination, proceed if required
|
||||
// ** Shift all bytes of the image **
|
||||
"mov %1, %%eax \n\t" // load Src1 address into eax
|
||||
"mov %0, %%edi \n\t" // load SrcDest address into edi
|
||||
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
|
||||
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
|
||||
".align 16 \n\t" // 16 byte allignment of the loop entry
|
||||
- ".L10271: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from Src1 into MM0
|
||||
+ "2: movq (%%eax), %%mm0 \n\t" // load 8 bytes from Src1 into MM0
|
||||
"psllw %%mm3, %%mm0 \n\t" // shift 4 WORDS of MM0 (N) bits to the left
|
||||
// "pand %%mm1, %%mm0 \n\t" // apply proper bit-Mask to 8 BYTES of MM0
|
||||
".byte 0x0f, 0xdb, 0xc1 \n\t" "movq %%mm0, (%%edi) \n\t" // store result in Dest
|
||||
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
|
||||
"add $8, %%edi \n\t" // increase Dest register pointer by 8
|
||||
"dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L10271 \n\t" // check loop termination, proceed if required
|
||||
+ "jnz 2b \n\t" // check loop termination, proceed if required
|
||||
"emms \n\t" // exit MMX state
|
||||
"popa \n\t":"=m" (Dest) // %0
|
||||
:"m"(Src1), // %1
|
||||
@@ -1870,13 +1870,13 @@
|
||||
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
|
||||
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
|
||||
".align 16 \n\t" // 16 byte allignment of the loop entry
|
||||
- ".L12023: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
|
||||
+ "1: movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
|
||||
"pslld %3, %%mm0 \n\t" // MM0=SrcDest+C (add 8 bytes with saturation)
|
||||
"movq %%mm0, (%%edi) \n\t" // store result in SrcDest
|
||||
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
|
||||
"add $8, %%edi \n\t" // increase Dest register pointer by 8
|
||||
"dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L12023 \n\t" // check loop termination, proceed if required
|
||||
+ "jnz 1b \n\t" // check loop termination, proceed if required
|
||||
"emms \n\t" // exit MMX state
|
||||
"popa \n\t":"=m" (Dest) // %0
|
||||
:"m"(Src1), // %1
|
||||
@@ -1949,8 +1949,8 @@
|
||||
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
|
||||
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
|
||||
"cmp $7, %%al \n\t" // if (N <= 7) execute more efficient code
|
||||
- "jg .L10281 \n\t" ".align 16 \n\t" // 16 byte allignment of the loop entry
|
||||
- ".L10280: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
|
||||
+ "jg 2f \n\t" ".align 16 \n\t" // 16 byte alignment of the loop entry
|
||||
+ "1: movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
|
||||
"movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4
|
||||
"punpcklbw %%mm0, %%mm3 \n\t" // unpack low bytes of SrcDest into words
|
||||
"punpckhbw %%mm0, %%mm4 \n\t" // unpack high bytes of SrcDest into words
|
||||
@@ -1961,9 +1961,9 @@
|
||||
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
|
||||
"add $8, %%edi \n\t" // increase Dest register pointer by 8
|
||||
"dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L10280 \n\t" // check loop termination, proceed if required
|
||||
- "jmp .L10282 \n\t" ".align 16 \n\t" // 16 byte allignment of the loop entry
|
||||
- ".L10281: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
|
||||
+ "jnz 1b \n\t" // check loop termination, proceed if required
|
||||
+ "jmp 3f \n\t" ".align 16 \n\t" // 16 byte alignment of the loop entry
|
||||
+ "2: movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
|
||||
"movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4
|
||||
"punpcklbw %%mm0, %%mm3 \n\t" // unpack low bytes of SrcDest into words
|
||||
"punpckhbw %%mm0, %%mm4 \n\t" // unpack high bytes of SrcDest into words
|
||||
@@ -1983,8 +1983,8 @@
|
||||
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
|
||||
"add $8, %%edi \n\t" // increase Dest register pointer by 8
|
||||
"dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L10281 \n\t" // check loop termination, proceed if required
|
||||
- ".L10282: \n\t" "emms \n\t" // exit MMX state
|
||||
+ "jnz 2b \n\t" // check loop termination, proceed if required
|
||||
+ "3: emms \n\t" // exit MMX state
|
||||
"popa \n\t":"=m" (Dest) // %0
|
||||
:"m"(Src1), // %1
|
||||
"m"(length), // %2
|
||||
@@ -2063,7 +2063,7 @@
|
||||
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
|
||||
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
|
||||
".align 16 \n\t" // 16 byte alignment of the loop entry
|
||||
- ".L1029: \n\t"
|
||||
+ "1: \n\t"
|
||||
"movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
|
||||
"paddusb %%mm2, %%mm0 \n\t" // MM0=SrcDest+(0xFF-T) (add 8 bytes with saturation)
|
||||
"pcmpeqb %%mm1, %%mm0 \n\t" // binarize 255:0, comparing to 255
|
||||
@@ -2071,7 +2071,7 @@
|
||||
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
|
||||
"add $8, %%edi \n\t" // increase Dest register pointer by 8
|
||||
"dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L1029 \n\t" // check loop termination, proceed if required
|
||||
+ "jnz 1b \n\t" // check loop termination, proceed if required
|
||||
"emms \n\t" // exit MMX state
|
||||
"popa \n\t":"=m" (Dest) // %0
|
||||
:"m"(Src1), // %1
|
||||
@@ -2154,7 +2154,7 @@
|
||||
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
|
||||
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
|
||||
".align 16 \n\t" // 16 byte allignment of the loop entry
|
||||
- ".L1030: \n\t"
|
||||
+ "1: \n\t"
|
||||
"movq (%%eax), %%mm0 \n\t" // load 8 bytes from Src1 into MM0
|
||||
"paddusb %%mm1, %%mm0 \n\t" // MM0=SrcDest+(0xFF-Tmax)
|
||||
"psubusb %%mm7, %%mm0 \n\t" // MM0=MM0-(0xFF-Tmax+Tmin)
|
||||
@@ -2163,7 +2163,7 @@
|
||||
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
|
||||
"add $8, %%edi \n\t" // increase Dest register pointer by 8
|
||||
"dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L1030 \n\t" // check loop termination, proceed if required
|
||||
+ "jnz 1b \n\t" // check loop termination, proceed if required
|
||||
"emms \n\t" // exit MMX state
|
||||
"popa \n\t":"=m" (Dest) // %0
|
||||
:"m"(Src1), // %1
|
||||
@@ -2231,11 +2231,11 @@
|
||||
"mov %4, %%bx \n\t" // load Cmax in BX
|
||||
"sub %5, %%ax \n\t" // AX = Nmax - Nmin
|
||||
"sub %3, %%bx \n\t" // BX = Cmax - Cmin
|
||||
- "jz .L10311 \n\t" // check division by zero
|
||||
+ "jz 1f \n\t" // check division by zero
|
||||
"xor %%dx, %%dx \n\t" // prepare for division, zero DX
|
||||
"div %%bx \n\t" // AX = AX/BX
|
||||
- "jmp .L10312 \n\t" ".L10311: \n\t" "mov $255, %%ax \n\t" // if div by zero, assume result max. byte value
|
||||
- ".L10312: \n\t" // ** Duplicate AX in 4 words of MM0 **
|
||||
+ "jmp 2f \n\t" "1: \n\t" "mov $255, %%ax \n\t" // if div by zero, assume result max. byte value
|
||||
+ "2: \n\t" // ** Duplicate AX in 4 words of MM0 **
|
||||
"mov %%ax, %%bx \n\t" // copy AX into BX
|
||||
"shl $16, %%eax \n\t" // shift 2 bytes of EAX left
|
||||
"mov %%bx, %%ax \n\t" // copy BX into AX
|
||||
@@ -2264,7 +2264,7 @@
|
||||
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
|
||||
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
|
||||
".align 16 \n\t" // 16 byte allignment of the loop entry
|
||||
- ".L1031: \n\t"
|
||||
+ "1: \n\t"
|
||||
"movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
|
||||
"movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4
|
||||
"punpcklbw %%mm7, %%mm3 \n\t" // unpack low bytes of SrcDest into words
|
||||
@@ -2289,7 +2289,7 @@
|
||||
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
|
||||
"add $8, %%edi \n\t" // increase Dest register pointer by 8
|
||||
"dec %%ecx \n\t" // decrease loop counter
|
||||
- "jnz .L1031 \n\t" // check loop termination, proceed if required
|
||||
+ "jnz 1b \n\t" // check loop termination, proceed if required
|
||||
"emms \n\t" // exit MMX state
|
||||
"popa \n\t":"=m" (Dest) // %0
|
||||
:"m"(Src1), // %1
|
Loading…
x
Reference in New Issue
Block a user