contrib/sdl_gfx/sdl_gfx-2.0.13-imageFilter-asm.fix.patch
2008-05-23 19:29:07 +10:00

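Size: 549 lines, 35 KiB. Format: unified diff.

Summary: in the GCC inline-assembly blocks of SDL_imageFilter.c, each named
assembler label (.L1010, .L21011, .L10191, ...) is replaced by a GNU as
numeric local label ("1:", "2:", "3:"), and each jump is rewritten to the
matching backward ("1b", "2b") or forward ("1f", "2f", "3f") reference.
The label .L10142 is removed without a replacement (no jump to it is visible
in this patch), and the hunk at old line 1695 appears to change only
whitespace on an .align line. Instructions and operands are otherwise
unchanged. Presumably the goal is to avoid duplicate-label assembler errors
when the compiler emits an asm block more than once (for example after
inlining); confirm against the original bug report.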

--- SDL_gfx-2.0.13.orig/SDL_imageFilter.c 2004-11-29 20:53:35.000000000 +0100
+++ SDL_gfx-2.0.13/SDL_imageFilter.c 2008-04-22 18:11:27.000000000 +0200
@@ -81,13 +81,13 @@
"mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
".align 16 \n\t" // 16 byte allignment of the loop entry
- ".L1010: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
+ "1: movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
"paddusb (%%ebx), %%mm1 \n\t" // mm1=Src1+Src2 (add 8 bytes with saturation)
"movq %%mm1, (%%edi) \n\t" // store result in Dest
"add $8, %%eax \n\t" // increase Src1, Src2 and Dest
"add $8, %%ebx \n\t" // register pointers by 8
"add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
- "jnz .L1010 \n\t" // check loop termination, proceed if required
+ "jnz 1b \n\t" // check loop termination, proceed if required
"emms \n\t" // exit MMX state
"popa \n\t":"=m" (Dest) // %0
:"m"(Src2), // %1
@@ -158,7 +158,7 @@
"mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
".align 16 \n\t" // 16 byte allignment of the loop entry
- ".L21011: \n\t"
+ "1: \n\t"
"movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
"movq (%%ebx), %%mm2 \n\t" // load 8 bytes from Src2 into mm2
// --- Byte shift via Word shift ---
@@ -174,7 +174,7 @@
"add $8, %%ebx \n\t" // register pointers by 8
"add $8, %%edi \n\t"
"dec %%ecx \n\t" // decrease loop counter
- "jnz .L21011 \n\t" // check loop termination, proceed if required
+ "jnz 1b \n\t" // check loop termination, proceed if required
"emms \n\t" // exit MMX state
"popa \n\t":"=m" (Dest) // %0
:"m"(Src2), // %1
@@ -241,13 +241,13 @@
"mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
".align 16 \n\t" // 16 byte allignment of the loop entry
- ".L1012: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
+ "1: movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
"psubusb (%%ebx), %%mm1 \n\t" // mm1=Src1-Src2 (sub 8 bytes with saturation)
"movq %%mm1, (%%edi) \n\t" // store result in Dest
"add $8, %%eax \n\t" // increase Src1, Src2 and Dest
"add $8, %%ebx \n\t" // register pointers by 8
"add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
- "jnz .L1012 \n\t" // check loop termination, proceed if required
+ "jnz 1b \n\t" // check loop termination, proceed if required
"emms \n\t" // exit MMX state
"popa \n\t":"=m" (Dest) // %0
:"m"(Src2), // %1
@@ -313,7 +313,7 @@
"mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
".align 16 \n\t" // 16 byte allignment of the loop entry
- ".L1013: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
+ "1: movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
"movq (%%ebx), %%mm2 \n\t" // load 8 bytes from Src2 into mm2
"psubusb (%%ebx), %%mm1 \n\t" // mm1=Src1-Src2 (sub 8 bytes with saturation)
"psubusb (%%eax), %%mm2 \n\t" // mm2=Src2-Src1 (sub 8 bytes with saturation)
@@ -322,7 +322,7 @@
"add $8, %%eax \n\t" // increase Src1, Src2 and Dest
"add $8, %%ebx \n\t" // register pointers by 8
"add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
- "jnz .L1013 \n\t" // check loop termination, proceed if required
+ "jnz 1b \n\t" // check loop termination, proceed if required
"emms \n\t" // exit MMX state
"popa \n\t":"=m" (Dest) // %0
:"m"(Src2), // %1
@@ -388,7 +388,7 @@
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
"pxor %%mm0, %%mm0 \n\t" // zero mm0 register
".align 16 \n\t" // 16 byte allignment of the loop entry
- ".L1014: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
+ "1: movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
"movq (%%ebx), %%mm3 \n\t" // load 8 bytes from Src2 into mm3
"movq %%mm1, %%mm2 \n\t" // copy mm1 into mm2
"movq %%mm3, %%mm4 \n\t" // copy mm3 into mm4
@@ -412,7 +412,7 @@
"add $8, %%eax \n\t" // increase Src1, Src2 and Dest
"add $8, %%ebx \n\t" // register pointers by 8
"add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
- "jnz .L1014 \n\t" // check loop termination, proceed if required
+ "jnz 1b \n\t" // check loop termination, proceed if required
"emms \n\t" // exit MMX state
"popa \n\t":"=m" (Dest) // %0
:"m"(Src2), // %1
@@ -481,13 +481,13 @@
"mov %0, %%edi \n\t" // load Dest address into edi
"mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx
".align 16 \n\t" // 16 byte allignment of the loop entry
- ".L10141: \n\t" "mov (%%edx), %%al \n\t" // load a byte from Src1
+ "1:mov (%%edx), %%al \n\t" // load a byte from Src1
"mulb (%%esi) \n\t" // mul with a byte from Src2
- ".L10142: \n\t" "mov %%al, (%%edi) \n\t" // move a byte result to Dest
+ "mov %%al, (%%edi) \n\t" // move a byte result to Dest
"inc %%edx \n\t" // increment Src1, Src2, Dest
"inc %%esi \n\t" // pointer registers by one
"inc %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
- "jnz .L10141 \n\t" // check loop termination, proceed if required
+ "jnz 1b \n\t" // check loop termination, proceed if required
"popa \n\t":"=m" (Dest) // %0
:"m"(Src2), // %1
"m"(Src1), // %2
@@ -549,7 +549,7 @@
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
"pxor %%mm0, %%mm0 \n\t" // zero mm0 register
".align 16 \n\t" // 16 byte allignment of the loop entry
- ".L1015: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
+ "1: movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
"movq (%%ebx), %%mm3 \n\t" // load 8 bytes from Src2 into mm3
"movq %%mm1, %%mm2 \n\t" // copy mm1 into mm2
"movq %%mm3, %%mm4 \n\t" // copy mm3 into mm4
@@ -566,7 +566,7 @@
"add $8, %%eax \n\t" // increase Src1, Src2 and Dest
"add $8, %%ebx \n\t" // register pointers by 8
"add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
- "jnz .L1015 \n\t" // check loop termination, proceed if required
+ "jnz 1b \n\t" // check loop termination, proceed if required
"emms \n\t" // exit MMX state
"popa \n\t":"=m" (Dest) // %0
:"m"(Src2), // %1
@@ -634,7 +634,7 @@
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
"pxor %%mm0, %%mm0 \n\t" // zero mm0 register
".align 16 \n\t" // 16 byte allignment of the loop entry
- ".L1016: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
+ "1: movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
"movq (%%ebx), %%mm3 \n\t" // load 8 bytes from Src2 into mm3
"movq %%mm1, %%mm2 \n\t" // copy mm1 into mm2
"movq %%mm3, %%mm4 \n\t" // copy mm3 into mm4
@@ -653,7 +653,7 @@
"add $8, %%eax \n\t" // increase Src1, Src2 and Dest
"add $8, %%ebx \n\t" // register pointers by 8
"add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
- "jnz .L1016 \n\t" // check loop termination, proceed if required
+ "jnz 1b \n\t" // check loop termination, proceed if required
"emms \n\t" // exit MMX state
"popa \n\t":"=m" (Dest) // %0
:"m"(Src2), // %1
@@ -720,13 +720,13 @@
"mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
".align 16 \n\t" // 16 byte allignment of the loop entry
- ".L1017: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
+ "1: movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
"pand (%%ebx), %%mm1 \n\t" // mm1=Src1&Src2
"movq %%mm1, (%%edi) \n\t" // store result in Dest
"add $8, %%eax \n\t" // increase Src1, Src2 and Dest
"add $8, %%ebx \n\t" // register pointers by 8
"add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
- "jnz .L1017 \n\t" // check loop termination, proceed if required
+ "jnz 1b \n\t" // check loop termination, proceed if required
"emms \n\t" // exit MMX state
"popa \n\t":"=m" (Dest) // %0
:"m"(Src2), // %1
@@ -792,13 +792,13 @@
"mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
".align 16 \n\t" // 16 byte allignment of the loop entry
- ".L91017: \n\t" "movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
+ "1: movq (%%eax), %%mm1 \n\t" // load 8 bytes from Src1 into mm1
"por (%%ebx), %%mm1 \n\t" // mm1=Src1|Src2
"movq %%mm1, (%%edi) \n\t" // store result in Dest
"add $8, %%eax \n\t" // increase Src1, Src2 and Dest
"add $8, %%ebx \n\t" // register pointers by 8
"add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
- "jnz .L91017 \n\t" // check loop termination, proceed if required
+ "jnz 1b \n\t" // check loop termination, proceed if required
"emms \n\t" // exit MMX state
"popa \n\t":"=m" (Dest) // %0
:"m"(Src2), // %1
@@ -860,17 +860,17 @@
"mov %0, %%edi \n\t" // load Dest address into edi
"mov %3, %%ecx \n\t" // load loop counter (SIZE) into ecx
".align 16 \n\t" // 16 byte allignment of the loop entry
- ".L10191: \n\t" "mov (%%esi), %%bl \n\t" // load a byte from Src2
+ "1: mov (%%esi), %%bl \n\t" // load a byte from Src2
"cmp $0, %%bl \n\t" // check if it zero
- "jnz .L10192 \n\t" "movb $255, (%%edi) \n\t" // division by zero = 255 !!!
- "jmp .L10193 \n\t" ".L10192: \n\t" "xor %%ah, %%ah \n\t" // prepare AX, zero AH register
+ "jnz 2f \n\t" "movb $255, (%%edi) \n\t" // division by zero = 255 !!!
+ "jmp 3f \n\t" "2: \n\t" "xor %%ah, %%ah \n\t" // prepare AX, zero AH register
"mov (%%edx), %%al \n\t" // load a byte from Src1 into AL
"div %%bl \n\t" // divide AL by BL
"mov %%al, (%%edi) \n\t" // move a byte result to Dest
- ".L10193: \n\t" "inc %%edx \n\t" // increment Src1, Src2, Dest
+ "3: inc %%edx \n\t" // increment Src1, Src2, Dest
"inc %%esi \n\t" // pointer registers by one
"inc %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
- "jnz .L10191 \n\t" // check loop termination, proceed if required
+ "jnz 1b \n\t" // check loop termination, proceed if required
"popa \n\t":"=m" (Dest) // %0
:"m"(Src2), // %1
"m"(Src1), // %2
@@ -907,12 +907,12 @@
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
".align 16 \n\t" // 16 byte allignment of the loop entry
- ".L91117: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from Src1 into mm1
+ "1: movq (%%eax), %%mm0 \n\t" // load 8 bytes from Src1 into mm1
"pxor %%mm1, %%mm0 \n\t" // negate mm0 by xoring with mm1
"movq %%mm0, (%%edi) \n\t" // store result in Dest
"add $8, %%eax \n\t" // increase Src1, Src2 and Dest
"add $8, %%edi \n\t" "dec %%ecx \n\t" // decrease loop counter
- "jnz .L91117 \n\t" // check loop termination, proceed if required
+ "jnz 1b \n\t" // check loop termination, proceed if required
"emms \n\t" // exit MMX state
"popa \n\t":"=m" (Dest) // %0
:"m"(Src1), // %1
@@ -980,14 +980,14 @@
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
".align 16 \n\t" // 16 byte allignment of the loop entry
- ".L1021: \n\t"
+ "1: \n\t"
"movq (%%eax), %%mm0 \n\t" // load 8 bytes from Src1 into MM0
"paddusb %%mm1, %%mm0 \n\t" // MM0=SrcDest+C (add 8 bytes with saturation)
"movq %%mm0, (%%edi) \n\t" // store result in Dest
"add $8, %%eax \n\t" // increase Dest register pointer by 8
"add $8, %%edi \n\t" // increase Dest register pointer by 8
"dec %%ecx \n\t" // decrease loop counter
- "jnz .L1021 \n\t" // check loop termination, proceed if required
+ "jnz 1b \n\t" // check loop termination, proceed if required
"emms \n\t" // exit MMX state
"popa \n\t":"=m" (Dest) // %0
:"m"(Src1), // %1
@@ -1059,14 +1059,14 @@
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
".align 16 \n\t" // 16 byte allignment of the loop entry
- ".L11023: \n\t"
+ "1: \n\t"
"movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
"paddusb %%mm1, %%mm0 \n\t" // MM0=SrcDest+C (add 8 bytes with saturation)
"movq %%mm0, (%%edi) \n\t" // store result in SrcDest
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
"add $8, %%edi \n\t" // increase Dest register pointer by 8
"dec %%ecx \n\t" // decrease loop counter
- "jnz .L11023 \n\t" // check loop termination, proceed if required
+ "jnz 1b \n\t" // check loop termination, proceed if required
"emms \n\t" // exit MMX state
"popa \n\t":"=m" (Dest) // %0
:"m"(Src1), // %1
@@ -1154,7 +1154,7 @@
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
".align 16 \n\t" // 16 byte allignment of the loop entry
- ".L1022: \n\t"
+ "1: \n\t"
"movq (%%eax), %%mm2 \n\t" // load 8 bytes from Src1 into MM2
"psrlw $1, %%mm2 \n\t" // shift 4 WORDS of MM2 1 bit to the right
// "pand %%mm0, %%mm2 \n\t" // apply Mask to 8 BYTES of MM2
@@ -1164,7 +1164,7 @@
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
"add $8, %%edi \n\t" // increase Dest register pointer by 8
"dec %%ecx \n\t" // decrease loop counter
- "jnz .L1022 \n\t" // check loop termination, proceed if required
+ "jnz 1b \n\t" // check loop termination, proceed if required
"emms \n\t" // exit MMX state
"popa \n\t":"=m" (Dest) // %0
:"m"(Src1), // %1
@@ -1243,13 +1243,13 @@
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
".align 16 \n\t" // 16 byte allignment of the loop entry
- ".L1023: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
+ "1: movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
"psubusb %%mm1, %%mm0 \n\t" // MM0=SrcDest-C (sub 8 bytes with saturation)
"movq %%mm0, (%%edi) \n\t" // store result in SrcDest
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
"add $8, %%edi \n\t" // increase Dest register pointer by 8
"dec %%ecx \n\t" // decrease loop counter
- "jnz .L1023 \n\t" // check loop termination, proceed if required
+ "jnz 1b \n\t" // check loop termination, proceed if required
"emms \n\t" // exit MMX state
"popa \n\t":"=m" (Dest) // %0
:"m"(Src1), // %1
@@ -1322,13 +1322,13 @@
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
".align 16 \n\t" // 16 byte allignment of the loop entry
- ".L11024: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
+ "1: movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
"psubusb %%mm1, %%mm0 \n\t" // MM0=SrcDest-C (sub 8 bytes with saturation)
"movq %%mm0, (%%edi) \n\t" // store result in SrcDest
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
"add $8, %%edi \n\t" // increase Dest register pointer by 8
"dec %%ecx \n\t" // decrease loop counter
- "jnz .L11024 \n\t" // check loop termination, proceed if required
+ "jnz 1b \n\t" // check loop termination, proceed if required
"emms \n\t" // exit MMX state
"popa \n\t":"=m" (Dest) // %0
:"m"(Src1), // %1
@@ -1405,19 +1405,19 @@
"mov %3, %%cl \n\t" // load loop counter (N) into CL
"movd %%ecx, %%mm3 \n\t" // copy (N) into MM3
"pcmpeqb %%mm1, %%mm1 \n\t" // generate all 1's in mm1
- ".L10240: \n\t" // ** Prepare proper bit-Mask in MM1 **
+ "1: \n\t" // ** Prepare proper bit-Mask in MM1 **
"psrlw $1, %%mm1 \n\t" // shift 4 WORDS of MM1 1 bit to the right
// "pand %%mm0, %%mm1 \n\t" // apply Mask to 8 BYTES of MM1
".byte 0x0f, 0xdb, 0xc8 \n\t"
"dec %%cl \n\t" // decrease loop counter
- "jnz .L10240 \n\t" // check loop termination, proceed if required
+ "jnz 1b \n\t" // check loop termination, proceed if required
// ** Shift all bytes of the image **
"mov %1, %%eax \n\t" // load Src1 address into eax
"mov %0, %%edi \n\t" // load Dest address into edi
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
".align 16 \n\t" // 16 byte allignment of the loop entry
- ".L10241: \n\t"
+ "2: \n\t"
"movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
"psrlw %%mm3, %%mm0 \n\t" // shift 4 WORDS of MM0 (N) bits to the right
// "pand %%mm1, %%mm0 \n\t" // apply proper bit-Mask to 8 BYTES of MM0
@@ -1426,7 +1426,7 @@
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
"add $8, %%edi \n\t" // increase Dest register pointer by 8
"dec %%ecx \n\t" // decrease loop counter
- "jnz .L10241 \n\t" // check loop termination, proceed if required
+ "jnz 2b \n\t" // check loop termination, proceed if required
"emms \n\t" // exit MMX state
"popa \n\t":"=m" (Dest) // %0
:"m"(Src1), // %1
@@ -1495,13 +1495,13 @@
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
".align 16 \n\t" // 16 byte allignment of the loop entry
- ".L13023: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
+ "1: movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
"psrld %3, %%mm0 \n\t" // MM0=SrcDest+C (add 8 bytes with saturation)
"movq %%mm0, (%%edi) \n\t" // store result in SrcDest
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
"add $8, %%edi \n\t" // increase Dest register pointer by 8
"dec %%ecx \n\t" // decrease loop counter
- "jnz .L13023 \n\t" // check loop termination, proceed if required
+ "jnz 1b \n\t" // check loop termination, proceed if required
"emms \n\t" // exit MMX state
"popa \n\t":"=m" (Dest) // %0
:"m"(Src1), // %1
@@ -1581,8 +1581,8 @@
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
"cmp $128, %%al \n\t" // if (C <= 128) execute more efficient code
- "jg .L10251 \n\t" ".align 16 \n\t" // 16 byte allignment of the loop entry
- ".L10250: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
+ "jg 2f \n\t" ".align 16 \n\t" // 16 byte allignment of the loop entry
+ "1: movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
"movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4
"punpcklbw %%mm0, %%mm3 \n\t" // unpack low bytes of SrcDest into words
"punpckhbw %%mm0, %%mm4 \n\t" // unpack high bytes of SrcDest into words
@@ -1593,9 +1593,9 @@
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
"add $8, %%edi \n\t" // increase Dest register pointer by 8
"dec %%ecx \n\t" // decrease loop counter
- "jnz .L10250 \n\t" // check loop termination, proceed if required
- "jmp .L10252 \n\t" ".align 16 \n\t" // 16 byte allignment of the loop entry
- ".L10251: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
+ "jnz 1b \n\t" // check loop termination, proceed if required
+ "jmp 3f \n\t" ".align 16 \n\t" // 16 byte allignment of the loop entry
+ "2: movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
"movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4
"punpcklbw %%mm0, %%mm3 \n\t" // unpack low bytes of SrcDest into words
"punpckhbw %%mm0, %%mm4 \n\t" // unpack high bytes of SrcDest into words
@@ -1615,8 +1615,8 @@
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
"add $8, %%edi \n\t" // increase Dest register pointer by 8
"dec %%ecx \n\t" // decrease loop counter
- "jnz .L10251 \n\t" // check loop termination, proceed if required
- ".L10252: \n\t" "emms \n\t" // exit MMX state
+ "jnz 2b \n\t" // check loop termination, proceed if required
+ "3: emms \n\t" // exit MMX state
"popa \n\t":"=m" (Dest) // %0
:"m"(Src1), // %1
"m"(length), // %2
@@ -1695,8 +1695,8 @@
"mov %0, %%edi \n\t" // load Dest address into edi
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
- ".align 16 \n\t" // 16 byte allignment of the loop entry
- ".L1026: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
+ ".align 16 \n\t" // 16 byte allignment of the loop entry
+ "1: movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
"movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4
"punpcklbw %%mm0, %%mm3 \n\t" // unpack low bytes of SrcDest into words
"punpckhbw %%mm0, %%mm4 \n\t" // unpack high bytes of SrcDest into words
@@ -1709,7 +1709,7 @@
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
"add $8, %%edi \n\t" // increase Dest register pointer by 8
"dec %%ecx \n\t" // decrease loop counter
- "jnz .L1026 \n\t" // check loop termination, proceed if required
+ "jnz 1b \n\t" // check loop termination, proceed if required
"emms \n\t" // exit MMX state
"popa \n\t":"=m" (Dest) // %0
:"m"(Src1), // %1
@@ -1784,25 +1784,25 @@
"mov %3, %%cl \n\t" // load loop counter (N) into CL
"movd %%ecx, %%mm3 \n\t" // copy (N) into MM3
"pcmpeqb %%mm1, %%mm1 \n\t" // generate all 1's in mm1
- ".L10270: \n\t" // ** Prepare proper bit-Mask in MM1 **
+ "1: \n\t" // ** Prepare proper bit-Mask in MM1 **
"psllw $1, %%mm1 \n\t" // shift 4 WORDS of MM1 1 bit to the left
// "pand %%mm0, %%mm1 \n\t" // apply Mask to 8 BYTES of MM1
".byte 0x0f, 0xdb, 0xc8 \n\t" "dec %%cl \n\t" // decrease loop counter
- "jnz .L10270 \n\t" // check loop termination, proceed if required
+ "jnz 1b \n\t" // check loop termination, proceed if required
// ** Shift all bytes of the image **
"mov %1, %%eax \n\t" // load Src1 address into eax
"mov %0, %%edi \n\t" // load SrcDest address into edi
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
".align 16 \n\t" // 16 byte allignment of the loop entry
- ".L10271: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from Src1 into MM0
+ "2: movq (%%eax), %%mm0 \n\t" // load 8 bytes from Src1 into MM0
"psllw %%mm3, %%mm0 \n\t" // shift 4 WORDS of MM0 (N) bits to the left
// "pand %%mm1, %%mm0 \n\t" // apply proper bit-Mask to 8 BYTES of MM0
".byte 0x0f, 0xdb, 0xc1 \n\t" "movq %%mm0, (%%edi) \n\t" // store result in Dest
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
"add $8, %%edi \n\t" // increase Dest register pointer by 8
"dec %%ecx \n\t" // decrease loop counter
- "jnz .L10271 \n\t" // check loop termination, proceed if required
+ "jnz 2b \n\t" // check loop termination, proceed if required
"emms \n\t" // exit MMX state
"popa \n\t":"=m" (Dest) // %0
:"m"(Src1), // %1
@@ -1870,13 +1870,13 @@
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
".align 16 \n\t" // 16 byte allignment of the loop entry
- ".L12023: \n\t" "movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
+ "1: movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
"pslld %3, %%mm0 \n\t" // MM0=SrcDest+C (add 8 bytes with saturation)
"movq %%mm0, (%%edi) \n\t" // store result in SrcDest
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
"add $8, %%edi \n\t" // increase Dest register pointer by 8
"dec %%ecx \n\t" // decrease loop counter
- "jnz .L12023 \n\t" // check loop termination, proceed if required
+ "jnz 1b \n\t" // check loop termination, proceed if required
"emms \n\t" // exit MMX state
"popa \n\t":"=m" (Dest) // %0
:"m"(Src1), // %1
@@ -1949,8 +1949,8 @@
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
"cmp $7, %%al \n\t" // if (N <= 7) execute more efficient code
- "jg .L10281 \n\t" ".align 16 \n\t" // 16 byte allignment of the loop entry
- ".L10280: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
+ "jg 2f \n\t" ".align 16 \n\t" // 16 byte allignment of the loop entry
+ "1: movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
"movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4
"punpcklbw %%mm0, %%mm3 \n\t" // unpack low bytes of SrcDest into words
"punpckhbw %%mm0, %%mm4 \n\t" // unpack high bytes of SrcDest into words
@@ -1961,9 +1961,9 @@
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
"add $8, %%edi \n\t" // increase Dest register pointer by 8
"dec %%ecx \n\t" // decrease loop counter
- "jnz .L10280 \n\t" // check loop termination, proceed if required
- "jmp .L10282 \n\t" ".align 16 \n\t" // 16 byte allignment of the loop entry
- ".L10281: \n\t" "movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
+ "jnz 1b \n\t" // check loop termination, proceed if required
+ "jmp 3f \n\t" ".align 16 \n\t" // 16 byte allignment of the loop entry
+ "2: movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
"movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4
"punpcklbw %%mm0, %%mm3 \n\t" // unpack low bytes of SrcDest into words
"punpckhbw %%mm0, %%mm4 \n\t" // unpack high bytes of SrcDest into words
@@ -1983,8 +1983,8 @@
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
"add $8, %%edi \n\t" // increase Dest register pointer by 8
"dec %%ecx \n\t" // decrease loop counter
- "jnz .L10281 \n\t" // check loop termination, proceed if required
- ".L10282: \n\t" "emms \n\t" // exit MMX state
+ "jnz 2b \n\t" // check loop termination, proceed if required
+ "3: emms \n\t" // exit MMX state
"popa \n\t":"=m" (Dest) // %0
:"m"(Src1), // %1
"m"(length), // %2
@@ -2063,7 +2063,7 @@
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
".align 16 \n\t" // 16 byte alignment of the loop entry
- ".L1029: \n\t"
+ "1: \n\t"
"movq (%%eax), %%mm0 \n\t" // load 8 bytes from SrcDest into MM0
"paddusb %%mm2, %%mm0 \n\t" // MM0=SrcDest+(0xFF-T) (add 8 bytes with saturation)
"pcmpeqb %%mm1, %%mm0 \n\t" // binarize 255:0, comparing to 255
@@ -2071,7 +2071,7 @@
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
"add $8, %%edi \n\t" // increase Dest register pointer by 8
"dec %%ecx \n\t" // decrease loop counter
- "jnz .L1029 \n\t" // check loop termination, proceed if required
+ "jnz 1b \n\t" // check loop termination, proceed if required
"emms \n\t" // exit MMX state
"popa \n\t":"=m" (Dest) // %0
:"m"(Src1), // %1
@@ -2154,7 +2154,7 @@
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
".align 16 \n\t" // 16 byte allignment of the loop entry
- ".L1030: \n\t"
+ "1: \n\t"
"movq (%%eax), %%mm0 \n\t" // load 8 bytes from Src1 into MM0
"paddusb %%mm1, %%mm0 \n\t" // MM0=SrcDest+(0xFF-Tmax)
"psubusb %%mm7, %%mm0 \n\t" // MM0=MM0-(0xFF-Tmax+Tmin)
@@ -2163,7 +2163,7 @@
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
"add $8, %%edi \n\t" // increase Dest register pointer by 8
"dec %%ecx \n\t" // decrease loop counter
- "jnz .L1030 \n\t" // check loop termination, proceed if required
+ "jnz 1b \n\t" // check loop termination, proceed if required
"emms \n\t" // exit MMX state
"popa \n\t":"=m" (Dest) // %0
:"m"(Src1), // %1
@@ -2231,11 +2231,11 @@
"mov %4, %%bx \n\t" // load Cmax in BX
"sub %5, %%ax \n\t" // AX = Nmax - Nmin
"sub %3, %%bx \n\t" // BX = Cmax - Cmin
- "jz .L10311 \n\t" // check division by zero
+ "jz 1f \n\t" // check division by zero
"xor %%dx, %%dx \n\t" // prepare for division, zero DX
"div %%bx \n\t" // AX = AX/BX
- "jmp .L10312 \n\t" ".L10311: \n\t" "mov $255, %%ax \n\t" // if div by zero, assume result max. byte value
- ".L10312: \n\t" // ** Duplicate AX in 4 words of MM0 **
+ "jmp 2f \n\t" "1: \n\t" "mov $255, %%ax \n\t" // if div by zero, assume result max. byte value
+ "2: \n\t" // ** Duplicate AX in 4 words of MM0 **
"mov %%ax, %%bx \n\t" // copy AX into BX
"shl $16, %%eax \n\t" // shift 2 bytes of EAX left
"mov %%bx, %%ax \n\t" // copy BX into AX
@@ -2264,7 +2264,7 @@
"mov %2, %%ecx \n\t" // load loop counter (SIZE) into ecx
"shr $3, %%ecx \n\t" // counter/8 (MMX loads 8 bytes at a time)
".align 16 \n\t" // 16 byte allignment of the loop entry
- ".L1031: \n\t"
+ "1: \n\t"
"movq (%%eax), %%mm3 \n\t" // load 8 bytes from Src1 into MM3
"movq %%mm3, %%mm4 \n\t" // copy MM3 into MM4
"punpcklbw %%mm7, %%mm3 \n\t" // unpack low bytes of SrcDest into words
@@ -2289,7 +2289,7 @@
"add $8, %%eax \n\t" // increase Src1 register pointer by 8
"add $8, %%edi \n\t" // increase Dest register pointer by 8
"dec %%ecx \n\t" // decrease loop counter
- "jnz .L1031 \n\t" // check loop termination, proceed if required
+ "jnz 1b \n\t" // check loop termination, proceed if required
"emms \n\t" // exit MMX state
"popa \n\t":"=m" (Dest) // %0
:"m"(Src1), // %1