why comparing char varibles twice is faster than comparing short varibles once-Collection of common programming errors
I have thought one compare must be faster than two. But after my test, I found in debug mode short compare is a bit faster, and in release mode char compare is faster. And I want to know the true reason.
Following is the test code and test result. I wrote two simple function, func1 using two char compare, func2 using one short compare. The main funcion returns temporary return value to avoid compile optimization ignoring my test code. My compiler is GCC 4.7.2, CPU Intel(R) Xeon(R) CPU E5-2430 0 @ 2.20GHz (VM).
inline int func1(unsigned char word[2])
{
if (word[0] == 0xff && word[1] == 0xff)
return 1;
return 0;
}
inline int func2(unsigned char word[2])
{
if (*(unsigned short*)word == 0xffff)
return 1;
return 0;
}
int main()
{
int n_ret = 0;
for (int j = 0; j < 10000; ++j)
for (int i = 0; i < 70000; ++i)
n_ret += func2((unsigned char*)&i);
return n_ret;
}
Debug mode:
func1 func2
real 0m3.621s 0m3.586s
user 0m3.614s 0m3.579s
sys 0m0.001s 0m0.000s
Release mode:
func1 func2
real 0m0.833s 0m0.880s
user 0m0.831s 0m0.878s
sys 0m0.000s 0m0.002s
func1 edition’s assemblly code:
.cfi_startproc
movl $10000, %esi
xorl %eax, %eax
.p2align 4,,10
.p2align 3
.L6:
movl $1, %edx
xorl %ecx, %ecx
.p2align 4,,10
.p2align 3
.L8:
movl %edx, -24(%rsp)
addl $1, %edx
addl %ecx, %eax
cmpl $70001, %edx
je .L3
xorl %ecx, %ecx
cmpb $-1, -24(%rsp)
jne .L8
xorl %ecx, %ecx
cmpb $-1, -23(%rsp)
sete %cl
jmp .L8
.p2align 4,,10
.p2align 3
.L3:
subl $1, %esi
jne .L6
rep
ret
.cfi_endproc
func2 edition’s assemblly code:
.cfi_startproc
movl $10000, %esi
xorl %eax, %eax
.p2align 4,,10
.p2align 3
.L4:
movl $1, %edx
xorl %ecx, %ecx
jmp .L3
.p2align 4,,10
.p2align 3
.L7:
movzwl -24(%rsp), %ecx
.L3:
cmpw $-1, %cx
movl %edx, -24(%rsp)
sete %cl
addl $1, %edx
movzbl %cl, %ecx
addl %ecx, %eax
cmpl $70001, %edx
jne .L7
subl $1, %esi
jne .L4
rep
ret
.cfi_endproc