Why comparing char variables twice is faster than comparing short variables once - Collection of common programming errors

I had assumed that one comparison must be faster than two. But in my tests, the short comparison is slightly faster in debug mode, while the char comparison is faster in release mode. I would like to know the real reason.

Below are the test code and the test results. I wrote two simple functions: func1 uses two char comparisons, and func2 uses one short comparison. The main function returns the accumulated result so that the compiler cannot optimize the test code away. My compiler is GCC 4.7.2, and the CPU is an Intel(R) Xeon(R) CPU E5-2430 0 @ 2.20GHz (VM).

#include <string.h>

/*
 * Returns 1 when both bytes of `word` equal 0xff, otherwise 0.
 *
 * This is the "two char compares" variant: each byte is tested on its own,
 * and the second byte is only examined when the first one matched.
 *
 * Declared `static inline` rather than plain `inline`: in C99/C11 a plain
 * `inline` definition provides no external definition, so any call that the
 * compiler does not inline (for example at -O0) fails to link.
 */
static inline int func1(unsigned char word[2])
{
        return word[0] == 0xff && word[1] == 0xff;
}


/*
 * Returns 1 when the two bytes at `word`, read as one unsigned short, equal
 * 0xffff, otherwise 0.
 *
 * This is the "one short compare" variant. The bytes are copied into a
 * local unsigned short with memcpy instead of dereferencing
 * `(unsigned short *)word`: that cast reads the object through an
 * incompatible lvalue type (a strict-aliasing violation) and may be
 * misaligned, both of which are undefined behaviour. The value is compared
 * in host byte order, which does not matter for the all-ones pattern.
 *
 * Declared `static inline` rather than plain `inline`: in C99/C11 a plain
 * `inline` definition provides no external definition, so any call that the
 * compiler does not inline (for example at -O0) fails to link.
 */
static inline int func2(unsigned char word[2])
{
        unsigned short both;

        memcpy(&both, word, sizeof both);
        return both == 0xffff;
}

/*
 * Benchmark driver.
 *
 * Calls func2 on the address of the inner loop counter for every counter
 * value, repeated over many outer passes, and adds up the results. The
 * callee therefore inspects the first two bytes of the counter's in-memory
 * representation. The sum is returned so the calls contribute to an
 * observable result.
 */
int main()
{
        enum { OUTER_PASSES = 10000, INNER_LIMIT = 70000 };
        int total = 0;
        int pass;
        int i;

        for (pass = 0; pass < OUTER_PASSES; pass++) {
                for (i = 0; i < INNER_LIMIT; i++) {
                        total += func2((unsigned char *)&i);
                }
        }

        return total;
}

Debug mode:

          func1      func2
real    0m3.621s    0m3.586s
user    0m3.614s    0m3.579s
sys     0m0.001s    0m0.000s

Release mode:

          func1      func2
real    0m0.833s    0m0.880s
user    0m0.831s    0m0.878s
sys     0m0.000s    0m0.002s

Assembly code for the func1 version:

        # Compiler output for the benchmark loops with the two-byte compare.
        # Register roles as used below: %esi = outer countdown, %edx = next
        # inner counter value, %ecx = 0/1 result of the previous compare,
        # %eax = running sum.
        .cfi_startproc
        movl    $10000, %esi            # outer loop runs 10000 times
        xorl    %eax, %eax              # running sum = 0
        .p2align 4,,10
        .p2align 3
.L6:
        movl    $1, %edx                # next inner counter value = 1
        xorl    %ecx, %ecx              # pending result = 0 (nothing loaded yet for counter 0)
        .p2align 4,,10
        .p2align 3
.L8:
        movl    %edx, -24(%rsp)         # store the 32-bit counter to the stack slot read by the byte compares
        addl    $1, %edx                # advance the counter
        addl    %ecx, %eax              # add the previous iteration's result to the sum
        cmpl    $70001, %edx            # inner loop finished?
        je      .L3
        xorl    %ecx, %ecx              # assume result 0
        cmpb    $-1, -24(%rsp)          # first byte of the slot == 0xff?
        jne     .L8                     # no: result stays 0, second byte is never examined
        xorl    %ecx, %ecx
        cmpb    $-1, -23(%rsp)          # second byte of the slot == 0xff?
        sete    %cl                     # result = 1 only if it matched as well
        jmp     .L8
        .p2align 4,,10
        .p2align 3
.L3:
        subl    $1, %esi                # one outer pass done
        jne     .L6                     # repeat until the countdown reaches 0
        rep
        ret                             # return with the sum in %eax
        .cfi_endproc

Assembly code for the func2 version:

        # Compiler output for the benchmark loops with the single 16-bit compare.
        # Register roles as used below: %esi = outer countdown, %edx = next
        # inner counter value, %ecx = 16-bit value under test and then the
        # 0/1 result, %eax = running sum.
        .cfi_startproc
        movl    $10000, %esi            # outer loop runs 10000 times
        xorl    %eax, %eax              # running sum = 0
        .p2align 4,,10
        .p2align 3
.L4:
        movl    $1, %edx                # next inner counter value = 1
        xorl    %ecx, %ecx              # value under test for the first iteration is 0
        jmp     .L3                     # skip the load on the first iteration
        .p2align 4,,10
        .p2align 3
.L7:
        movzwl  -24(%rsp), %ecx         # load 16 bits from the stack slot, zero-extended
.L3:
        cmpw    $-1, %cx                # low 16 bits == 0xffff?
        movl    %edx, -24(%rsp)         # store the next 32-bit counter value to the slot (mov leaves flags intact)
        sete    %cl                     # result byte = 1 on match, else 0 (flags from cmpw)
        addl    $1, %edx                # advance the counter
        movzbl  %cl, %ecx               # widen the result byte to 32 bits
        addl    %ecx, %eax              # add the result to the sum
        cmpl    $70001, %edx            # inner loop finished?
        jne     .L7
        subl    $1, %esi                # one outer pass done
        jne     .L4                     # repeat until the countdown reaches 0
        rep
        ret                             # return with the sum in %eax
        .cfi_endproc