x86: Fix bug in strchrnul-evex512 [BZ #32078]

Issue was we were expecting not matches with CHAR before the start of
the string in the page cross case.

The check code in the page cross case:
```
    and    $0xffffffffffffffc0,%rax
    vmovdqa64 (%rax),%zmm17
    vpcmpneqb %zmm17,%zmm16,%k1
    vptestmb %zmm17,%zmm17,%k0{%k1}
    kmovq  %k0,%rax
    inc    %rax
    shr    %cl,%rax
    je     L(continue)
```

expects that all characters that neither match null nor CHAR will be
1s in `rax` prior to the `inc`. Then the `inc` will overflow all of
the 1s where no relevant match was found.

This is incorrect in the page-cross case, as the
`vmovdqa64 (%rax),%zmm17` loads from before the start of the input
string.

If there are matches with CHAR before the start of the string, `rax`
won't properly overflow.

The fix is quite simple. Just replace:

```
    inc    %rax
    shr    %cl,%rax
```
With:
```
    sar    %cl,%rax
    inc    %rax
```

The arithmetic shift will clear any matches prior to the start of the
string while maintaining the signbit so the 1s can properly overflow
to zero in the case of no matches.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>

(cherry picked from commit 7da0886247)
This commit is contained in:
Noah Goldstein 2024-08-13 23:29:14 +08:00 committed by H.J. Lu
parent 059f82c3b9
commit c005d1bd6f
2 changed files with 68 additions and 5 deletions

View File

@ -248,6 +248,69 @@ check1 (void)
check_result (impl, s, c, exp_result);
}
static void
check2 (void)
{
CHAR *s = (CHAR *) (buf1 + getpagesize () - 4 * sizeof (CHAR));
CHAR *s_begin = (CHAR *) (buf1 + getpagesize () - 64);
#ifndef USE_FOR_STRCHRNUL
CHAR *exp_result = NULL;
#else
CHAR *exp_result = s + 1;
#endif
CHAR val = 0x12;
for (; s_begin != s; ++s_begin)
*s_begin = val;
s[0] = val + 1;
s[1] = 0;
s[2] = val + 1;
s[3] = val + 1;
{
FOR_EACH_IMPL (impl, 0)
check_result (impl, s, val, exp_result);
}
s[3] = val;
{
FOR_EACH_IMPL (impl, 0)
check_result (impl, s, val, exp_result);
}
exp_result = s;
s[0] = val;
{
FOR_EACH_IMPL (impl, 0)
check_result (impl, s, val, exp_result);
}
s[3] = val + 1;
{
FOR_EACH_IMPL (impl, 0)
check_result (impl, s, val, exp_result);
}
s[0] = val + 1;
s[1] = val + 1;
s[2] = val + 1;
s[3] = val + 1;
s[4] = val;
exp_result = s + 4;
{
FOR_EACH_IMPL (impl, 0)
check_result (impl, s, val, exp_result);
}
s[4] = 0;
#ifndef USE_FOR_STRCHRNUL
exp_result = NULL;
#else
exp_result = s + 4;
#endif
{
FOR_EACH_IMPL (impl, 0)
check_result (impl, s, val, exp_result);
}
}
int
test_main (void)
{
@ -256,7 +319,7 @@ test_main (void)
test_init ();
check1 ();
check2 ();
printf ("%20s", "");
FOR_EACH_IMPL (impl, 0)
printf ("\t%s", impl->name);

View File

@ -124,13 +124,13 @@ L(page_cross):
VPCMPNE %VMM(1), %VMM(0), %k1
VPTEST %VMM(1), %VMM(1), %k0{%k1}
KMOV %k0, %VRAX
# ifdef USE_AS_WCSCHR
sar %cl, %VRAX
#ifdef USE_AS_WCSCHR
sub $VEC_MATCH_MASK, %VRAX
# else
#else
inc %VRAX
# endif
#endif
/* Ignore number of character for alignment adjustment. */
shr %cl, %VRAX
jz L(align_more)
bsf %VRAX, %VRAX