stdio-common: Fix scanf parsing for NaN types [BZ #30647]

The scanf family of functions, such as sscanf and fscanf, currently
fails to parse nan() and nan(n-char-sequence).  This happens because
__vfscanf_internal only checks for the bare 'nan'.

This commit adds support for all valid NaN forms, i.e., nan, nan()
and nan(n-char-sequence), where n-char-sequence matches
[a-zA-Z0-9_]+, thus fixing bug 30647.  Any other representation
of NaN should result in a conversion error.

New tests are also added to verify the correct parsing of NaN types for
float, double and long double formats.

Signed-off-by: Avinal Kumar <avinal.xlvii@gmail.com>
Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
This commit is contained in:
Avinal Kumar 2024-10-25 15:48:27 +05:30 committed by Adhemerval Zanella
parent ac73067cb7
commit 04e8698fcc
3 changed files with 129 additions and 1 deletions

View File

@ -261,6 +261,7 @@ tests := \
tst-scanf-binary-gnu89 \ tst-scanf-binary-gnu89 \
tst-scanf-bz27650 \ tst-scanf-bz27650 \
tst-scanf-intn \ tst-scanf-intn \
tst-scanf-nan \
tst-scanf-round \ tst-scanf-round \
tst-scanf-to_inpunct \ tst-scanf-to_inpunct \
tst-setvbuf1 \ tst-setvbuf1 \

View File

@ -0,0 +1,83 @@
/* Test scanf formats for nan, nan(), nan(n-char-sequence) types.
Copyright The GNU Toolchain Authors.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <stdint.h>
#include <stdio.h>
#include <support/check.h>
/* Run sscanf on STR with format FMT, storing the conversions through
   the pointer arguments that follow, and record a test failure unless
   the return value equals OK, the expected number of successful
   conversions.

   TEST_COMPARE is used instead of TEST_VERIFY so that a failure
   reports the count sscanf actually returned next to the expected
   one.  The temporary carries a macro-specific name so that it cannot
   capture a caller's variable passed through __VA_ARGS__.  */
#define CHECK_SCANF_RET(OK, STR, FMT, ...)                              \
  do                                                                    \
    {                                                                   \
      int check_scanf_ret_ = sscanf (STR, FMT, __VA_ARGS__);            \
      TEST_COMPARE (check_scanf_ret_, (OK));                            \
    }                                                                   \
  while (0)
/* Exercise sscanf on the textual NaN forms.

   Accepted spellings:
     - nan
     - nan()
     - nan(SEQ), where SEQ matches [a-zA-Z0-9_]+
   Every other spelling must be reported as a conversion error.

   The first argument of each CHECK_SCANF_RET is the expected sscanf
   return value, i.e. the number of conversions that succeeded.  Once
   a conversion fails, the remainder of the input is discarded.  */
static int
do_test (void)
{
  int ival;
  float fval;
  double dval;
  long double ldval;

  /* Inputs in which every directive converts.  Note that "nan12" and
     "nannan()..." split into consecutive fields without separators.  */
  CHECK_SCANF_RET (1, "nan", "%lf", &dval);
  CHECK_SCANF_RET (1, "nan()", "%lf", &dval);
  CHECK_SCANF_RET (1, "nan(12345)", "%lf", &dval);
  CHECK_SCANF_RET (2, "nan12", "%lf%d", &dval, &ival);
  CHECK_SCANF_RET (2, "nan nan()", "%f%Lf", &fval, &ldval);
  CHECK_SCANF_RET (2, "nan nan(12345foo)", "%lf%Lf", &dval, &ldval);
  CHECK_SCANF_RET (3, "nan nan() 12.234", "%lf%Lf%f", &dval, &ldval, &fval);
  CHECK_SCANF_RET (4, "nannan()nan(foo)1234", "%lf%f%Lf%d",
                   &dval, &fval, &ldval, &ival);

  /* Inputs where only the leading part converts: a field width of 3
     stops right after "nan", and a second field with a dangling '('
     fails after the first one has already succeeded.  */
  CHECK_SCANF_RET (1, "nan( )", "%3lf", &dval);
  CHECK_SCANF_RET (1, "nan nan(", "%lf%f", &dval, &fval);

  /* Inputs for which nothing converts.  */

  /* An opening parenthesis that is never closed.  */
  CHECK_SCANF_RET (0, "nan(", "%lf", &dval);
  CHECK_SCANF_RET (0, "nan(123", "%lf", &dval);
  CHECK_SCANF_RET (0, "nan(12345", "%lf%d", &dval, &ival);

  /* The field width runs out after '(' was consumed but before the
     closing ')' is reached.  */
  CHECK_SCANF_RET (0, "nan()", "%4Lf", &ldval);
  CHECK_SCANF_RET (0, "nan(1", "%5lf", &dval);

  /* A space is not permitted between the parentheses.  */
  CHECK_SCANF_RET (0, "nan( )", "%lf", &dval);
  CHECK_SCANF_RET (0, "nan( )12.34", "%Lf%f", &ldval, &fval);
  CHECK_SCANF_RET (0, "nan(12 foo)", "%f", &fval);

  /* A period '.' is not permitted between the parentheses.  */
  CHECK_SCANF_RET (0, "nan(12.34) nan(FooBar)", "%lf%Lf", &dval, &ldval);

  return 0;
}
#include <support/test-driver.c>

View File

@ -2028,7 +2028,51 @@ digits_extended_fail:
if (width > 0) if (width > 0)
--width; --width;
char_buffer_add (&charbuf, c); char_buffer_add (&charbuf, c);
/* It is "nan". */ /* It is at least "nan". Now we check for nan() and
nan(n-char-sequence). */
if (width != 0 && inchar () != EOF)
{
if (c == L_('('))
{
if (width > 0)
--width;
char_buffer_add (&charbuf, c);
/* A '(' was observed, check for a closing ')', there
may or may not be a n-char-sequence in between. We
have to check the longest prefix until there is a
conversion error or closing parenthesis. */
do
{
if (__glibc_unlikely (width == 0
|| inchar () == EOF))
{
/* Conversion error because we ran out of
characters. */
conv_error ();
break;
}
if (!((c >= L_('0') && c <= L_('9'))
|| (c >= L_('A') && c <= L_('Z'))
|| (c >= L_('a') && c <= L_('z'))
|| c == L_('_') || c == L_(')')))
{
/* Invalid character was observed. Only valid
characters are [a-zA-Z0-9_] and ')'. */
conv_error ();
break;
}
if (width > 0)
--width;
char_buffer_add (&charbuf, c);
}
while (c != L_(')'));
/* The loop only exits successfully when ')' is the
last character. */
}
else
/* It is only 'nan'. */
ungetc (c, s);
}
goto scan_float; goto scan_float;
} }
else if (TOLOWER (c) == L_('i')) else if (TOLOWER (c) == L_('i'))