stdio-common: Don't read real input beyond the field width in scanf

Fix a code pattern that repeats across '__vfscanf_internal' where the
remaining field width of 0 is incorrectly interpreted as no width limit,
which in turn results in reading input beyond the limit requested.  The
lack of width limit is indicated by the field width of -1 rather than 0,
set earlier on in the function.

The problematic code pattern is used for both integer and floating-point
conversions, but in the former case a corresponding conditional earlier
on prevents the field width from being 0 when executing the pattern.  It
does trigger in the latter case, where the decimal point is a multibyte
character or for multibyte digit characters.

Fix the code pattern by using 'width >= 0' comparison, and apply the fix
throughout even to code handling integer conversions so as to interpret
the field width consistently and avoid people's confusion even if width
cannot be 0 at those places.

For multibyte digit characters there is an additional issue that causes
code to push back a partially fetched multibyte character multiple times
as execution proceeds through matching data retrieved against individual
digits that have to be rejected due to the field width limit preventing
the rest of the multibyte character from being retrieved.  It is because
code relies on 'ungetc' ignoring a request to push back EOF, however in
the out-of-limit field width condition the data held is not EOF but the
previously retrieved character byte instead.

Fix this issue by artificially assigning EOF to the character byte
storage variable where the out-of-limit field width condition prevents
further processing, and also apply the fix throughout except for the
decimal point/thousands separator case, which uses different code.

Add test cases accordingly.

Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
This commit is contained in:
Maciej W. Rozycki 2025-08-11 17:42:12 +01:00
parent 8543577b04
commit b692181703
4 changed files with 134 additions and 11 deletions

View File

@ -249,6 +249,8 @@ tests = \
tst-mbswcs4 \
tst-mbswcs5 \
tst-mbswcs6 \
tst-scanf-width-digit \
tst-scanf-width-point \
tst-setlocale \
tst-setlocale2 \
tst-setlocale3 \

View File

@ -0,0 +1,60 @@
/* Verify multibyte digit extending beyond scanf field width.
Copyright (C) 2025 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <locale.h>
#include <stdio.h>
#include <string.h>
#include <libc-diag.h>
#include <support/check.h>
#define P1 "\xdb\xb1"
#define P2 "\xdb\xb2"
/* Scan a number made of two-byte digits with a field width that ends in
   the middle of the second digit, and check that only the first digit is
   consumed while both bytes of the second one stay in the input.
   Returns 0 on success; any failed check terminates the test.  */
static int
do_test (void)
{
  if (setlocale (LC_ALL, "fa_IR.UTF-8") == NULL)
    FAIL_EXIT1 ("setlocale (LC_ALL, \"fa_IR.UTF-8\")");

  /* P1 and P2 are two bytes each, so the input is four bytes long.  */
  char s[] = P1 P2;
  FILE *f = fmemopen (s, strlen (s), "r");
  if (f == NULL)
    FAIL_EXIT1 ("fmemopen: %m");

  /* Avoid: "warning: 'I' flag used with '%f' gnu_scanf format [-Wformat=]";
     cf. GCC PR c/119514.  */
  DIAG_PUSH_NEEDS_COMMENT;
  DIAG_IGNORE_NEEDS_COMMENT (4.9, "-Wformat");

  /* The field width of 3 covers all of P1 but only the first byte of P2.
     This should succeed parsing a floating-point number, and leave '\xdb',
     '\xb2' in the input.  */
  double d;
  int c;
  TEST_VERIFY_EXIT (fscanf (f, "%I3lf%n", &d, &c) == 1);
  TEST_VERIFY_EXIT (d == 1.0);
  /* Only the two bytes of P1 count as consumed.  */
  TEST_VERIFY_EXIT (c == 2);
  /* Both bytes of P2 must still be readable, in order.  */
  TEST_VERIFY_EXIT (fgetc (f) == 0xdb);
  TEST_VERIFY_EXIT (fgetc (f) == 0xb2);

  DIAG_POP_NEEDS_COMMENT;

  /* Release the memory stream rather than leaking it.  */
  TEST_VERIFY_EXIT (fclose (f) == 0);

  return 0;
}
#include <support/test-driver.c>

View File

@ -0,0 +1,52 @@
/* Verify multibyte decimal point extending beyond scanf field width.
Copyright (C) 2025 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <locale.h>
#include <stdio.h>
#include <string.h>
#include <libc-diag.h>
#include <support/check.h>
#define PD "\xd9\xab"
/* Scan "1" followed by a two-byte decimal point with a field width that
   ends in the middle of the decimal point, and check that only the digit
   is consumed while both bytes of the decimal point stay in the input.
   Returns 0 on success; any failed check terminates the test.  */
static int
do_test (void)
{
  if (setlocale (LC_ALL, "ps_AF.UTF-8") == NULL)
    FAIL_EXIT1 ("setlocale (LC_ALL, \"ps_AF.UTF-8\")");

  /* One ASCII digit followed by the two bytes of PD.  */
  char s[] = "1" PD;
  FILE *f = fmemopen (s, strlen (s), "r");
  if (f == NULL)
    FAIL_EXIT1 ("fmemopen: %m");

  /* The field width of 2 covers the digit but only the first byte of PD.
     This should succeed parsing a floating-point number, and leave '\xd9',
     '\xab' in the input.  */
  double d;
  int c;
  TEST_VERIFY_EXIT (fscanf (f, "%2lf%n", &d, &c) == 1);
  TEST_VERIFY_EXIT (d == 1.0);
  /* Only the single digit byte counts as consumed.  */
  TEST_VERIFY_EXIT (c == 1);
  /* Both bytes of PD must still be readable, in order.  */
  TEST_VERIFY_EXIT (fgetc (f) == 0xd9);
  TEST_VERIFY_EXIT (fgetc (f) == 0xab);

  /* Release the memory stream rather than leaking it.  */
  TEST_VERIFY_EXIT (fclose (f) == 0);

  return 0;
}
#include <support/test-driver.c>

View File

@ -119,6 +119,15 @@
(void) (c != EOF \
? ++read_in \
: (size_t) (inchar_errno = errno)), c))
/* Same as inchar, but stop upon field exhaustion according to AVAIL:
   when AVAIL is 0 no input is fetched and C is set to EOF instead, so
   that C no longer holds the previously fetched byte.  Otherwise the
   next character is fetched with inchar.  Evaluates to the resulting
   value of C.  AVAIL is evaluated exactly once and is parenthesized so
   that an arbitrary expression can be passed.  */
# define inchar_in_field(avail) \
  ({ \
    if ((avail) == 0) \
      c = EOF; \
    else \
      inchar (); \
    c; \
  })
# define ISSPACE(Ch) __isspace_l (Ch, loc)
# define ISDIGIT(Ch) __isdigit_l (Ch, loc)
# define ISXDIGIT(Ch) __isxdigit_l (Ch, loc)
@ -1639,7 +1648,7 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
++wcdigits[n];
#else
const char *cmpp;
int avail = width > 0 ? width : INT_MAX;
int avail = width >= 0 ? width : INT_MAX;
if (__glibc_unlikely (map != NULL))
mbdigits[n] = digits_extended[n];
@ -1657,7 +1666,7 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
break;
else
{
if (avail == 0 || inchar () == EOF)
if (inchar_in_field (avail) == EOF)
break;
--avail;
}
@ -1701,7 +1710,7 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
++wcdigits[n];
#else
const char *cmpp;
int avail = width > 0 ? width : INT_MAX;
int avail = width >= 0 ? width : INT_MAX;
cmpp = mbdigits[n];
while ((unsigned char) *cmpp == c && avail >= 0)
@ -1710,7 +1719,7 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
break;
else
{
if (avail == 0 || inchar () == EOF)
if (inchar_in_field (avail) == EOF)
break;
--avail;
}
@ -1757,7 +1766,7 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
break;
#else
const char *cmpp = thousands;
int avail = width > 0 ? width : INT_MAX;
int avail = width >= 0 ? width : INT_MAX;
while ((unsigned char) *cmpp == c && avail >= 0)
{
@ -1766,7 +1775,7 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
break;
else
{
if (avail == 0 || inchar () == EOF)
if (inchar_in_field (avail) == EOF)
break;
--avail;
}
@ -1837,7 +1846,7 @@ digits_extended_fail:
break;
#else
const char *cmpp = thousands;
int avail = width > 0 ? width : INT_MAX;
int avail = width >= 0 ? width : INT_MAX;
while ((unsigned char) *cmpp == c && avail >= 0)
{
@ -1846,7 +1855,7 @@ digits_extended_fail:
break;
else
{
if (avail == 0 || inchar () == EOF)
if (inchar_in_field (avail) == EOF)
break;
--avail;
}
@ -2225,7 +2234,7 @@ digits_extended_fail:
}
#else
const char *cmpp = decimal;
int avail = width > 0 ? width : INT_MAX;
int avail = width >= 0 ? width : INT_MAX;
if (! got_dot)
{
@ -2463,14 +2472,14 @@ digits_extended_fail:
}
#else
const char *cmpp = mbdigits[n];
int avail = width > 0 ? width : INT_MAX;
int avail = width >= 0 ? width : INT_MAX;
while ((unsigned char) *cmpp == c && avail >= 0)
if (*++cmpp == '\0')
break;
else
{
if (avail == 0 || inchar () == EOF)
if (inchar_in_field (avail) == EOF)
break;
--avail;
}