diff --git a/gen-locales.mk b/gen-locales.mk index 9c523d2a05..b005a72866 100644 --- a/gen-locales.mk +++ b/gen-locales.mk @@ -1,8 +1,19 @@ # defines target $(gen-locales) that generates the locales given in $(LOCALES) LOCALE_SRCS := $(shell echo "$(LOCALES)"|sed 's/\([^ .]*\)[^@ ]*\(@[^ ]*\)\?/\1\2/g') +# The CHARMAPS dependency handling must be able to process: +# 1. No character map e.g. eo, en_US +# 2. Character maps e.g. en_US.UTF-8 +# 3. Character maps with modifier e.g. tt_RU.UTF-8@iqtelif +# This complicates the processing slightly so we do it in multiple edits, +# the first captures the character map with the anchoring period while +# the rest of the edits remove the period to get a valid file or adjust +# the name to match the true name. CHARMAPS := $(shell echo "$(LOCALES)" | \ - sed -e 's/[^ .]*[.]\([^@ ]*\)\(@[^@ ]*\)*/\1/g' -e s/SJIS/SHIFT_JIS/g) + sed -e 's/\([^ .]*\)\([^@ ]*\)\(@[^@ ]*\)*/\2/g' \ + -e 's/^\./ /g' \ + -e 's/ \./ /g' \ + -e s/SJIS/SHIFT_JIS/g) CTYPE_FILES = $(addsuffix /LC_CTYPE,$(LOCALES)) gen-locales := $(addprefix $(common-objpfx)localedata/,$(CTYPE_FILES)) diff --git a/locale/findlocale.c b/locale/findlocale.c index 8d6e4e33e3..43ff7201c1 100644 --- a/locale/findlocale.c +++ b/locale/findlocale.c @@ -244,7 +244,14 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len, locale_file = locale_file->successor[cnt]; if (locale_file == NULL) - return NULL; + { + /* If this is the second time we tried to load a failed + locale then the locale_file value comes from the cache + and we will not carry out any actual filesystem + operations so we must set ENOENT here. */ + __set_errno (ENOENT); + return NULL; + } } /* The LC_CTYPE category allows to check whether a locale is really @@ -291,8 +298,14 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len, if (__gconv_compare_alias (upstr (ccodeset, ccodeset), upstr (clocale_codeset, clocale_codeset)) != 0) - /* The codesets are not identical, don't use the locale. */ - return NULL; + { + /* The codesets are not identical, don't use the locale. + If this is the second time we tried to load a locale + whose codeset doesn't match then the result came from + the cache and must set ENOENT here. */ + __set_errno (ENOENT); + return NULL; + } } /* Determine the locale name for which loading succeeded. This diff --git a/locale/loadlocale.c b/locale/loadlocale.c index 1e5f93e927..991c0591e9 100644 --- a/locale/loadlocale.c +++ b/locale/loadlocale.c @@ -237,7 +237,6 @@ _nl_load_locale (struct loaded_l10nfile *file, int category) int save_err; int alloc = ld_mapped; - file->decided = 1; file->data = NULL; fd = __open_nocancel (file->filename, O_RDONLY | O_CLOEXEC); @@ -345,6 +344,7 @@ _nl_load_locale (struct loaded_l10nfile *file, int category) newdata->alloc = alloc; file->data = newdata; + file->decided = 1; } void diff --git a/localedata/Makefile b/localedata/Makefile index 713e7aebad..6d0bac225b 100644 --- a/localedata/Makefile +++ b/localedata/Makefile @@ -239,6 +239,7 @@ tests = \ tst-iconv-emojis-trans \ tst-iconv-math-trans \ tst-leaks \ + tst-locale-loadlocale \ tst-mbswcs1 \ tst-mbswcs2 \ tst-mbswcs3 \ @@ -325,6 +326,7 @@ LOCALES := \ dsb_DE.UTF-8 \ dz_BT.UTF-8 \ en_GB.UTF-8 \ + en_US \ en_US.ANSI_X3.4-1968 \ en_US.ISO-8859-1\ en_US.UTF-8 \ @@ -406,6 +408,8 @@ include ../gen-locales.mk $(objpfx)tst-iconv-emojis-trans.out: $(gen-locales) $(objpfx)tst-iconv-math-trans.out: $(gen-locales) +# tst-locale-loadlocale: Needs an en_US-named locale for the test. +$(objpfx)tst-locale-loadlocale.out: $(gen-locales) endif include ../Rules diff --git a/localedata/gen-locale.sh b/localedata/gen-locale.sh index b8c422ad13..e400d02b82 100644 --- a/localedata/gen-locale.sh +++ b/localedata/gen-locale.sh @@ -48,9 +48,9 @@ generate_locale () } locfile=`echo $locfile|sed 's|.*/\([^/]*/LC_CTYPE\)|\1|'` -locale=`echo $locfile|sed 's|\([^.]*\)[.].*/LC_CTYPE|\1|'` -charmap=`echo $locfile|sed 's|[^.]*[.]\([^@ ]*\)\(@[^ ]*\)\?/LC_CTYPE|\1|'` -modifier=`echo $locfile|sed 's|[^.]*[.]\([^@ ]*\)\(@[^ ]*\)\?/LC_CTYPE|\2|'` +locale=`echo $locfile|sed 's|\([^.]*\).*/LC_CTYPE|\1|'` +charmap=`echo $locfile|sed -e 's|[^.]*\([^@ ]*\)\(@[^ ]*\)\?/LC_CTYPE|\1|' -e 's|^\.||g'` +modifier=`echo $locfile|sed 's|[^.]*\([^@ ]*\)\(@[^ ]*\)\?/LC_CTYPE|\2|'` echo "Generating locale $locale.$charmap: this might take a while..." @@ -63,10 +63,16 @@ echo "Generating locale $locale.$charmap: this might take a while..." # you to develop in-progress locales. flags="" +charmap_real="$charmap" + +# If no character map is specified then we fall back to UTF-8. +if [ -z "$charmap" ]; then + charmap_real="UTF-8" +fi + # For SJIS the charmap is SHIFT_JIS. We just want the locale to have # a slightly nicer name instead of using "*.SHIFT_SJIS", but that # means we need a mapping here. -charmap_real="$charmap" if [ "$charmap" = "SJIS" ]; then charmap_real="SHIFT_JIS" fi @@ -80,4 +86,12 @@ if [ "$charmap_real" = 'SHIFT_JIS' ] \ flags="$flags --no-warnings=ascii" fi -generate_locale $charmap_real $locale$modifier $locale.$charmap$modifier "$flags" +# If the character map is not specified then we output a locale +# with the just the name of the locale e.g. eo, en_US. This is +# used for test locales that act as fallbacks. +output_file="$locale.$charmap$modifier" +if [ -z "$charmap" ]; then + output_file="$locale" +fi + +generate_locale $charmap_real $locale$modifier $output_file "$flags" diff --git a/localedata/tst-locale-loadlocale.c b/localedata/tst-locale-loadlocale.c new file mode 100644 index 0000000000..64796e975d --- /dev/null +++ b/localedata/tst-locale-loadlocale.c @@ -0,0 +1,67 @@ +/* Test for locale loading error handling (Bug 14247) + Copyright (C) 2024 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include + +static int +do_test (void) +{ + locale_t loc = NULL; + + /* We must load an en_US-based locale for the final test to fail. + The locale loading code will find the en_US locale already loaded + but the codesets won't match. */ + xsetlocale (LC_ALL, "en_US.UTF-8"); + + /* Call newlocale with an invalid locale. We expect the test system + does not have "invalidlocale" locale. */ + errno = 0; + loc = newlocale (LC_COLLATE_MASK | LC_CTYPE_MASK, "invalidlocale.utf8", 0); + TEST_VERIFY (loc == NULL && errno != 0); + + /* Call newlocale again with the same name. This triggers bug 14247 where + the second call fails to set errno correctly. */ + errno = 0; + loc = newlocale (LC_COLLATE_MASK | LC_CTYPE_MASK, "invalidlocale.utf8", 0); + TEST_VERIFY (loc == NULL && errno != 0); + + /* Next we attempt to load a locale that exists but whose codeset + does not match the codeset of the locale on the system. + This is more difficult to test but we rely on the fact that locale + processing will normalize the locale name and attempt to open + "en_US" with no codeset as a fallback and this will allow us to + compare a loaded "en_US" locale with a UTF-8 codeset to the + ficiticious "en_US.utf99" and get a codeset match failure. */ + errno = 0; + loc = newlocale (LC_COLLATE_MASK | LC_CTYPE_MASK, "en_US.utf99", 0); + TEST_VERIFY (loc == NULL && errno != 0); + + /* Call newlocale again with the same name. This triggers bug 14247 where + the second call fails to set errno correctly. */ + errno = 0; + loc = newlocale (LC_COLLATE_MASK | LC_CTYPE_MASK, "en_US.utf99", 0); + TEST_VERIFY (loc == NULL && errno != 0); + + return 0; +} + +#include