File setlocale-directory-traversal.patch of Package glibc.8004

2014-07-02  Florian Weimer  <fweimer@redhat.com>

	* manual/locale.texi (Locale Names): New section documenting
	locale name syntax.  Adjust menu and node chaining accordingly.
	(Choosing Locale): Reference Locale Names, Locale Categories.
	Mention setting LC_ALL=C.  Reflect that name syntax is now
	documented.
	(Locale Categories): New section title.  Reference Locale Names.
	LC_ALL is an environment variable, but not a category.
	(Setting the Locale): Remove "locale -a" invocation and LOCPATH
	description, now in Locale Name.  Reference that section.  Locale
	name syntax is now documented.

	[BZ #17137]
	* locale/findlocale.c (name_present, valid_locale_name): New
	functions.
	(_nl_find_locale): Use the loc_name variable to store name
	candidates.  Call name_present and valid_locale_name to check and
	validate locale names.  Return an error if the locale is invalid.

	* localedata/tst-setlocale3.c: New file.
	* localedata/Makefile (tests): Add tst-setlocale3.
	(tst-setlocale3-ENV): New variable.

	* locale/setlocale.c (setlocale): Use strdup for allocating
	composite name copy.

Index: glibc-2.19/locale/findlocale.c
===================================================================
--- glibc-2.19.orig/locale/findlocale.c
+++ glibc-2.19/locale/findlocale.c
@@ -17,6 +17,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <assert.h>
+#include <errno.h>
 #include <locale.h>
 #include <stdlib.h>
 #include <string.h>
@@ -57,6 +58,45 @@ struct loaded_l10nfile *_nl_locale_file_
 
 const char _nl_default_locale_path[] attribute_hidden = LOCALEDIR;
 
+/* Checks if the name is actually present, that is, not NULL and not
+   empty.  */
+static inline int
+name_present (const char *name)
+{
+  return name != NULL && name[0] != '\0';
+}
+
+/* Checks that the locale name neither extremely long, nor contains a
+   ".." path component (to prevent directory traversal).  */
+static inline int
+valid_locale_name (const char *name)
+{
+  /* Not set.  */
+  size_t namelen = strlen (name);
+  /* Name too long.  The limit is arbitrary and prevents stack overflow
+     issues later.  */
+  if (__glibc_unlikely (namelen > 255))
+    return 0;
+  /* Directory traversal attempt.  */
+  static const char slashdot[4] = {'/', '.', '.', '/'};
+  if (__glibc_unlikely (memmem (name, namelen,
+				slashdot, sizeof (slashdot)) != NULL))
+    return 0;
+  if (namelen == 2 && __glibc_unlikely (name[0] == '.' && name [1] == '.'))
+    return 0;
+  if (namelen >= 3
+      && __glibc_unlikely (((name[0] == '.'
+			     && name[1] == '.'
+			     && name[2] == '/')
+			    || (name[namelen - 3] == '/'
+				&& name[namelen - 2] == '.'
+				&& name[namelen - 1] == '.'))))
+    return 0;
+  /* If there is a slash in the name, it must start with one.  */
+  if (__glibc_unlikely (memchr (name, '/', namelen) != NULL) && name[0] != '/')
+    return 0;
+  return 1;
+}
 
 struct __locale_data *
 internal_function
@@ -65,7 +105,7 @@ _nl_find_locale (const char *locale_path
 {
   int mask;
   /* Name of the locale for this category.  */
-  char *loc_name;
+  char *loc_name = (char *) *name;
   const char *language;
   const char *modifier;
   const char *territory;
@@ -73,31 +113,39 @@ _nl_find_locale (const char *locale_path
   const char *normalized_codeset;
   struct loaded_l10nfile *locale_file;
 
-  if ((*name)[0] == '\0')
+  if (loc_name[0] == '\0')
     {
       /* The user decides which locale to use by setting environment
 	 variables.  */
-      *name = getenv ("LC_ALL");
-      if (*name == NULL || (*name)[0] == '\0')
-	*name = getenv (_nl_category_names.str
+      loc_name = getenv ("LC_ALL");
+      if (!name_present (loc_name))
+	loc_name = getenv (_nl_category_names.str
 			+ _nl_category_name_idxs[category]);
-      if (*name == NULL || (*name)[0] == '\0')
-	*name = getenv ("LANG");
+      if (!name_present (loc_name))
+	loc_name = getenv ("LANG");
+      if (!name_present (loc_name))
+	loc_name = (char *) _nl_C_name;
     }
 
-  if (*name == NULL || (*name)[0] == '\0'
-      || (__builtin_expect (__libc_enable_secure, 0)
-	  && strchr (*name, '/') != NULL))
-    *name = (char *) _nl_C_name;
+  /* We used to fall back to the C locale if the name contains a slash
+     character '/', but we now check for directory traversal in
+     valid_locale_name, so this is no longer necessary.  */
 
-  if (__builtin_expect (strcmp (*name, _nl_C_name), 1) == 0
-      || __builtin_expect (strcmp (*name, _nl_POSIX_name), 1) == 0)
+  if (__builtin_expect (strcmp (loc_name, _nl_C_name), 1) == 0
+      || __builtin_expect (strcmp (loc_name, _nl_POSIX_name), 1) == 0)
     {
       /* We need not load anything.  The needed data is contained in
 	 the library itself.  */
       *name = (char *) _nl_C_name;
       return _nl_C[category];
     }
+  else if (!valid_locale_name (loc_name))
+    {
+      __set_errno (EINVAL);
+      return NULL;
+    }
+
+  *name = loc_name;
 
   /* We really have to load some data.  First we try the archive,
      but only if there was no LOCPATH environment variable specified.  */
Index: glibc-2.19/locale/setlocale.c
===================================================================
--- glibc-2.19.orig/locale/setlocale.c
+++ glibc-2.19/locale/setlocale.c
@@ -272,6 +272,8 @@ setlocale (int category, const char *loc
 	 of entries of the form `CATEGORY=VALUE'.  */
       const char *newnames[__LC_LAST];
       struct __locale_data *newdata[__LC_LAST];
+      /* Copy of the locale argument, for in-place splitting.  */
+      char *locale_copy = NULL;
 
       /* Set all name pointers to the argument name.  */
       for (category = 0; category < __LC_LAST; ++category)
@@ -281,7 +283,13 @@ setlocale (int category, const char *loc
       if (__builtin_expect (strchr (locale, ';') != NULL, 0))
 	{
 	  /* This is a composite name.  Make a copy and split it up.  */
-	  char *np = strdupa (locale);
+	  locale_copy = strdup (locale);
+	  if (__glibc_unlikely (locale_copy == NULL))
+	    {
+	      __libc_rwlock_unlock (__libc_setlocale_lock);
+	      return NULL;
+	    }
+	  char *np = locale_copy;
 	  char *cp;
 	  int cnt;
 
@@ -299,6 +307,7 @@ setlocale (int category, const char *loc
 		{
 		error_return:
 		  __libc_rwlock_unlock (__libc_setlocale_lock);
+		  free (locale_copy);
 
 		  /* Bogus category name.  */
 		  ERROR_RETURN;
@@ -391,8 +400,9 @@ setlocale (int category, const char *loc
       /* Critical section left.  */
       __libc_rwlock_unlock (__libc_setlocale_lock);
 
-      /* Free the resources (the locale path variable).  */
+      /* Free the resources.  */
       free (locale_path);
+      free (locale_copy);
 
       return composite;
     }
Index: glibc-2.19/localedata/Makefile
===================================================================
--- glibc-2.19.orig/localedata/Makefile
+++ glibc-2.19/localedata/Makefile
@@ -77,7 +77,7 @@ locale_test_suite := tst_iswalnum tst_is
 
 tests = $(locale_test_suite) tst-digits tst-setlocale bug-iconv-trans \
 	tst-leaks tst-mbswcs6 tst-xlocale1 tst-xlocale2 bug-usesetlocale \
-	tst-strfmon1 tst-sscanf bug-setlocale1 tst-setlocale2
+	tst-strfmon1 tst-sscanf bug-setlocale1 tst-setlocale2 tst-setlocale3
 tests-static = bug-setlocale1-static
 tests += $(tests-static)
 ifeq (yes,$(build-shared))
@@ -292,6 +292,7 @@ bug-setlocale1-ARGS = -- $(host-test-pro
 bug-setlocale1-static-ENV = $(bug-setlocale1-ENV)
 bug-setlocale1-static-ARGS = $(bug-setlocale1-ARGS)
 tst-setlocale2-ENV = LOCPATH=$(common-objpfx)localedata
+tst-setlocale3-ENV = LOCPATH=$(common-objpfx)localedata
 
 $(objdir)/iconvdata/gconv-modules:
 	$(MAKE) -C ../iconvdata subdir=iconvdata $@
Index: glibc-2.19/localedata/tst-setlocale3.c
===================================================================
--- /dev/null
+++ glibc-2.19/localedata/tst-setlocale3.c
@@ -0,0 +1,203 @@
+/* Regression test for setlocale invalid environment variable handling.
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* The result of setlocale may be overwritten by subsequent calls, so
+   this wrapper makes a copy.  */
+static char *
+setlocale_copy (int category, const char *locale)
+{
+  const char *result = setlocale (category, locale);
+  if (result == NULL)
+    return NULL;
+  return strdup (result);
+}
+
+static char *de_locale;
+
+static void
+setlocale_fail (const char *envstring)
+{
+  setenv ("LC_CTYPE", envstring, 1);
+  if (setlocale (LC_CTYPE, "") != NULL)
+    {
+      printf ("unexpected setlocale success for \"%s\" locale\n", envstring);
+      exit (1);
+    }
+  const char *newloc = setlocale (LC_CTYPE, NULL);
+  if (strcmp (newloc, de_locale) != 0)
+    {
+      printf ("failed setlocale call \"%s\" changed locale to \"%s\"\n",
+	      envstring, newloc);
+      exit (1);
+    }
+}
+
+static void
+setlocale_success (const char *envstring)
+{
+  setenv ("LC_CTYPE", envstring, 1);
+  char *newloc = setlocale_copy (LC_CTYPE, "");
+  if (newloc == NULL)
+    {
+      printf ("setlocale for \"%s\": %m\n", envstring);
+      exit (1);
+    }
+  if (strcmp (newloc, de_locale) == 0)
+    {
+      printf ("setlocale with LC_CTYPE=\"%s\" left locale at \"%s\"\n",
+	      envstring, de_locale);
+      exit (1);
+    }
+  if (setlocale (LC_CTYPE, de_locale) == NULL)
+    {
+      printf ("restoring locale \"%s\" with LC_CTYPE=\"%s\": %m\n",
+	      de_locale, envstring);
+      exit (1);
+    }
+  char *newloc2 = setlocale_copy (LC_CTYPE, newloc);
+  if (newloc2 == NULL)
+    {
+      printf ("restoring locale \"%s\" following \"%s\": %m\n",
+	      newloc, envstring);
+      exit (1);
+    }
+  if (strcmp (newloc, newloc2) != 0)
+    {
+      printf ("representation of locale \"%s\" changed from \"%s\" to \"%s\"",
+	      envstring, newloc, newloc2);
+      exit (1);
+    }
+  free (newloc);
+  free (newloc2);
+
+  if (setlocale (LC_CTYPE, de_locale) == NULL)
+    {
+      printf ("restoring locale \"%s\" with LC_CTYPE=\"%s\": %m\n",
+	      de_locale, envstring);
+      exit (1);
+    }
+}
+
+/* Checks that a known-good locale still works if LC_ALL contains a
+   value which should be ignored.  */
+static void
+setlocale_ignore (const char *to_ignore)
+{
+  const char *fr_locale = "fr_FR.UTF-8";
+  setenv ("LC_CTYPE", fr_locale, 1);
+  char *expected_locale = setlocale_copy (LC_CTYPE, "");
+  if (expected_locale == NULL)
+    {
+      printf ("setlocale with LC_CTYPE=\"%s\" failed: %m\n", fr_locale);
+      exit (1);
+    }
+  if (setlocale (LC_CTYPE, de_locale) == NULL)
+    {
+      printf ("failed to restore locale: %m\n");
+      exit (1);
+    }
+  unsetenv ("LC_CTYPE");
+
+  setenv ("LC_ALL", to_ignore, 1);
+  setenv ("LC_CTYPE", fr_locale, 1);
+  const char *actual_locale = setlocale (LC_CTYPE, "");
+  if (actual_locale == NULL)
+    {
+      printf ("setlocale with LC_ALL, LC_CTYPE=\"%s\" failed: %m\n",
+	      fr_locale);
+      exit (1);
+    }
+  if (strcmp (actual_locale, expected_locale) != 0)
+    {
+      printf ("setlocale under LC_ALL failed: got \"%s\", expected \"%s\"\n",
+	      actual_locale, expected_locale);
+      exit (1);
+    }
+  unsetenv ("LC_CTYPE");
+  setlocale_success (fr_locale);
+  unsetenv ("LC_ALL");
+  free (expected_locale);
+}
+
+static int
+do_test (void)
+{
+  /* The glibc test harness sets this environment variable
+     uncondionally.  */
+  unsetenv ("LC_ALL");
+
+  de_locale = setlocale_copy (LC_CTYPE, "de_DE.UTF-8");
+  if (de_locale == NULL)
+    {
+      printf ("setlocale (LC_CTYPE, \"de_DE.UTF-8\"): %m\n");
+      return 1;
+    }
+  setlocale_success ("C");
+  setlocale_success ("en_US.UTF-8");
+  setlocale_success ("/en_US.UTF-8");
+  setlocale_success ("//en_US.UTF-8");
+  setlocale_ignore ("");
+
+  setlocale_fail ("does-not-exist");
+  setlocale_fail ("/");
+  setlocale_fail ("/../localedata/en_US.UTF-8");
+  setlocale_fail ("en_US.UTF-8/");
+  setlocale_fail ("en_US.UTF-8/..");
+  setlocale_fail ("en_US.UTF-8/../en_US.UTF-8");
+  setlocale_fail ("../localedata/en_US.UTF-8");
+  {
+    size_t large_length = 1024;
+    char *large_name = malloc (large_length + 1);
+    if (large_name == NULL)
+      {
+	puts ("malloc failure");
+	return 1;
+      }
+    memset (large_name, '/', large_length);
+    const char *suffix = "en_US.UTF-8";
+    strcpy (large_name + large_length - strlen (suffix), suffix);
+    setlocale_fail (large_name);
+    free (large_name);
+  }
+  {
+    size_t huge_length = 64 * 1024 * 1024;
+    char *huge_name = malloc (huge_length + 1);
+    if (huge_name == NULL)
+      {
+	puts ("malloc failure");
+	return 1;
+      }
+    memset (huge_name, 'X', huge_length);
+    huge_name[huge_length] = '\0';
+    /* Construct a composite locale specification. */
+    const char *prefix = "LC_CTYPE=de_DE.UTF-8;LC_TIME=";
+    memcpy (huge_name, prefix, strlen (prefix));
+    setlocale_fail (huge_name);
+    free (huge_name);
+  }
+
+  return 0;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
Index: glibc-2.19/manual/locale.texi
===================================================================
--- glibc-2.19.orig/manual/locale.texi
+++ glibc-2.19/manual/locale.texi
@@ -29,6 +29,7 @@ will follow the conventions preferred by
 * Setting the Locale::          How a program specifies the locale
                                  with library functions.
 * Standard Locales::            Locale names available on all systems.
+* Locale Names::                Format of system-specific locale names.
 * Locale Information::          How to access the information for the locale.
 * Formatting Numbers::          A dedicated function to format numbers.
 * Yes-or-No Questions::         Check a Response against the locale.
@@ -99,14 +100,16 @@ locale named @samp{espana-castellano} to
 most of Spain.
 
 The set of locales supported depends on the operating system you are
-using, and so do their names.  We can't make any promises about what
-locales will exist, except for one standard locale called @samp{C} or
-@samp{POSIX}.  Later we will describe how to construct locales.
-@comment (@pxref{Building Locale Files}).
+using, and so do their names, except that the standard locale called
+@samp{C} or @samp{POSIX} always exist.  @xref{Locale Names}.
+
+In order to force the system to always use the default locale, the
+user can set the @code{LC_ALL} environment variable to @samp{C}.
 
 @cindex combining locales
-A user also has the option of specifying different locales for different
-purposes---in effect, choosing a mixture of multiple locales.
+A user also has the option of specifying different locales for
+different purposes---in effect, choosing a mixture of multiple
+locales.  @xref{Locale Categories}.
 
 For example, the user might specify the locale @samp{espana-castellano}
 for most purposes, but specify the locale @samp{usa-english} for
@@ -120,7 +123,7 @@ which locales apply.  However, the user
 for a particular subset of those purposes.
 
 @node Locale Categories, Setting the Locale, Choosing Locale, Locales
-@section Categories of Activities that Locales Affect
+@section Locale Categories
 @cindex categories for locales
 @cindex locale categories
 
@@ -128,7 +131,11 @@ The purposes that locales serve are grou
 that a user or a program can choose the locale for each category
 independently.  Here is a table of categories; each name is both an
 environment variable that a user can set, and a macro name that you can
-use as an argument to @code{setlocale}.
+use as the first argument to @code{setlocale}.
+
+The contents of the environment variable (or the string in the second
+argument to @code{setlocale}) has to be a valid locale name.
+@xref{Locale Names}.
 
 @vtable @code
 @comment locale.h
@@ -172,7 +179,7 @@ for affirmative and negative responses.
 @comment locale.h
 @comment ISO
 @item LC_ALL
-This is not an environment variable; it is only a macro that you can use
+This is not a category; it is only a macro that you can use
 with @code{setlocale} to set a single locale for all purposes.  Setting
 this environment variable overwrites all selections by the other
 @code{LC_*} variables or @code{LANG}.
@@ -355,13 +362,7 @@ The symbols in this section are defined
 @c   strndup @ascuheap @acsmem
 @c   strcasecmp_l ok (C locale)
 The function @code{setlocale} sets the current locale for category
-@var{category} to @var{locale}.  A list of all the locales the system
-provides can be created by running
-
-@pindex locale
-@smallexample
-  locale -a
-@end smallexample
+@var{category} to @var{locale}.
 
 If @var{category} is @code{LC_ALL}, this specifies the locale for all
 purposes.  The other possible values of @var{category} specify an
@@ -386,10 +387,9 @@ is passed in as @var{locale} parameter.
 
 When you read the current locale for category @code{LC_ALL}, the value
 encodes the entire combination of selected locales for all categories.
-In this case, the value is not just a single locale name.  In fact, we
-don't make any promises about what it looks like.  But if you specify
-the same ``locale name'' with @code{LC_ALL} in a subsequent call to
-@code{setlocale}, it restores the same combination of locale selections.
+If you specify the same ``locale name'' with @code{LC_ALL} in a
+subsequent call to @code{setlocale}, it restores the same combination
+of locale selections.
 
 To be sure you can use the returned string encoding the currently selected
 locale at a later time, you must make a copy of the string.  It is not
@@ -405,20 +405,15 @@ for @var{category}.
 If a nonempty string is given for @var{locale}, then the locale of that
 name is used if possible.
 
+The effective locale name (either the second argument to
+@code{setlocale}, or if the argument is an empty string, the name
+obtained from the process environment) must be valid locale name.
+@xref{Locale Names}.
+
 If you specify an invalid locale name, @code{setlocale} returns a null
 pointer and leaves the current locale unchanged.
 @end deftypefun
 
-The path used for finding locale data can be set using the
-@code{LOCPATH} environment variable. The default path for finding
-locale data is system specific.  It is computed from the value given
-as the prefix while configuring the C library.  This value normally is
-@file{/usr} or @file{/}.  For the former the complete path is:
-
-@smallexample
-/usr/lib/locale
-@end smallexample
-
 Here is an example showing how you might use @code{setlocale} to
 temporarily switch to a new locale.
 
@@ -458,7 +453,7 @@ locale categories, and future versions o
 portability, assume that any symbol beginning with @samp{LC_} might be
 defined in @file{locale.h}.
 
-@node Standard Locales, Locale Information, Setting the Locale, Locales
+@node Standard Locales, Locale Names, Setting the Locale, Locales
 @section Standard Locales
 
 The only locale names you can count on finding on all operating systems
@@ -492,7 +487,94 @@ with the environment, rather than trying
 locale explicitly by name.  Remember, different machines might have
 different sets of locales installed.
 
-@node Locale Information, Formatting Numbers, Standard Locales, Locales
+@node Locale Names, Locale Information, Standard Locales, Locales
+@section Locale Names
+
+The following command prints a list of locales supported by the
+system:
+
+@pindex locale
+@smallexample
+  locale -a
+@end smallexample
+
+@strong{Portability Note:} With the notable exception of the standard
+locale names @samp{C} and @samp{POSIX}, locale names are
+system-specific.
+
+Most locale names follow XPG syntax and consist of up to four parts:
+
+@smallexample
+@var{language}[_@var{territory}[.@var{codeset}]][@@@var{modifier}]
+@end smallexample
+
+Beside the first part, all of them are allowed to be missing.  If the
+full specified locale is not found, less specific ones are looked for.
+The various parts will be stripped off, in the following order:
+
+@enumerate
+@item
+codeset
+@item
+normalized codeset
+@item
+territory
+@item
+modifier
+@end enumerate
+
+For example, the locale name @samp{de_AT.iso885915@@euro} denotes a
+German-language locale for use in Austria, using the ISO-8859-15
+(Latin-9) character set, and with the Euro as the currency symbol.
+
+In addition to locale names which follow XPG syntax, systems may
+provide aliases such as @samp{german}.  Both categories of names must
+not contain the slash character @samp{/}.
+
+If the locale name starts with a slash @samp{/}, it is treated as a
+path relative to the configured locale directories; see @code{LOCPATH}
+below.  The specified path must not contain a component @samp{..}, or
+the name is invalid, and @code{setlocale} will fail.
+
+@strong{Portability Note:} POSIX suggests that if a locale name starts
+with a slash @samp{/}, it is resolved as an absolute path.  However,
+@theglibc{} treats it as a relative path under the directories listed
+in @code{LOCPATH} (or the default locale directory if @code{LOCPATH}
+is unset).
+
+Locale names which are longer than an implementation-defined limit are
+invalid and cause @code{setlocale} to fail.
+
+As a special case, locale names used with @code{LC_ALL} can combine
+several locales, reflecting different locale settings for different
+categories.  For example, you might want to use a U.S. locale with ISO
+A4 paper format, so you set @code{LANG} to @samp{en_US.UTF-8}, and
+@code{LC_PAPER} to @samp{de_DE.UTF-8}.  In this case, the
+@code{LC_ALL}-style combined locale name is
+
+@smallexample
+LC_CTYPE=en_US.UTF-8;LC_TIME=en_US.UTF-8;LC_PAPER=de_DE.UTF-8;@dots{}
+@end smallexample
+
+followed by other category settings not shown here.
+
+@vindex LOCPATH
+The path used for finding locale data can be set using the
+@code{LOCPATH} environment variable.  This variable lists the
+directories in which to search for locale definitions, separated by a
+colon @samp{:}.
+
+The default path for finding locale data is system specific.  A typical
+value for the @code{LOCPATH} default is:
+
+@smallexample
+/usr/share/locale
+@end smallexample
+
+The value of @code{LOCPATH} is ignored by privileged programs for
+security reasons, and only the default directory is used.
+
+@node Locale Information, Formatting Numbers, Locale Names, Locales
 @section Accessing Locale Information
 
 There are several ways to access locale information.  The simplest
openSUSE Build Service is sponsored by