gtk2/gtk/fnmatch.c

320 lines
7.3 KiB
C
Raw Normal View History

1997-11-24 22:37:52 +00:00
/* Copyright (C) 1991, 1992, 1993 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
2012-02-27 13:01:10 +00:00
* License along with this library. If not, see <http://www.gnu.org/licenses/>.
*/
1997-11-24 22:37:52 +00:00
/*
* Modified by the GTK+ Team and others 1997-2000. See the AUTHORS
* file for a list of people on the GTK+ Team. See the ChangeLog
* files for a list of changes. These files are distributed with
* GTK+ at ftp://ftp.gtk.org/pub/gtk/.
*/
/*
* Stripped down, converted to UTF-8 and test cases added
*
* Owen Taylor, 13 December 2002;
*/
1997-11-24 22:37:52 +00:00
#include "config.h"
#include "gtkprivate.h"
#include <string.h>
#include <glib.h>
static gunichar
get_char (const char **str,
gboolean casefold)
{
gunichar c = g_utf8_get_char (*str);
*str = g_utf8_next_char (*str);
1997-11-24 22:37:52 +00:00
if (casefold)
c = g_unichar_tolower (c);
1997-11-24 22:37:52 +00:00
return c;
}
1997-11-24 22:37:52 +00:00
#if defined(G_OS_WIN32) || defined(G_WITH_CYGWIN)
#define DO_ESCAPE 0
#else
#define DO_ESCAPE 1
#endif
1997-11-24 22:37:52 +00:00
static gunichar
get_unescaped_char (const char **str,
gboolean *was_escaped,
gboolean casefold)
{
gunichar c = get_char (str, casefold);
*was_escaped = DO_ESCAPE && c == '\\';
if (*was_escaped)
c = get_char (str, casefold);
return c;
}
1997-11-24 22:37:52 +00:00
/* Match STRING against the filename pattern PATTERN, returning zero if
it matches, nonzero if not. */
static gboolean
gtk_fnmatch_intern (const char *pattern,
const char *string,
gboolean component_start,
gboolean no_leading_period,
gboolean casefold)
{
const char *p = pattern, *n = string;
while (*p)
1997-11-24 22:37:52 +00:00
{
const char *last_n = n;
gunichar c = get_char (&p, casefold);
gunichar nc = get_char (&n, casefold);
1997-11-24 22:37:52 +00:00
switch (c)
{
case '?':
if (nc == '\0')
return FALSE;
else if (nc == G_DIR_SEPARATOR)
return FALSE;
else if (nc == '.' && component_start && no_leading_period)
return FALSE;
1997-11-24 22:37:52 +00:00
break;
case '\\':
if (DO_ESCAPE)
c = get_char (&p, casefold);
if (nc != c)
return FALSE;
1997-11-24 22:37:52 +00:00
break;
case '*':
if (nc == '.' && component_start && no_leading_period)
return FALSE;
{
2021-05-03 11:33:49 +00:00
const char *last_p;
for (last_p = p, c = get_char (&p, casefold);
c == '?' || c == '*';
last_p = p, c = get_char (&p, casefold))
{
if (c == '?')
{
if (nc == '\0')
return FALSE;
else if (nc == G_DIR_SEPARATOR)
return FALSE;
else
{
last_n = n; nc = get_char (&n, casefold);
}
}
}
1997-11-24 22:37:52 +00:00
/* If the pattern ends with wildcards, we have a
* guaranteed match unless there is a dir separator
* in the remainder of the string.
*/
if (c == '\0')
{
if (strchr (last_n, G_DIR_SEPARATOR) != NULL)
return FALSE;
else
return TRUE;
}
1997-11-24 22:37:52 +00:00
if (DO_ESCAPE && c == '\\')
c = get_char (&p, casefold);
1997-11-24 22:37:52 +00:00
for (p = last_p; nc != '\0';)
{
if ((c == '[' || nc == c) &&
gtk_fnmatch_intern (p, last_n, component_start, no_leading_period, casefold))
return TRUE;
component_start = (nc == G_DIR_SEPARATOR);
last_n = n;
nc = get_char (&n, casefold);
}
return FALSE;
1997-11-24 22:37:52 +00:00
}
case '[':
{
/* Nonzero if the sense of the character class is inverted. */
gboolean not;
gboolean was_escaped;
1997-11-24 22:37:52 +00:00
if (nc == '\0' || nc == G_DIR_SEPARATOR)
return FALSE;
1997-11-24 22:37:52 +00:00
if (nc == '.' && component_start && no_leading_period)
return FALSE;
1997-11-24 22:37:52 +00:00
not = (*p == '!' || *p == '^');
if (not)
++p;
c = get_unescaped_char (&p, &was_escaped, casefold);
1997-11-24 22:37:52 +00:00
for (;;)
{
register gunichar cstart = c, cend = c;
1997-11-24 22:37:52 +00:00
if (c == '\0')
/* [ (unterminated) loses. */
return FALSE;
1997-11-24 22:37:52 +00:00
c = get_unescaped_char (&p, &was_escaped, casefold);
if (!was_escaped && c == '-' && *p != ']')
1997-11-24 22:37:52 +00:00
{
cend = get_unescaped_char (&p, &was_escaped, casefold);
1997-11-24 22:37:52 +00:00
if (cend == '\0')
return FALSE;
1997-11-24 22:37:52 +00:00
c = get_char (&p, casefold);
1997-11-24 22:37:52 +00:00
}
if (nc >= cstart && nc <= cend)
1997-11-24 22:37:52 +00:00
goto matched;
if (!was_escaped && c == ']')
1997-11-24 22:37:52 +00:00
break;
}
if (!not)
return FALSE;
1997-11-24 22:37:52 +00:00
break;
matched:;
/* Skip the rest of the [...] that already matched. */
/* XXX 1003.2d11 is unclear if was_escaped is right. */
while (was_escaped || c != ']')
1997-11-24 22:37:52 +00:00
{
if (c == '\0')
/* [... (unterminated) loses. */
return FALSE;
1997-11-24 22:37:52 +00:00
c = get_unescaped_char (&p, &was_escaped, casefold);
1997-11-24 22:37:52 +00:00
}
if (not)
return FALSE;
1997-11-24 22:37:52 +00:00
}
break;
default:
if (c != nc)
return FALSE;
1997-11-24 22:37:52 +00:00
}
component_start = (nc == G_DIR_SEPARATOR);
1997-11-24 22:37:52 +00:00
}
if (*n == '\0')
return TRUE;
return FALSE;
}
/* Match STRING against the filename pattern PATTERN, returning zero if
* it matches, nonzero if not.
*
* GTK+ used to use a old version of GNU fnmatch() that was buggy
2014-02-07 18:32:47 +00:00
* in various ways and didnt handle UTF-8. The following is
* converted to UTF-8. To simplify the process of making it
* correct, this is special-cased to the combinations of flags
* that gtkfilesel.c uses.
*
* FNM_FILE_NAME - always set
* FNM_LEADING_DIR - never set
* FNM_NOESCAPE - set only on windows
* FNM_CASEFOLD - set only on windows
*/
gboolean
_gtk_fnmatch (const char *pattern,
const char *string,
gboolean no_leading_period,
gboolean casefold)
{
return gtk_fnmatch_intern (pattern, string, TRUE, no_leading_period, casefold);
}
/* Turn a glob pattern into a case-insensitive one, by replacing
* alphabetic characters by [xX] ranges.
*/
char *
_gtk_make_ci_glob_pattern (const char *pattern)
{
GString *s;
gboolean in_range = FALSE;
s = g_string_new ("");
for (const char *p = pattern; *p; p = g_utf8_next_char (p))
{
gunichar c = g_utf8_get_char (p);
if (in_range)
{
g_string_append_unichar (s, c);
if (c == ']')
in_range = FALSE;
continue;
}
#if DO_ESCAPE
if (c == '\\')
{
g_string_append (s, "\\");
p = g_utf8_next_char (p);
if (*p == '\0')
break;
c = g_utf8_get_char (p);
g_string_append_unichar (s, c);
continue;
}
#endif
if (c == '[')
{
g_string_append (s, "[");
p = g_utf8_next_char (p);
if (*p == '\0')
break;
c = g_utf8_get_char (p);
g_string_append_unichar (s, c);
in_range = TRUE;
continue;
}
else if (g_unichar_isalpha (c))
{
g_string_append (s, "[");
g_string_append_unichar (s, g_unichar_tolower (c));
g_string_append_unichar (s, g_unichar_toupper (c));
g_string_append (s, "]");
}
else
{
g_string_append_unichar (s, c);
}
}
return g_string_free (s, FALSE);
}