gtk2/gtk/css/gtkcsstokenizer.c
Timm Bäder 42fedfc870 csstokenizer: Keep a GString for name parsing around
We only ever parse one of those at a time, so keep a GString around for
it. This way we don't have to create a new GString every time we parse
an identifier. Doesn't spare us the strdup though.
2021-01-05 08:53:13 +01:00

1483 lines
38 KiB
C

/* GSK - The GIMP Toolkit
* Copyright (C) 2011 Benjamin Otte <otte@gnome.org>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library. If not, see <http://www.gnu.org/licenses/>.
*/
#include "config.h"
#include "gtkcsstokenizerprivate.h"
#include "gtkcssenums.h"
#include "gtkcsserror.h"
#include "gtkcsslocationprivate.h"
#include <math.h>
#include <string.h>
/* State of a tokenizer walking over one immutable byte buffer. */
struct _GtkCssTokenizer
{
/* Manual reference count; the tokenizer is freed when it drops to 0. */
int ref_count;
/* The input buffer; a reference is held for the tokenizer's lifetime. */
GBytes *bytes;
/* Scratch buffer reused by gtk_css_tokenizer_read_name() for every name. */
GString *name_buffer;
/* Current read position inside @bytes. */
const char *data;
/* One past the last byte of @bytes. */
const char *end;
/* Location bookkeeping that is advanced together with @data. */
GtkCssLocation position;
};
/*
 * gtk_css_token_clear:
 * @token: a #GtkCssToken
 *
 * Releases the heap-allocated payload of @token, if its type carries one,
 * and resets the token type to %GTK_CSS_TOKEN_EOF.
 */
void
gtk_css_token_clear (GtkCssToken *token)
{
  switch (token->type)
    {
    /* Tokens without an owned payload: nothing to free. */
    case GTK_CSS_TOKEN_EOF:
    case GTK_CSS_TOKEN_WHITESPACE:
    case GTK_CSS_TOKEN_OPEN_PARENS:
    case GTK_CSS_TOKEN_CLOSE_PARENS:
    case GTK_CSS_TOKEN_OPEN_SQUARE:
    case GTK_CSS_TOKEN_CLOSE_SQUARE:
    case GTK_CSS_TOKEN_OPEN_CURLY:
    case GTK_CSS_TOKEN_CLOSE_CURLY:
    case GTK_CSS_TOKEN_COMMA:
    case GTK_CSS_TOKEN_COLON:
    case GTK_CSS_TOKEN_SEMICOLON:
    case GTK_CSS_TOKEN_CDC:
    case GTK_CSS_TOKEN_CDO:
    case GTK_CSS_TOKEN_DELIM:
    case GTK_CSS_TOKEN_SIGNED_INTEGER:
    case GTK_CSS_TOKEN_SIGNLESS_INTEGER:
    case GTK_CSS_TOKEN_SIGNED_NUMBER:
    case GTK_CSS_TOKEN_SIGNLESS_NUMBER:
    case GTK_CSS_TOKEN_PERCENTAGE:
    case GTK_CSS_TOKEN_INCLUDE_MATCH:
    case GTK_CSS_TOKEN_DASH_MATCH:
    case GTK_CSS_TOKEN_PREFIX_MATCH:
    case GTK_CSS_TOKEN_SUFFIX_MATCH:
    case GTK_CSS_TOKEN_SUBSTRING_MATCH:
    case GTK_CSS_TOKEN_COLUMN:
    case GTK_CSS_TOKEN_BAD_STRING:
    case GTK_CSS_TOKEN_BAD_URL:
    case GTK_CSS_TOKEN_COMMENT:
      break;

    /* Dimension tokens own their unit string. */
    case GTK_CSS_TOKEN_SIGNED_INTEGER_DIMENSION:
    case GTK_CSS_TOKEN_SIGNLESS_INTEGER_DIMENSION:
    case GTK_CSS_TOKEN_DIMENSION:
      g_free (token->dimension.dimension);
      break;

    /* String-like tokens own their text. */
    case GTK_CSS_TOKEN_STRING:
    case GTK_CSS_TOKEN_IDENT:
    case GTK_CSS_TOKEN_FUNCTION:
    case GTK_CSS_TOKEN_AT_KEYWORD:
    case GTK_CSS_TOKEN_HASH_UNRESTRICTED:
    case GTK_CSS_TOKEN_HASH_ID:
    case GTK_CSS_TOKEN_URL:
      g_free (token->string.string);
      break;

    default:
      g_assert_not_reached ();
      break;
    }

  token->type = GTK_CSS_TOKEN_EOF;
}
/*
 * Initializes @token as a payload-free token of the given @type.
 * Types that carry a string, number, delimiter or dimension must use the
 * dedicated gtk_css_token_init_*() helpers; passing one here trips the
 * assertion.
 */
static void
gtk_css_token_init (GtkCssToken     *token,
                    GtkCssTokenType  type)
{
  const guint kind = (guint) type;

  token->type = type;

  switch (kind)
    {
    case GTK_CSS_TOKEN_EOF:
    case GTK_CSS_TOKEN_WHITESPACE:
    case GTK_CSS_TOKEN_COMMENT:
    case GTK_CSS_TOKEN_OPEN_PARENS:
    case GTK_CSS_TOKEN_CLOSE_PARENS:
    case GTK_CSS_TOKEN_OPEN_SQUARE:
    case GTK_CSS_TOKEN_CLOSE_SQUARE:
    case GTK_CSS_TOKEN_OPEN_CURLY:
    case GTK_CSS_TOKEN_CLOSE_CURLY:
    case GTK_CSS_TOKEN_COMMA:
    case GTK_CSS_TOKEN_COLON:
    case GTK_CSS_TOKEN_SEMICOLON:
    case GTK_CSS_TOKEN_CDC:
    case GTK_CSS_TOKEN_CDO:
    case GTK_CSS_TOKEN_INCLUDE_MATCH:
    case GTK_CSS_TOKEN_DASH_MATCH:
    case GTK_CSS_TOKEN_PREFIX_MATCH:
    case GTK_CSS_TOKEN_SUFFIX_MATCH:
    case GTK_CSS_TOKEN_SUBSTRING_MATCH:
    case GTK_CSS_TOKEN_COLUMN:
    case GTK_CSS_TOKEN_BAD_STRING:
    case GTK_CSS_TOKEN_BAD_URL:
      /* valid payload-free type */
      break;

    default:
      g_assert_not_reached ();
    }
}
/*
 * Appends @ident to @string verbatim.
 * XXX: no CSS escaping is applied, so identifiers containing characters
 * that need escaping are not printed in a re-parsable form.
 */
static void
append_ident (GString    *string,
              const char *ident)
{
  /* A length of -1 means "up to the terminating NUL". */
  g_string_append_len (string, ident, -1);
}
/*
 * Appends @s to @string wrapped in double quotes.
 * XXX: the contents are copied verbatim; quotes, backslashes and newlines
 * inside @s are not escaped.
 */
static void
append_string (GString    *string,
               const char *s)
{
  const char quote = '"';

  g_string_append_c (string, quote);
  g_string_append_len (string, s, -1);
  g_string_append_c (string, quote);
}
/*
 * gtk_css_token_is_finite:
 * @token: a #GtkCssToken
 *
 * Checks whether @token is finite, i.e. whether it would remain the very
 * same token regardless of which bytes come after it in the data stream.
 *
 * The ';' token is an obvious example of a finite token.
 *
 * Returns: %TRUE if the token is considered finite.
 **/
gboolean
gtk_css_token_is_finite (const GtkCssToken *token)
{
  switch (token->type)
    {
    /* These could still grow or change if more input followed. */
    case GTK_CSS_TOKEN_WHITESPACE:
    case GTK_CSS_TOKEN_IDENT:
    case GTK_CSS_TOKEN_AT_KEYWORD:
    case GTK_CSS_TOKEN_HASH_UNRESTRICTED:
    case GTK_CSS_TOKEN_HASH_ID:
    case GTK_CSS_TOKEN_DELIM:
    case GTK_CSS_TOKEN_SIGNED_INTEGER:
    case GTK_CSS_TOKEN_SIGNLESS_INTEGER:
    case GTK_CSS_TOKEN_SIGNED_NUMBER:
    case GTK_CSS_TOKEN_SIGNLESS_NUMBER:
    case GTK_CSS_TOKEN_BAD_STRING:
    case GTK_CSS_TOKEN_BAD_URL:
    case GTK_CSS_TOKEN_SIGNED_INTEGER_DIMENSION:
    case GTK_CSS_TOKEN_SIGNLESS_INTEGER_DIMENSION:
    case GTK_CSS_TOKEN_DIMENSION:
      return FALSE;

    /* These are complete as soon as they have been read. */
    case GTK_CSS_TOKEN_EOF:
    case GTK_CSS_TOKEN_STRING:
    case GTK_CSS_TOKEN_FUNCTION:
    case GTK_CSS_TOKEN_URL:
    case GTK_CSS_TOKEN_PERCENTAGE:
    case GTK_CSS_TOKEN_OPEN_PARENS:
    case GTK_CSS_TOKEN_CLOSE_PARENS:
    case GTK_CSS_TOKEN_OPEN_SQUARE:
    case GTK_CSS_TOKEN_CLOSE_SQUARE:
    case GTK_CSS_TOKEN_OPEN_CURLY:
    case GTK_CSS_TOKEN_CLOSE_CURLY:
    case GTK_CSS_TOKEN_COMMA:
    case GTK_CSS_TOKEN_COLON:
    case GTK_CSS_TOKEN_SEMICOLON:
    case GTK_CSS_TOKEN_CDC:
    case GTK_CSS_TOKEN_CDO:
    case GTK_CSS_TOKEN_INCLUDE_MATCH:
    case GTK_CSS_TOKEN_DASH_MATCH:
    case GTK_CSS_TOKEN_PREFIX_MATCH:
    case GTK_CSS_TOKEN_SUFFIX_MATCH:
    case GTK_CSS_TOKEN_SUBSTRING_MATCH:
    case GTK_CSS_TOKEN_COLUMN:
    case GTK_CSS_TOKEN_COMMENT:
      return TRUE;

    default:
      /* Unknown type: treated as not finite when assertions are disabled. */
      g_assert_not_reached ();
      return FALSE;
    }
}
/*
 * gtk_css_token_is_preserved:
 * @token: a #GtkCssToken
 * @out_closing: (allow-none): Type of the token that closes a block
 *   started with this token
 *
 * A token counts as preserved when it does not open a block.
 *
 * Block-opening tokens need different error recovery while parsing, which
 * is why CSS parsers want to consult this function. For preserved tokens
 * @out_closing is set to %GTK_CSS_TOKEN_EOF.
 *
 * Returns: %TRUE if the token is considered preserved.
 **/
gboolean
gtk_css_token_is_preserved (const GtkCssToken *token,
                            GtkCssTokenType   *out_closing)
{
  GtkCssTokenType closing;
  gboolean preserved;

  switch (token->type)
    {
    case GTK_CSS_TOKEN_FUNCTION:
    case GTK_CSS_TOKEN_OPEN_PARENS:
      closing = GTK_CSS_TOKEN_CLOSE_PARENS;
      preserved = FALSE;
      break;

    case GTK_CSS_TOKEN_OPEN_SQUARE:
      closing = GTK_CSS_TOKEN_CLOSE_SQUARE;
      preserved = FALSE;
      break;

    case GTK_CSS_TOKEN_OPEN_CURLY:
      closing = GTK_CSS_TOKEN_CLOSE_CURLY;
      preserved = FALSE;
      break;

    case GTK_CSS_TOKEN_EOF:
    case GTK_CSS_TOKEN_WHITESPACE:
    case GTK_CSS_TOKEN_STRING:
    case GTK_CSS_TOKEN_URL:
    case GTK_CSS_TOKEN_PERCENTAGE:
    case GTK_CSS_TOKEN_CLOSE_PARENS:
    case GTK_CSS_TOKEN_CLOSE_SQUARE:
    case GTK_CSS_TOKEN_CLOSE_CURLY:
    case GTK_CSS_TOKEN_COMMA:
    case GTK_CSS_TOKEN_COLON:
    case GTK_CSS_TOKEN_SEMICOLON:
    case GTK_CSS_TOKEN_CDC:
    case GTK_CSS_TOKEN_CDO:
    case GTK_CSS_TOKEN_INCLUDE_MATCH:
    case GTK_CSS_TOKEN_DASH_MATCH:
    case GTK_CSS_TOKEN_PREFIX_MATCH:
    case GTK_CSS_TOKEN_SUFFIX_MATCH:
    case GTK_CSS_TOKEN_SUBSTRING_MATCH:
    case GTK_CSS_TOKEN_COLUMN:
    case GTK_CSS_TOKEN_COMMENT:
    case GTK_CSS_TOKEN_IDENT:
    case GTK_CSS_TOKEN_AT_KEYWORD:
    case GTK_CSS_TOKEN_HASH_UNRESTRICTED:
    case GTK_CSS_TOKEN_HASH_ID:
    case GTK_CSS_TOKEN_DELIM:
    case GTK_CSS_TOKEN_SIGNED_INTEGER:
    case GTK_CSS_TOKEN_SIGNLESS_INTEGER:
    case GTK_CSS_TOKEN_SIGNED_NUMBER:
    case GTK_CSS_TOKEN_SIGNLESS_NUMBER:
    case GTK_CSS_TOKEN_BAD_STRING:
    case GTK_CSS_TOKEN_BAD_URL:
    case GTK_CSS_TOKEN_SIGNED_INTEGER_DIMENSION:
    case GTK_CSS_TOKEN_SIGNLESS_INTEGER_DIMENSION:
    case GTK_CSS_TOKEN_DIMENSION:
      closing = GTK_CSS_TOKEN_EOF;
      preserved = TRUE;
      break;

    default:
      /* Unknown type: handled like a preserved token when assertions
       * are disabled. */
      g_assert_not_reached ();
      closing = GTK_CSS_TOKEN_EOF;
      preserved = TRUE;
      break;
    }

  if (out_closing != NULL)
    *out_closing = closing;

  return preserved;
}
gboolean
gtk_css_token_is_ident (const GtkCssToken *token,
const char *ident)
{
return gtk_css_token_is (token, GTK_CSS_TOKEN_IDENT)
&& (g_ascii_strcasecmp (token->string.string, ident) == 0);
}
gboolean
gtk_css_token_is_function (const GtkCssToken *token,
const char *ident)
{
return gtk_css_token_is (token, GTK_CSS_TOKEN_FUNCTION)
&& (g_ascii_strcasecmp (token->string.string, ident) == 0);
}
/*
 * Returns %TRUE if @token is a delimiter token holding exactly the
 * character @delim.
 */
gboolean
gtk_css_token_is_delim (const GtkCssToken *token,
                        gunichar           delim)
{
  if (!gtk_css_token_is (token, GTK_CSS_TOKEN_DELIM))
    return FALSE;

  return token->delim.delim == delim;
}
/*
 * gtk_css_token_print:
 * @token: the token to print
 * @string: the #GString to append to
 *
 * Appends a textual representation of @token to @string. String payloads
 * are emitted without escaping, numbers are formatted with
 * g_ascii_dtostr(), and bad-string, bad-url and comment tokens are printed
 * as fixed placeholders.
 */
void
gtk_css_token_print (const GtkCssToken *token,
                     GString           *string)
{
  char number[G_ASCII_DTOSTR_BUF_SIZE];

  switch (token->type)
    {
    /* --- tokens with a string payload --- */
    case GTK_CSS_TOKEN_STRING:
      append_string (string, token->string.string);
      break;

    case GTK_CSS_TOKEN_IDENT:
      append_ident (string, token->string.string);
      break;

    case GTK_CSS_TOKEN_URL:
      g_string_append (string, "url(");
      append_ident (string, token->string.string);
      g_string_append_c (string, ')');
      break;

    case GTK_CSS_TOKEN_FUNCTION:
      append_ident (string, token->string.string);
      g_string_append_c (string, '(');
      break;

    case GTK_CSS_TOKEN_AT_KEYWORD:
      g_string_append_c (string, '@');
      append_ident (string, token->string.string);
      break;

    case GTK_CSS_TOKEN_HASH_UNRESTRICTED:
    case GTK_CSS_TOKEN_HASH_ID:
      g_string_append_c (string, '#');
      append_ident (string, token->string.string);
      break;

    case GTK_CSS_TOKEN_DELIM:
      g_string_append_unichar (string, token->delim.delim);
      break;

    /* --- numeric tokens --- */
    case GTK_CSS_TOKEN_SIGNED_INTEGER:
    case GTK_CSS_TOKEN_SIGNED_NUMBER:
      /* a negative value already carries its '-' */
      if (token->number.number >= 0)
        g_string_append_c (string, '+');
      G_GNUC_FALLTHROUGH;
    case GTK_CSS_TOKEN_SIGNLESS_INTEGER:
    case GTK_CSS_TOKEN_SIGNLESS_NUMBER:
      g_string_append (string,
                       g_ascii_dtostr (number, sizeof (number), token->number.number));
      break;

    case GTK_CSS_TOKEN_PERCENTAGE:
      g_string_append (string,
                       g_ascii_dtostr (number, sizeof (number), token->number.number));
      g_string_append_c (string, '%');
      break;

    case GTK_CSS_TOKEN_SIGNED_INTEGER_DIMENSION:
      if (token->dimension.value >= 0)
        g_string_append_c (string, '+');
      G_GNUC_FALLTHROUGH;
    case GTK_CSS_TOKEN_SIGNLESS_INTEGER_DIMENSION:
    case GTK_CSS_TOKEN_DIMENSION:
      g_string_append (string,
                       g_ascii_dtostr (number, sizeof (number), token->dimension.value));
      append_ident (string, token->dimension.dimension);
      break;

    /* --- fixed-text tokens --- */
    case GTK_CSS_TOKEN_EOF:
      break;

    case GTK_CSS_TOKEN_WHITESPACE:
      g_string_append_c (string, ' ');
      break;

    case GTK_CSS_TOKEN_OPEN_PARENS:
      g_string_append_c (string, '(');
      break;

    case GTK_CSS_TOKEN_CLOSE_PARENS:
      g_string_append_c (string, ')');
      break;

    case GTK_CSS_TOKEN_OPEN_SQUARE:
      g_string_append_c (string, '[');
      break;

    case GTK_CSS_TOKEN_CLOSE_SQUARE:
      g_string_append_c (string, ']');
      break;

    case GTK_CSS_TOKEN_OPEN_CURLY:
      g_string_append_c (string, '{');
      break;

    case GTK_CSS_TOKEN_CLOSE_CURLY:
      g_string_append_c (string, '}');
      break;

    case GTK_CSS_TOKEN_COMMA:
      g_string_append_c (string, ',');
      break;

    case GTK_CSS_TOKEN_COLON:
      g_string_append_c (string, ':');
      break;

    case GTK_CSS_TOKEN_SEMICOLON:
      g_string_append_c (string, ';');
      break;

    case GTK_CSS_TOKEN_CDO:
      g_string_append (string, "<!--");
      break;

    case GTK_CSS_TOKEN_CDC:
      g_string_append (string, "-->");
      break;

    case GTK_CSS_TOKEN_INCLUDE_MATCH:
      g_string_append (string, "~=");
      break;

    case GTK_CSS_TOKEN_DASH_MATCH:
      g_string_append (string, "|=");
      break;

    case GTK_CSS_TOKEN_PREFIX_MATCH:
      g_string_append (string, "^=");
      break;

    case GTK_CSS_TOKEN_SUFFIX_MATCH:
      g_string_append (string, "$=");
      break;

    case GTK_CSS_TOKEN_SUBSTRING_MATCH:
      g_string_append (string, "*=");
      break;

    case GTK_CSS_TOKEN_COLUMN:
      g_string_append (string, "||");
      break;

    case GTK_CSS_TOKEN_BAD_STRING:
      g_string_append (string, "\"\n");
      break;

    case GTK_CSS_TOKEN_BAD_URL:
      g_string_append (string, "url(bad url)");
      break;

    case GTK_CSS_TOKEN_COMMENT:
      g_string_append (string, "/* comment */");
      break;

    default:
      g_assert_not_reached ();
      break;
    }
}
/*
 * gtk_css_token_to_string:
 * @token: the token to print
 *
 * Convenience wrapper around gtk_css_token_print().
 *
 * Returns: a newly allocated string; free with g_free().
 */
char *
gtk_css_token_to_string (const GtkCssToken *token)
{
  GString *out = g_string_new (NULL);

  gtk_css_token_print (token, out);

  /* Keep the character data, drop only the GString wrapper. */
  return g_string_free (out, FALSE);
}
/*
 * Initializes @token as a string-carrying token of the given @type.
 * The token stores the @string pointer as-is (no copy is made);
 * gtk_css_token_clear() later releases it with g_free().
 * Asserts if @type is not one of the string-carrying types.
 */
static void
gtk_css_token_init_string (GtkCssToken     *token,
                           GtkCssTokenType  type,
                           char            *string)
{
  const guint kind = (guint) type;

  token->type = type;

  switch (kind)
    {
    case GTK_CSS_TOKEN_IDENT:
    case GTK_CSS_TOKEN_FUNCTION:
    case GTK_CSS_TOKEN_AT_KEYWORD:
    case GTK_CSS_TOKEN_HASH_UNRESTRICTED:
    case GTK_CSS_TOKEN_HASH_ID:
    case GTK_CSS_TOKEN_STRING:
    case GTK_CSS_TOKEN_URL:
      token->string.string = string;
      break;

    default:
      g_assert_not_reached ();
    }
}
/* Initializes @token as a delimiter token holding the character @delim. */
static void
gtk_css_token_init_delim (GtkCssToken *token,
                          gunichar     delim)
{
  token->delim.delim = delim;
  /* type is written last: it shares storage with the union members */
  token->type = GTK_CSS_TOKEN_DELIM;
  token->delim.delim = delim;
}
/*
 * Initializes @token as a plain numeric token (integer, number or
 * percentage) holding @value. Asserts if @type is not a numeric type.
 */
static void
gtk_css_token_init_number (GtkCssToken     *token,
                           GtkCssTokenType  type,
                           double           value)
{
  const guint kind = (guint) type;

  token->type = type;

  switch (kind)
    {
    case GTK_CSS_TOKEN_PERCENTAGE:
    case GTK_CSS_TOKEN_SIGNED_NUMBER:
    case GTK_CSS_TOKEN_SIGNLESS_NUMBER:
    case GTK_CSS_TOKEN_SIGNED_INTEGER:
    case GTK_CSS_TOKEN_SIGNLESS_INTEGER:
      token->number.number = value;
      break;

    default:
      g_assert_not_reached ();
    }
}
/*
 * Initializes @token as a dimension token with numeric @value and unit
 * name @dimension. The @dimension pointer is stored as-is (no copy);
 * gtk_css_token_clear() later releases it with g_free().
 * Asserts if @type is not a dimension type.
 */
static void
gtk_css_token_init_dimension (GtkCssToken     *token,
                              GtkCssTokenType  type,
                              double           value,
                              char            *dimension)
{
  const guint kind = (guint) type;

  token->type = type;

  switch (kind)
    {
    case GTK_CSS_TOKEN_DIMENSION:
    case GTK_CSS_TOKEN_SIGNED_INTEGER_DIMENSION:
    case GTK_CSS_TOKEN_SIGNLESS_INTEGER_DIMENSION:
      token->dimension.dimension = dimension;
      token->dimension.value = value;
      break;

    default:
      g_assert_not_reached ();
    }
}
/*
 * gtk_css_tokenizer_new:
 * @bytes: the data to tokenize
 *
 * Creates a tokenizer positioned at the start of @bytes. The tokenizer
 * takes its own reference on @bytes.
 *
 * Returns: a new tokenizer with a reference count of 1.
 */
GtkCssTokenizer *
gtk_css_tokenizer_new (GBytes *bytes)
{
  GtkCssTokenizer *self = g_slice_new0 (GtkCssTokenizer);
  gsize size = 0;

  self->ref_count = 1;
  self->bytes = g_bytes_ref (bytes);
  self->name_buffer = g_string_new (NULL);

  /* The cursor and the end marker point straight into the GBytes data. */
  self->data = g_bytes_get_data (bytes, &size);
  self->end = self->data + size;

  gtk_css_location_init (&self->position);

  return self;
}
/*
 * Increases the reference count of @tokenizer by one (not atomically).
 *
 * Returns: the @tokenizer that was passed in.
 */
GtkCssTokenizer *
gtk_css_tokenizer_ref (GtkCssTokenizer *tokenizer)
{
  tokenizer->ref_count += 1;

  return tokenizer;
}
/*
 * Drops one reference on @tokenizer. When the last reference goes away the
 * name buffer is freed, the input bytes are unreffed and the tokenizer
 * itself is released.
 */
void
gtk_css_tokenizer_unref (GtkCssTokenizer *tokenizer)
{
  tokenizer->ref_count -= 1;

  if (tokenizer->ref_count <= 0)
    {
      g_string_free (tokenizer->name_buffer, TRUE);
      g_bytes_unref (tokenizer->bytes);
      g_slice_free (GtkCssTokenizer, tokenizer);
    }
}
/*
 * Returns: a pointer to the tokenizer's current location. The pointer
 *   refers to storage inside @tokenizer, so its contents change as
 *   tokens are read.
 */
const GtkCssLocation *
gtk_css_tokenizer_get_location (GtkCssTokenizer *tokenizer)
{
  const GtkCssLocation *location = &tokenizer->position;

  return location;
}
/*
 * Reports a syntax error described by the printf()-style @format.
 * If @error is non-NULL it is set to a new GTK_CSS_PARSER_ERROR_SYNTAX
 * error; otherwise the message is printed with g_print().
 */
static void G_GNUC_PRINTF(2, 3)
gtk_css_tokenizer_parse_error (GError     **error,
                               const char  *format,
                               ...)
{
  va_list args;

  va_start (args, format);

  if (error == NULL)
    {
      /* Nobody is collecting the error, so just print it. */
      char *message = g_strdup_vprintf (format, args);

      g_print ("error: %s\n", message);
      g_free (message);
    }
  else
    {
      *error = g_error_new_valist (GTK_CSS_PARSER_ERROR,
                                   GTK_CSS_PARSER_ERROR_SYNTAX,
                                   format, args);
    }

  va_end (args);
}
static gboolean
is_newline (char c)
{
return c == '\n'
|| c == '\r'
|| c == '\f';
}
static gboolean
is_whitespace (char c)
{
return is_newline (c)
|| c == '\t'
|| c == ' ';
}
/*
 * Returns non-zero if the high bit of @c is set, i.e. the byte is not
 * plain ASCII and belongs to a multi-byte UTF-8 sequence.
 */
static gboolean
is_multibyte (char c)
{
  const int high_bit = 0x80;

  return c & high_bit;
}
static gboolean
is_name_start (char c)
{
return is_multibyte (c)
|| g_ascii_isalpha (c)
|| c == '_';
}
static gboolean
is_name (char c)
{
return is_name_start (c)
|| g_ascii_isdigit (c)
|| c == '-';
}
/*
 * Returns %TRUE if @c is a non-printable code point as defined by CSS
 * Syntax: U+0000 to U+0008, U+000B, U+000E to U+001F, or U+007F.
 *
 * Tab, line feed, form feed and carriage return (U+0009, U+000A, U+000C,
 * U+000D) are deliberately not part of the set; they are whitespace.
 */
static gboolean
is_non_printable (char c)
{
  return (c >= 0 && c <= 0x08)
      || c == 0x0B
      /* the whole SHIFT OUT .. UNIT SEPARATOR range, not just its ends */
      || (c >= 0x0E && c <= 0x1F)
      || c == 0x7F;
}
/*
 * Checks whether the bytes in [@data, @end) start a valid escape: a
 * backslash that is not immediately followed by a newline. A backslash
 * that is the very last byte counts as valid.
 */
static gboolean
is_valid_escape (const char *data,
                 const char *end)
{
  if (end - data == 0)
    return FALSE;

  /* Only look at the second byte when there is one. */
  if (end - data != 1 && is_newline (data[1]))
    return FALSE;

  return data[0] == '\\';
}
/* Returns the number of bytes between the read position and the end. */
static inline gsize
gtk_css_tokenizer_remaining (GtkCssTokenizer *tokenizer)
{
  const char *cursor = tokenizer->data;
  const char *limit = tokenizer->end;

  return limit - cursor;
}
/* Returns %TRUE if a valid escape starts at the current read position. */
static gboolean
gtk_css_tokenizer_has_valid_escape (GtkCssTokenizer *tokenizer)
{
  const char *cursor = tokenizer->data;
  const char *limit = tokenizer->end;

  return is_valid_escape (cursor, limit);
}
/*
 * Looks ahead, without consuming anything, to decide whether an
 * identifier starts at the current read position. An identifier may begin
 * with an optional '-' followed by another '-', a name-start character,
 * or a backslash that is not followed by a newline.
 */
static gboolean
gtk_css_tokenizer_has_identifier (GtkCssTokenizer *tokenizer)
{
  const char *cursor = tokenizer->data;
  const char *limit = tokenizer->end;

  if (cursor == limit)
    return FALSE;

  if (*cursor == '-')
    {
      /* skip the leading dash */
      cursor++;
      if (cursor == limit)
        return FALSE;

      /* "--" always starts an identifier */
      if (*cursor == '-')
        return TRUE;
    }

  if (is_name_start (*cursor))
    return TRUE;

  if (*cursor != '\\')
    return FALSE;

  cursor++;
  if (cursor == limit)
    return TRUE; /* really? */

  return !is_newline (*cursor);
}
/*
 * Looks ahead, without consuming anything, to decide whether a number
 * starts at the current read position: an optional sign, an optional '.',
 * then an ASCII digit.
 */
static gboolean
gtk_css_tokenizer_has_number (GtkCssTokenizer *tokenizer)
{
  const char *cursor = tokenizer->data;
  const char *limit = tokenizer->end;

  if (cursor == limit)
    return FALSE;

  /* optional sign */
  if (*cursor == '+' || *cursor == '-')
    {
      if (++cursor == limit)
        return FALSE;
    }

  /* optional decimal point */
  if (*cursor == '.')
    {
      if (++cursor == limit)
        return FALSE;
    }

  return g_ascii_isdigit (*cursor);
}
/*
 * Consumes the newline at the read position and advances the location to
 * the next line. A "\r\n" pair is consumed as one newline of two bytes;
 * anything else is consumed as a single byte.
 */
static void
gtk_css_tokenizer_consume_newline (GtkCssTokenizer *tokenizer)
{
  gboolean is_crlf;

  is_crlf = gtk_css_tokenizer_remaining (tokenizer) > 1 &&
            tokenizer->data[0] == '\r' &&
            tokenizer->data[1] == '\n';

  tokenizer->data += is_crlf ? 2 : 1;
  gtk_css_location_advance_newline (&tokenizer->position, is_crlf);
}
/*
 * Advances the read position by @n_bytes and the location by @n_bytes
 * bytes / @n_characters characters.
 *
 * NB: the consumed range must not contain newlines; use
 * gtk_css_tokenizer_consume_newline() for those.
 */
static inline void
gtk_css_tokenizer_consume (GtkCssTokenizer *tokenizer,
                           gsize            n_bytes,
                           gsize            n_characters)
{
  GtkCssLocation *location = &tokenizer->position;

  tokenizer->data = tokenizer->data + n_bytes;
  gtk_css_location_advance (location, n_bytes, n_characters);
}
/*
 * Consumes exactly one single-byte character.
 *
 * NB: the character must not be a newline.
 */
static inline void
gtk_css_tokenizer_consume_ascii (GtkCssTokenizer *tokenizer)
{
  const gsize one = 1;

  gtk_css_tokenizer_consume (tokenizer, one, one);
}
/*
 * Consumes one whitespace character at the read position, using the
 * newline path for newlines so that line tracking stays correct.
 */
static inline void
gtk_css_tokenizer_consume_whitespace (GtkCssTokenizer *tokenizer)
{
  if (!is_newline (*tokenizer->data))
    {
      gtk_css_tokenizer_consume_ascii (tokenizer);
      return;
    }

  gtk_css_tokenizer_consume_newline (tokenizer);
}
/*
 * Consumes one character at the read position. The caller must ensure
 * that at least one byte is left.
 *
 * Newlines are consumed through gtk_css_tokenizer_consume_newline() and
 * are never appended. Any other character is consumed as one UTF-8
 * sequence and, if @string is non-NULL, its bytes are appended to it.
 */
static inline void
gtk_css_tokenizer_consume_char (GtkCssTokenizer *tokenizer,
                                GString         *string)
{
  gsize char_size, remaining;

  if (is_newline (*tokenizer->data))
    {
      gtk_css_tokenizer_consume_newline (tokenizer);
      return;
    }

  char_size = g_utf8_next_char (tokenizer->data) - tokenizer->data;

  /* g_utf8_next_char() trusts the lead byte. A truncated multi-byte
   * sequence at the very end of the input would otherwise push the read
   * position past the end, so never take more than what is left. */
  remaining = gtk_css_tokenizer_remaining (tokenizer);
  if (char_size > remaining)
    char_size = remaining;

  if (string)
    g_string_append_len (string, tokenizer->data, char_size);
  gtk_css_tokenizer_consume (tokenizer, char_size, 1);
}
/*
 * Consumes the whitespace character at the read position plus all
 * whitespace directly following it, and initializes @token as one
 * whitespace token.
 */
static void
gtk_css_tokenizer_read_whitespace (GtkCssTokenizer *tokenizer,
                                   GtkCssToken     *token)
{
  for (;;)
    {
      gtk_css_tokenizer_consume_whitespace (tokenizer);

      if (tokenizer->data == tokenizer->end)
        break;
      if (!is_whitespace (*tokenizer->data))
        break;
    }

  gtk_css_token_init (token, GTK_CSS_TOKEN_WHITESPACE);
}
/*
 * Consumes an escape sequence, starting at its backslash, and returns the
 * code point it stands for.
 *
 * Up to six hex digits are read as a code point number and one
 * whitespace character following them is swallowed. Without hex digits,
 * the next character is taken literally.
 *
 * Returns: the escaped code point, or U+FFFD if the input ends right
 *   after the backslash or the result is zero, a surrogate or otherwise
 *   not a valid Unicode character.
 */
static gunichar
gtk_css_tokenizer_read_escape (GtkCssTokenizer *tokenizer)
{
  gunichar value = 0;
  guint n_digits;

  /* skip the backslash */
  gtk_css_tokenizer_consume (tokenizer, 1, 1);

  for (n_digits = 0;
       n_digits < 6 && tokenizer->data < tokenizer->end && g_ascii_isxdigit (*tokenizer->data);
       n_digits++)
    {
      value = value * 16 + g_ascii_xdigit_value (*tokenizer->data);
      gtk_css_tokenizer_consume (tokenizer, 1, 1);
    }

  if (n_digits == 0)
    {
      gsize remaining = gtk_css_tokenizer_remaining (tokenizer);

      if (remaining == 0)
        return 0xFFFD;

      /* -1 / -2 signal an invalid or incomplete UTF-8 sequence */
      value = g_utf8_get_char_validated (tokenizer->data, remaining);
      if (value == (gunichar) -1 || value == (gunichar) -2)
        value = 0;

      gtk_css_tokenizer_consume_char (tokenizer, NULL);
    }
  else if (tokenizer->data < tokenizer->end && is_whitespace (*tokenizer->data))
    {
      /* The hex digits may have ended the input, so check for the end
       * before looking at the next byte. The whitespace may be a newline
       * (possibly "\r\n"), which needs the newline-aware consumer to keep
       * the location correct. */
      gtk_css_tokenizer_consume_whitespace (tokenizer);
    }

  /* A zero code point would embed a NUL byte into the resulting C string;
   * CSS Syntax maps it to the replacement character as well. */
  if (value == 0 ||
      !g_unichar_validate (value) ||
      g_unichar_type (value) == G_UNICODE_SURROGATE)
    return 0xFFFD;

  return value;
}
/*
 * Consumes a name (a run of name characters and escapes) starting at the
 * read position. The caller must ensure that at least one byte is left.
 * The characters are gathered in the tokenizer's reusable name buffer.
 *
 * Returns: a newly allocated copy of the name; free with g_free().
 */
static char *
gtk_css_tokenizer_read_name (GtkCssTokenizer *tokenizer)
{
  GString *name = tokenizer->name_buffer;

  g_string_set_size (name, 0);

  for (;;)
    {
      const char c = *tokenizer->data;

      if (c == '\\')
        {
          if (gtk_css_tokenizer_has_valid_escape (tokenizer))
            {
              g_string_append_unichar (name, gtk_css_tokenizer_read_escape (tokenizer));
            }
          else
            {
              /* Not a valid escape: drop the backslash and consume the
               * character after it as-is. */
              gtk_css_tokenizer_consume_ascii (tokenizer);
              if (tokenizer->data == tokenizer->end)
                {
                  g_string_append_unichar (name, 0xFFFD);
                  break;
                }
              gtk_css_tokenizer_consume_char (tokenizer, name);
            }
        }
      else if (is_name (c))
        {
          gtk_css_tokenizer_consume_char (tokenizer, name);
        }
      else
        {
          /* first character that cannot be part of a name */
          break;
        }

      if (tokenizer->data == tokenizer->end)
        break;
    }

  return g_strndup (name->str, name->len);
}
/*
 * Error recovery for a malformed url(): skips everything up to and
 * including the next unescaped ')' (or to the end of the input) and
 * initializes @token as a bad-url token.
 */
static void
gtk_css_tokenizer_read_bad_url (GtkCssTokenizer *tokenizer,
                                GtkCssToken     *token)
{
  for (;;)
    {
      if (tokenizer->data >= tokenizer->end)
        break;
      if (*tokenizer->data == ')')
        break;

      /* Read escapes as a unit so that an escaped ')' does not end the url. */
      if (gtk_css_tokenizer_has_valid_escape (tokenizer))
        gtk_css_tokenizer_read_escape (tokenizer);
      else
        gtk_css_tokenizer_consume_char (tokenizer, NULL);
    }

  /* swallow the closing parenthesis, if there is one */
  if (tokenizer->data < tokenizer->end)
    gtk_css_tokenizer_consume_ascii (tokenizer);

  gtk_css_token_init (token, GTK_CSS_TOKEN_BAD_URL);
}
/*
 * Reads the contents of an unquoted url(), with the read position just
 * after the opening parenthesis.
 *
 * Leading and trailing whitespace is skipped. Reaching the end of the
 * input before the closing ')' still yields a URL token.
 *
 * Returns: %TRUE and a URL token on success. On whitespace inside the
 *   url, a non-printable character, a quote or '(' character, or a
 *   backslash followed by a newline, the rest of the url is skipped,
 *   @token becomes a bad-url token, an error is reported and %FALSE is
 *   returned.
 */
static gboolean
gtk_css_tokenizer_read_url (GtkCssTokenizer  *tokenizer,
                            GtkCssToken      *token,
                            GError          **error)
{
  GString *url = g_string_new (NULL);

  while (tokenizer->data < tokenizer->end && is_whitespace (*tokenizer->data))
    gtk_css_tokenizer_consume_whitespace (tokenizer);

  while (tokenizer->data < tokenizer->end)
    {
      /* Remember the current byte: the error paths skip ahead with
       * read_bad_url() before they format their message. */
      const char c = *tokenizer->data;

      if (c == ')')
        {
          gtk_css_tokenizer_consume_ascii (tokenizer);
          break;
        }
      else if (is_whitespace (c))
        {
          do
            gtk_css_tokenizer_consume_whitespace (tokenizer);
          while (tokenizer->data < tokenizer->end && is_whitespace (*tokenizer->data));

          /* Check for the end of the input before looking at the next byte. */
          if (tokenizer->data >= tokenizer->end)
            break;

          if (*tokenizer->data == ')')
            {
              gtk_css_tokenizer_consume_ascii (tokenizer);
              break;
            }

          gtk_css_tokenizer_read_bad_url (tokenizer, token);
          g_string_free (url, TRUE);
          gtk_css_tokenizer_parse_error (error, "Whitespace only allowed at start and end of url");
          return FALSE;
        }
      else if (is_non_printable (c))
        {
          gtk_css_tokenizer_read_bad_url (tokenizer, token);
          g_string_free (url, TRUE);
          gtk_css_tokenizer_parse_error (error, "Nonprintable character 0x%02X in url", (guint) (guchar) c);
          return FALSE;
        }
      else if (c == '"' ||
               c == '\'' ||
               c == '(')
        {
          gtk_css_tokenizer_read_bad_url (tokenizer, token);
          g_string_free (url, TRUE);
          gtk_css_tokenizer_parse_error (error, "Invalid character %c in url", c);
          return FALSE;
        }
      else if (gtk_css_tokenizer_has_valid_escape (tokenizer))
        {
          g_string_append_unichar (url, gtk_css_tokenizer_read_escape (tokenizer));
        }
      else if (c == '\\')
        {
          /* a backslash that is not a valid escape is followed by a newline */
          gtk_css_tokenizer_read_bad_url (tokenizer, token);
          g_string_free (url, TRUE);
          gtk_css_tokenizer_parse_error (error, "Newline may not follow '\\' escape character");
          return FALSE;
        }
      else
        {
          gtk_css_tokenizer_consume_char (tokenizer, url);
        }
    }

  /* The token takes ownership of the accumulated characters. */
  gtk_css_token_init_string (token, GTK_CSS_TOKEN_URL, g_string_free (url, FALSE));

  return TRUE;
}
/*
 * Reads a name and turns it into the matching token:
 *  - an identifier token if no '(' follows directly,
 *  - a URL (or bad-url) token for "url(" whose argument is not quoted,
 *  - a function token otherwise (including "url(" with a quoted argument).
 *
 * Returns: %FALSE only if reading an unquoted url failed, %TRUE otherwise.
 */
static gboolean
gtk_css_tokenizer_read_ident_like (GtkCssTokenizer  *tokenizer,
                                   GtkCssToken      *token,
                                   GError          **error)
{
  char *name = gtk_css_tokenizer_read_name (tokenizer);

  /* The name may be the last thing in the input, so check for the end
   * before looking at the next byte. */
  if (tokenizer->data == tokenizer->end || *tokenizer->data != '(')
    {
      gtk_css_token_init_string (token, GTK_CSS_TOKEN_IDENT, name);
      return TRUE;
    }

  gtk_css_tokenizer_consume_ascii (tokenizer);

  if (g_ascii_strcasecmp (name, "url") == 0)
    {
      const char *data = tokenizer->data;

      /* Peek past whitespace, without consuming it, to see whether the
       * argument is a quoted string. */
      while (data < tokenizer->end && is_whitespace (*data))
        data++;

      if (data == tokenizer->end || (*data != '"' && *data != '\''))
        {
          g_free (name);
          return gtk_css_tokenizer_read_url (tokenizer, token, error);
        }
    }

  gtk_css_token_init_string (token, GTK_CSS_TOKEN_FUNCTION, name);
  return TRUE;
}
/*
 * Reads a numeric token at the read position: an optional sign, integer
 * digits, an optional fraction and an optional exponent. The caller must
 * ensure a number starts here (see gtk_css_tokenizer_has_number()).
 *
 * Depending on what follows the number, @token becomes a dimension token
 * (an identifier follows), a percentage token ('%' follows) or a plain
 * integer / number token. Integer-ness and the presence of an explicit
 * sign are recorded in the token type.
 */
static void
gtk_css_tokenizer_read_numeric (GtkCssTokenizer *tokenizer,
                                GtkCssToken     *token)
{
  int sign = 1, exponent_sign = 1;
  gint64 fractional = 0, fractional_length = 1, exponent = 0;
  /* Accumulated as a double: the result is a double anyway, and an
   * arbitrarily long digit run cannot overflow it the way it would
   * overflow a signed 64-bit integer (undefined behaviour). */
  double integer = 0;
  gboolean is_int = TRUE, has_sign = FALSE;
  const char *data = tokenizer->data;
  double value;

  if (*data == '-')
    {
      has_sign = TRUE;
      sign = -1;
      data++;
    }
  else if (*data == '+')
    {
      has_sign = TRUE;
      data++;
    }

  for (; data < tokenizer->end && g_ascii_isdigit (*data); data++)
    integer = 10 * integer + g_ascii_digit_value (*data);

  /* fraction: '.' followed by at least one digit */
  if (data + 1 < tokenizer->end && *data == '.' && g_ascii_isdigit (data[1]))
    {
      is_int = FALSE;
      data++;

      fractional = g_ascii_digit_value (*data);
      fractional_length = 10;
      data++;

      while (data < tokenizer->end && g_ascii_isdigit (*data))
        {
          /* digits beyond the precision of a gint64 are consumed but ignored */
          if (fractional_length < G_MAXINT64 / 10)
            {
              fractional = 10 * fractional + g_ascii_digit_value (*data);
              fractional_length *= 10;
            }
          data++;
        }
    }

  /* exponent: 'e' or 'E', an optional sign, at least one digit */
  if (data + 1 < tokenizer->end && (*data == 'e' || *data == 'E') &&
      (g_ascii_isdigit (data[1]) ||
       (data + 2 < tokenizer->end && (data[1] == '+' || data[1] == '-') && g_ascii_isdigit (data[2]))))
    {
      is_int = FALSE;
      data++;

      if (*data == '-')
        {
          exponent_sign = -1;
          data++;
        }
      else if (*data == '+')
        {
          data++;
        }

      while (data < tokenizer->end && g_ascii_isdigit (*data))
        {
          /* Stop accumulating before the gint64 would overflow; the
           * result is infinite or zero long before that point. */
          if (exponent < G_MAXINT64 / 10)
            exponent = 10 * exponent + g_ascii_digit_value (*data);
          data++;
        }
    }

  /* everything scanned so far is ASCII, so bytes == characters */
  gtk_css_tokenizer_consume (tokenizer, data - tokenizer->data, data - tokenizer->data);

  value = sign * (integer + ((double) fractional / fractional_length)) * pow (10, exponent_sign * exponent);

  if (gtk_css_tokenizer_has_identifier (tokenizer))
    {
      GtkCssTokenType type;

      if (is_int)
        type = has_sign ? GTK_CSS_TOKEN_SIGNED_INTEGER_DIMENSION : GTK_CSS_TOKEN_SIGNLESS_INTEGER_DIMENSION;
      else
        type = GTK_CSS_TOKEN_DIMENSION;

      gtk_css_token_init_dimension (token, type, value, gtk_css_tokenizer_read_name (tokenizer));
    }
  else if (gtk_css_tokenizer_remaining (tokenizer) > 0 && *tokenizer->data == '%')
    {
      gtk_css_token_init_number (token, GTK_CSS_TOKEN_PERCENTAGE, value);
      gtk_css_tokenizer_consume_ascii (tokenizer);
    }
  else
    {
      GtkCssTokenType type;

      if (is_int)
        type = has_sign ? GTK_CSS_TOKEN_SIGNED_INTEGER : GTK_CSS_TOKEN_SIGNLESS_INTEGER;
      else
        type = has_sign ? GTK_CSS_TOKEN_SIGNED_NUMBER : GTK_CSS_TOKEN_SIGNLESS_NUMBER;

      gtk_css_token_init_number (token, type, value);
    }
}
/*
 * Turns the single character at the read position into a delimiter token
 * and consumes it.
 */
static void
gtk_css_tokenizer_read_delim (GtkCssTokenizer *tokenizer,
                              GtkCssToken     *token)
{
  gunichar delim = g_utf8_get_char (tokenizer->data);

  gtk_css_token_init_delim (token, delim);
  gtk_css_tokenizer_consume_char (tokenizer, NULL);
}
/*
 * Reads the token starting with the '-' at the read position. In order of
 * precedence this is a lone '-' delimiter at the end of the input, a
 * negative number, the CDC token "-->", an identifier-like token, or a
 * '-' delimiter.
 *
 * Returns: %FALSE only if reading an identifier-like token failed.
 */
static gboolean
gtk_css_tokenizer_read_dash (GtkCssTokenizer  *tokenizer,
                             GtkCssToken      *token,
                             GError          **error)
{
  /* '-' is the very last byte */
  if (gtk_css_tokenizer_remaining (tokenizer) == 1)
    {
      gtk_css_tokenizer_read_delim (tokenizer, token);
      return TRUE;
    }

  if (gtk_css_tokenizer_has_number (tokenizer))
    {
      gtk_css_tokenizer_read_numeric (tokenizer, token);
      return TRUE;
    }

  if (gtk_css_tokenizer_remaining (tokenizer) >= 3 &&
      tokenizer->data[1] == '-' &&
      tokenizer->data[2] == '>')
    {
      gtk_css_token_init (token, GTK_CSS_TOKEN_CDC);
      gtk_css_tokenizer_consume (tokenizer, 3, 3);
      return TRUE;
    }

  if (gtk_css_tokenizer_has_identifier (tokenizer))
    return gtk_css_tokenizer_read_ident_like (tokenizer, token, error);

  gtk_css_tokenizer_read_delim (tokenizer, token);
  return TRUE;
}
/*
 * Reads a quoted string; the character at the read position is the
 * opening quote and also determines the closing quote.
 *
 * Escapes are decoded and a backslash directly followed by a newline is a
 * line continuation that contributes nothing. Reaching the end of the
 * input before the closing quote still yields a string token.
 *
 * Returns: %TRUE and a string token on success. On an unescaped newline,
 *   which is left unconsumed, @token becomes a bad-string token, an error
 *   is reported and %FALSE is returned.
 */
static gboolean
gtk_css_tokenizer_read_string (GtkCssTokenizer  *tokenizer,
                               GtkCssToken      *token,
                               GError          **error)
{
  const char quote = *tokenizer->data;
  GString *contents = g_string_new (NULL);

  /* skip the opening quote */
  gtk_css_tokenizer_consume_ascii (tokenizer);

  while (tokenizer->data < tokenizer->end)
    {
      const char c = *tokenizer->data;

      if (c == quote)
        {
          gtk_css_tokenizer_consume_ascii (tokenizer);
          break;
        }

      if (c == '\\')
        {
          /* a backslash as the very last byte is dropped */
          if (gtk_css_tokenizer_remaining (tokenizer) == 1)
            {
              gtk_css_tokenizer_consume_ascii (tokenizer);
              break;
            }

          if (is_newline (tokenizer->data[1]))
            {
              /* line continuation */
              gtk_css_tokenizer_consume_ascii (tokenizer);
              gtk_css_tokenizer_consume_newline (tokenizer);
            }
          else
            {
              g_string_append_unichar (contents, gtk_css_tokenizer_read_escape (tokenizer));
            }
          continue;
        }

      if (is_newline (c))
        {
          g_string_free (contents, TRUE);
          gtk_css_token_init (token, GTK_CSS_TOKEN_BAD_STRING);
          gtk_css_tokenizer_parse_error (error, "Newlines inside strings must be escaped");
          return FALSE;
        }

      gtk_css_tokenizer_consume_char (tokenizer, contents);
    }

  gtk_css_token_init_string (token, GTK_CSS_TOKEN_STRING, g_string_free (contents, FALSE));

  return TRUE;
}
/*
 * Reads a comment; the read position is at its opening slash-star.
 * @token becomes a comment token in every case.
 *
 * Returns: %TRUE if the closing star-slash was found, %FALSE (with an
 *   error reported) if the input ended first.
 */
static gboolean
gtk_css_tokenizer_read_comment (GtkCssTokenizer  *tokenizer,
                                GtkCssToken      *token,
                                GError          **error)
{
  /* the token is a comment whether or not it is terminated */
  gtk_css_token_init (token, GTK_CSS_TOKEN_COMMENT);

  /* skip the opener */
  gtk_css_tokenizer_consume (tokenizer, 2, 2);

  for (; tokenizer->data < tokenizer->end; gtk_css_tokenizer_consume_char (tokenizer, NULL))
    {
      if (gtk_css_tokenizer_remaining (tokenizer) <= 1)
        continue;

      if (tokenizer->data[0] == '*' && tokenizer->data[1] == '/')
        {
          gtk_css_tokenizer_consume (tokenizer, 2, 2);
          return TRUE;
        }
    }

  gtk_css_tokenizer_parse_error (error, "Comment not terminated at end of document.");

  return FALSE;
}
/*
 * Reads a two-character match operator such as "~=" or "^=": if the
 * character at the read position is followed by '=', @token becomes a
 * token of @type; otherwise the character is read as a delimiter.
 */
static void
gtk_css_tokenizer_read_match (GtkCssTokenizer *tokenizer,
                              GtkCssToken     *token,
                              GtkCssTokenType  type)
{
  gboolean followed_by_equals;

  followed_by_equals = gtk_css_tokenizer_remaining (tokenizer) > 1 &&
                       tokenizer->data[1] == '=';

  if (!followed_by_equals)
    {
      gtk_css_tokenizer_read_delim (tokenizer, token);
      return;
    }

  gtk_css_token_init (token, type);
  gtk_css_tokenizer_consume (tokenizer, 2, 2);
}
/*
 * gtk_css_tokenizer_read_token:
 * @tokenizer: the tokenizer
 * @token: (out): location to store the token that was read
 * @error: (allow-none): return location for a syntax error
 *
 * Reads the next token from the input and advances the read position past
 * it. At the end of the input an EOF token is produced.
 *
 * When a syntax error is found, @token is still initialized (for example
 * as a bad-string, bad-url, comment or delimiter token) so that the
 * caller can continue reading.
 *
 * Returns: %TRUE if a token was read without error, %FALSE if a syntax
 *   error was reported.
 */
gboolean
gtk_css_tokenizer_read_token (GtkCssTokenizer  *tokenizer,
                              GtkCssToken      *token,
                              GError          **error)
{
  if (tokenizer->data == tokenizer->end)
    {
      gtk_css_token_init (token, GTK_CSS_TOKEN_EOF);
      return TRUE;
    }

  /* comments are recognized before anything else */
  if (tokenizer->data[0] == '/' && gtk_css_tokenizer_remaining (tokenizer) > 1 &&
      tokenizer->data[1] == '*')
    return gtk_css_tokenizer_read_comment (tokenizer, token, error);

  switch (*tokenizer->data)
    {
    case '\n':
    case '\r':
    case '\t':
    case '\f':
    case ' ':
      gtk_css_tokenizer_read_whitespace (tokenizer, token);
      return TRUE;

    case '"':
      return gtk_css_tokenizer_read_string (tokenizer, token, error);

    case '#':
      gtk_css_tokenizer_consume_ascii (tokenizer);
      /* '#' may have been the last byte; only look at the next byte if
       * there is one. */
      if (gtk_css_tokenizer_remaining (tokenizer) > 0 &&
          (is_name (*tokenizer->data) || gtk_css_tokenizer_has_valid_escape (tokenizer)))
        {
          GtkCssTokenType type;

          /* a hash whose name is a valid identifier can be used as an ID */
          if (gtk_css_tokenizer_has_identifier (tokenizer))
            type = GTK_CSS_TOKEN_HASH_ID;
          else
            type = GTK_CSS_TOKEN_HASH_UNRESTRICTED;

          gtk_css_token_init_string (token,
                                     type,
                                     gtk_css_tokenizer_read_name (tokenizer));
        }
      else
        {
          gtk_css_token_init_delim (token, '#');
        }
      return TRUE;

    case '$':
      gtk_css_tokenizer_read_match (tokenizer, token, GTK_CSS_TOKEN_SUFFIX_MATCH);
      return TRUE;

    case '\'':
      return gtk_css_tokenizer_read_string (tokenizer, token, error);

    case '(':
      gtk_css_token_init (token, GTK_CSS_TOKEN_OPEN_PARENS);
      gtk_css_tokenizer_consume_ascii (tokenizer);
      return TRUE;

    case ')':
      gtk_css_token_init (token, GTK_CSS_TOKEN_CLOSE_PARENS);
      gtk_css_tokenizer_consume_ascii (tokenizer);
      return TRUE;

    case '*':
      gtk_css_tokenizer_read_match (tokenizer, token, GTK_CSS_TOKEN_SUBSTRING_MATCH);
      return TRUE;

    case '+':
      if (gtk_css_tokenizer_has_number (tokenizer))
        gtk_css_tokenizer_read_numeric (tokenizer, token);
      else
        gtk_css_tokenizer_read_delim (tokenizer, token);
      return TRUE;

    case ',':
      gtk_css_token_init (token, GTK_CSS_TOKEN_COMMA);
      gtk_css_tokenizer_consume_ascii (tokenizer);
      return TRUE;

    case '-':
      return gtk_css_tokenizer_read_dash (tokenizer, token, error);

    case '.':
      if (gtk_css_tokenizer_has_number (tokenizer))
        gtk_css_tokenizer_read_numeric (tokenizer, token);
      else
        gtk_css_tokenizer_read_delim (tokenizer, token);
      return TRUE;

    case ':':
      gtk_css_token_init (token, GTK_CSS_TOKEN_COLON);
      gtk_css_tokenizer_consume_ascii (tokenizer);
      return TRUE;

    case ';':
      gtk_css_token_init (token, GTK_CSS_TOKEN_SEMICOLON);
      gtk_css_tokenizer_consume_ascii (tokenizer);
      return TRUE;

    case '<':
      if (gtk_css_tokenizer_remaining (tokenizer) >= 4 &&
          tokenizer->data[1] == '!' &&
          tokenizer->data[2] == '-' &&
          tokenizer->data[3] == '-')
        {
          gtk_css_token_init (token, GTK_CSS_TOKEN_CDO);
          gtk_css_tokenizer_consume (tokenizer, 4, 4);
        }
      else
        {
          gtk_css_tokenizer_read_delim (tokenizer, token);
        }
      return TRUE;

    case '@':
      gtk_css_tokenizer_consume_ascii (tokenizer);
      if (gtk_css_tokenizer_has_identifier (tokenizer))
        {
          gtk_css_token_init_string (token,
                                     GTK_CSS_TOKEN_AT_KEYWORD,
                                     gtk_css_tokenizer_read_name (tokenizer));
        }
      else
        {
          gtk_css_token_init_delim (token, '@');
        }
      return TRUE;

    case '[':
      gtk_css_token_init (token, GTK_CSS_TOKEN_OPEN_SQUARE);
      gtk_css_tokenizer_consume_ascii (tokenizer);
      return TRUE;

    case '\\':
      if (gtk_css_tokenizer_has_valid_escape (tokenizer))
        {
          return gtk_css_tokenizer_read_ident_like (tokenizer, token, error);
        }
      else
        {
          /* a backslash that is not a valid escape is followed by a newline */
          gtk_css_token_init_delim (token, '\\');
          gtk_css_tokenizer_consume_ascii (tokenizer);
          gtk_css_tokenizer_parse_error (error, "Newline may not follow '\\' escape character");
          return FALSE;
        }

    case ']':
      gtk_css_token_init (token, GTK_CSS_TOKEN_CLOSE_SQUARE);
      gtk_css_tokenizer_consume_ascii (tokenizer);
      return TRUE;

    case '^':
      gtk_css_tokenizer_read_match (tokenizer, token, GTK_CSS_TOKEN_PREFIX_MATCH);
      return TRUE;

    case '{':
      gtk_css_token_init (token, GTK_CSS_TOKEN_OPEN_CURLY);
      gtk_css_tokenizer_consume_ascii (tokenizer);
      return TRUE;

    case '}':
      gtk_css_token_init (token, GTK_CSS_TOKEN_CLOSE_CURLY);
      gtk_css_tokenizer_consume_ascii (tokenizer);
      return TRUE;

    case '|':
      if (gtk_css_tokenizer_remaining (tokenizer) > 1 && tokenizer->data[1] == '|')
        {
          gtk_css_token_init (token, GTK_CSS_TOKEN_COLUMN);
          gtk_css_tokenizer_consume (tokenizer, 2, 2);
        }
      else
        {
          gtk_css_tokenizer_read_match (tokenizer, token, GTK_CSS_TOKEN_DASH_MATCH);
        }
      return TRUE;

    case '~':
      gtk_css_tokenizer_read_match (tokenizer, token, GTK_CSS_TOKEN_INCLUDE_MATCH);
      return TRUE;

    default:
      if (g_ascii_isdigit (*tokenizer->data))
        {
          gtk_css_tokenizer_read_numeric (tokenizer, token);
          return TRUE;
        }
      else if (is_name_start (*tokenizer->data))
        {
          return gtk_css_tokenizer_read_ident_like (tokenizer, token, error);
        }
      else
        {
          gtk_css_tokenizer_read_delim (tokenizer, token);
          return TRUE;
        }
    }
}