mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-09 23:00:07 +00:00
locale: Introduce translate_unicode_codepoint into linereader.c
This will permit reusing the Unicode character processing for different character encodings, not just the current <U...> encoding. Reviewed-by: Carlos O'Donell <carlos@redhat.com> Tested-by: Carlos O'Donell <carlos@redhat.com>
This commit is contained in:
parent
19d4944459
commit
7dcaabb94c
@ -596,6 +596,83 @@ get_ident (struct linereader *lr)
|
||||
return &lr->token;
|
||||
}
|
||||
|
||||
/* Process a decoded Unicode codepoint WCH in a string, placing the
|
||||
multibyte sequence into LRB. Return false if the character is not
|
||||
found in CHARMAP/REPERTOIRE. */
|
||||
static bool
|
||||
translate_unicode_codepoint (struct localedef_t *locale,
|
||||
const struct charmap_t *charmap,
|
||||
const struct repertoire_t *repertoire,
|
||||
uint32_t wch, struct lr_buffer *lrb)
|
||||
{
|
||||
/* See whether the charmap contains the Uxxxxxxxx names. */
|
||||
char utmp[10];
|
||||
snprintf (utmp, sizeof (utmp), "U%08X", wch);
|
||||
struct charseq *seq = charmap_find_value (charmap, utmp, 9);
|
||||
|
||||
if (seq == NULL)
|
||||
{
|
||||
/* No, this isn't the case. Now determine from
|
||||
the repertoire the name of the character and
|
||||
find it in the charmap. */
|
||||
if (repertoire != NULL)
|
||||
{
|
||||
const char *symbol = repertoire_find_symbol (repertoire, wch);
|
||||
if (symbol != NULL)
|
||||
seq = charmap_find_value (charmap, symbol, strlen (symbol));
|
||||
}
|
||||
|
||||
if (seq == NULL)
|
||||
{
|
||||
#ifndef NO_TRANSLITERATION
|
||||
/* Transliterate if possible. */
|
||||
if (locale != NULL)
|
||||
{
|
||||
if ((locale->avail & CTYPE_LOCALE) == 0)
|
||||
{
|
||||
/* Load the CTYPE data now. */
|
||||
int old_needed = locale->needed;
|
||||
|
||||
locale->needed = 0;
|
||||
locale = load_locale (LC_CTYPE, locale->name,
|
||||
locale->repertoire_name,
|
||||
charmap, locale);
|
||||
locale->needed = old_needed;
|
||||
}
|
||||
|
||||
uint32_t *translit;
|
||||
if ((locale->avail & CTYPE_LOCALE) != 0
|
||||
&& ((translit = find_translit (locale, charmap, wch))
|
||||
!= NULL))
|
||||
/* The CTYPE data contains a matching
|
||||
transliteration. */
|
||||
{
|
||||
for (int i = 0; translit[i] != 0; ++i)
|
||||
{
|
||||
snprintf (utmp, sizeof (utmp), "U%08X", translit[i]);
|
||||
seq = charmap_find_value (charmap, utmp, 9);
|
||||
assert (seq != NULL);
|
||||
adds (lrb, seq->bytes, seq->nbytes);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
#endif /* NO_TRANSLITERATION */
|
||||
|
||||
/* Not a known name. */
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (seq != NULL)
|
||||
{
|
||||
adds (lrb, seq->bytes, seq->nbytes);
|
||||
return true;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
static struct token *
|
||||
get_string (struct linereader *lr, const struct charmap_t *charmap,
|
||||
@ -635,7 +712,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
|
||||
}
|
||||
else
|
||||
{
|
||||
int illegal_string = 0;
|
||||
bool illegal_string = false;
|
||||
size_t buf2act = 0;
|
||||
size_t buf2max = 56 * sizeof (uint32_t);
|
||||
int ch;
|
||||
@ -695,7 +772,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
|
||||
{
|
||||
/* <> is no correct name. Ignore it and also signal an
|
||||
error. */
|
||||
illegal_string = 1;
|
||||
illegal_string = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -709,8 +786,6 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
|
||||
|
||||
if (cp == &lrb.buf[lrb.act])
|
||||
{
|
||||
char utmp[10];
|
||||
|
||||
/* Yes, it is. */
|
||||
addc (&lrb, '\0');
|
||||
wch = strtoul (lrb.buf + startidx + 1, NULL, 16);
|
||||
@ -721,81 +796,9 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
|
||||
if (return_widestr)
|
||||
ADDWC (wch);
|
||||
|
||||
/* See whether the charmap contains the Uxxxxxxxx names. */
|
||||
snprintf (utmp, sizeof (utmp), "U%08X", wch);
|
||||
seq = charmap_find_value (charmap, utmp, 9);
|
||||
|
||||
if (seq == NULL)
|
||||
{
|
||||
/* No, this isn't the case. Now determine from
|
||||
the repertoire the name of the character and
|
||||
find it in the charmap. */
|
||||
if (repertoire != NULL)
|
||||
{
|
||||
const char *symbol;
|
||||
|
||||
symbol = repertoire_find_symbol (repertoire, wch);
|
||||
|
||||
if (symbol != NULL)
|
||||
seq = charmap_find_value (charmap, symbol,
|
||||
strlen (symbol));
|
||||
}
|
||||
|
||||
if (seq == NULL)
|
||||
{
|
||||
#ifndef NO_TRANSLITERATION
|
||||
/* Transliterate if possible. */
|
||||
if (locale != NULL)
|
||||
{
|
||||
uint32_t *translit;
|
||||
|
||||
if ((locale->avail & CTYPE_LOCALE) == 0)
|
||||
{
|
||||
/* Load the CTYPE data now. */
|
||||
int old_needed = locale->needed;
|
||||
|
||||
locale->needed = 0;
|
||||
locale = load_locale (LC_CTYPE,
|
||||
locale->name,
|
||||
locale->repertoire_name,
|
||||
charmap, locale);
|
||||
locale->needed = old_needed;
|
||||
}
|
||||
|
||||
if ((locale->avail & CTYPE_LOCALE) != 0
|
||||
&& ((translit = find_translit (locale,
|
||||
charmap, wch))
|
||||
!= NULL))
|
||||
/* The CTYPE data contains a matching
|
||||
transliteration. */
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; translit[i] != 0; ++i)
|
||||
{
|
||||
char utmp[10];
|
||||
|
||||
snprintf (utmp, sizeof (utmp), "U%08X",
|
||||
translit[i]);
|
||||
seq = charmap_find_value (charmap, utmp,
|
||||
9);
|
||||
assert (seq != NULL);
|
||||
adds (&lrb, seq->bytes, seq->nbytes);
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
}
|
||||
#endif /* NO_TRANSLITERATION */
|
||||
|
||||
/* Not a known name. */
|
||||
illegal_string = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (seq != NULL)
|
||||
adds (&lrb, seq->bytes, seq->nbytes);
|
||||
|
||||
if (!translate_unicode_codepoint (locale, charmap,
|
||||
repertoire, wch, &lrb))
|
||||
illegal_string = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@ -812,7 +815,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
|
||||
/* This name is not in the charmap. */
|
||||
lr_error (lr, _("symbol `%.*s' not in charmap"),
|
||||
(int) (lrb.act - startidx), &lrb.buf[startidx]);
|
||||
illegal_string = 1;
|
||||
illegal_string = true;
|
||||
}
|
||||
|
||||
if (return_widestr)
|
||||
@ -833,7 +836,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
|
||||
/* This name is not in the repertoire map. */
|
||||
lr_error (lr, _("symbol `%.*s' not in repertoire map"),
|
||||
(int) (lrb.act - startidx), &lrb.buf[startidx]);
|
||||
illegal_string = 1;
|
||||
illegal_string = true;
|
||||
}
|
||||
else
|
||||
ADDWC (wch);
|
||||
@ -850,7 +853,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
|
||||
if (ch == '\n' || ch == EOF)
|
||||
{
|
||||
lr_error (lr, _("unterminated string"));
|
||||
illegal_string = 1;
|
||||
illegal_string = true;
|
||||
}
|
||||
|
||||
if (illegal_string)
|
||||
|
Loading…
Reference in New Issue
Block a user