Correct handling of different collation sections.
This commit is contained in:
parent
eacc655548
commit
ab80bec0cb
@ -47,6 +47,7 @@ struct element_t;
|
||||
/* Data type for list of strings. */
|
||||
struct section_list
|
||||
{
|
||||
struct section_list *def_next;
|
||||
struct section_list *next;
|
||||
/* Name of the section. */
|
||||
const char *name;
|
||||
@ -144,6 +145,8 @@ struct locale_collate_t
|
||||
int cur_weight_max;
|
||||
|
||||
/* List of known scripts. */
|
||||
struct section_list *known_sections;
|
||||
/* List of used sections. */
|
||||
struct section_list *sections;
|
||||
/* Current section using definition. */
|
||||
struct section_list *current_section;
|
||||
@ -151,6 +154,9 @@ struct locale_collate_t
|
||||
struct section_list unnamed_section;
|
||||
/* To make handling of errors easier we have another section. */
|
||||
struct section_list error_section;
|
||||
/* Sometimes we are defining the values for collating symbols before
|
||||
the first actual section. */
|
||||
struct section_list symbol_section;
|
||||
|
||||
/* Start of the order list. */
|
||||
struct element_t *start;
|
||||
@ -562,7 +568,7 @@ read_directions (struct linereader *ldfile, struct token *arg,
|
||||
|
||||
static struct element_t *
|
||||
find_element (struct linereader *ldfile, struct locale_collate_t *collate,
|
||||
const char *str, size_t len, uint32_t *wcstr)
|
||||
const char *str, size_t len)
|
||||
{
|
||||
struct element_t *result = NULL;
|
||||
|
||||
@ -668,13 +674,26 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
|
||||
elem->weights[weight_cnt].w[0] = NULL;
|
||||
elem->weights[weight_cnt].cnt = 1;
|
||||
}
|
||||
else if (arg->tok == tok_bsymbol)
|
||||
else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
|
||||
{
|
||||
struct element_t *val = find_element (ldfile, collate,
|
||||
arg->val.str.startmb,
|
||||
arg->val.str.lenmb,
|
||||
arg->val.str.startwc);
|
||||
char ucs4str[10];
|
||||
struct element_t *val;
|
||||
char *symstr;
|
||||
size_t symlen;
|
||||
|
||||
if (arg->tok == tok_bsymbol)
|
||||
{
|
||||
symstr = arg->val.str.startmb;
|
||||
symlen = arg->val.str.lenmb;
|
||||
}
|
||||
else
|
||||
{
|
||||
snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
|
||||
symstr = ucs4str;
|
||||
symlen = 9;
|
||||
}
|
||||
|
||||
val = find_element (ldfile, collate, symstr, symlen);
|
||||
if (val == NULL)
|
||||
break;
|
||||
|
||||
@ -720,7 +739,7 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
|
||||
}
|
||||
|
||||
charelem = find_element (ldfile, collate, startp,
|
||||
cp - startp, NULL);
|
||||
cp - startp);
|
||||
++cp;
|
||||
}
|
||||
else
|
||||
@ -731,7 +750,7 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
|
||||
string as if that would be bsymbols. Otherwise we
|
||||
would have to match back to bsymbols somehow and this
|
||||
is not what people normally expect. */
|
||||
charelem = find_element (ldfile, collate, cp++, 1, NULL);
|
||||
charelem = find_element (ldfile, collate, cp++, 1);
|
||||
}
|
||||
|
||||
if (charelem == NULL)
|
||||
@ -1349,7 +1368,7 @@ static void
|
||||
collate_startup (struct linereader *ldfile, struct localedef_t *locale,
|
||||
struct localedef_t *copy_locale, int ignore_content)
|
||||
{
|
||||
if (!ignore_content)
|
||||
if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
|
||||
{
|
||||
struct locale_collate_t *collate;
|
||||
|
||||
@ -1432,8 +1451,9 @@ collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
|
||||
or in none. */
|
||||
for (i = 0; i < nrules; ++i)
|
||||
for (sect = collate->sections; sect != NULL; sect = sect->next)
|
||||
if ((sect->rules[i] & sort_position)
|
||||
!= (collate->sections->rules[i] & sort_position))
|
||||
if (sect->rules != NULL
|
||||
&& ((sect->rules[i] & sort_position)
|
||||
!= (collate->sections->rules[i] & sort_position)))
|
||||
{
|
||||
error (0, 0, _("\
|
||||
%s: `position' must be used for a specific level in all sections or none"),
|
||||
@ -1771,6 +1791,9 @@ Computing table size for collation table might take a while..."),
|
||||
{
|
||||
if (need_undefined)
|
||||
{
|
||||
/* This seems not to be enforced by recent standards. Don't
|
||||
emit an error, simply append UNDEFINED at the end. */
|
||||
if (0)
|
||||
error (0, 0, _("no definition of `UNDEFINED'"));
|
||||
|
||||
/* Add UNDEFINED at the end. */
|
||||
@ -1793,6 +1816,8 @@ Computing table size for collation table might take a while..."),
|
||||
ruleset the same index. Since there are never many sections we can
|
||||
use an O(n^2) algorithm here. */
|
||||
sect = collate->sections;
|
||||
while (sect != NULL && sect->rules == NULL)
|
||||
sect = sect->next;
|
||||
assert (sect != NULL);
|
||||
ruleidx = 0;
|
||||
do
|
||||
@ -1800,7 +1825,8 @@ Computing table size for collation table might take a while..."),
|
||||
struct section_list *osect = collate->sections;
|
||||
|
||||
while (osect != sect)
|
||||
if (memcmp (osect->rules, sect->rules, nrules) == 0)
|
||||
if (osect->rules != NULL
|
||||
&& memcmp (osect->rules, sect->rules, nrules) == 0)
|
||||
break;
|
||||
else
|
||||
osect = osect->next;
|
||||
@ -1811,7 +1837,9 @@ Computing table size for collation table might take a while..."),
|
||||
sect->ruleidx = osect->ruleidx;
|
||||
|
||||
/* Next section. */
|
||||
do
|
||||
sect = sect->next;
|
||||
while (sect != NULL && sect->rules == NULL);
|
||||
}
|
||||
while (sect != NULL);
|
||||
/* We are currently not prepared for more than 256 rulesets. But this
|
||||
@ -1993,7 +2021,7 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
|
||||
|
||||
/* Prepare the ruleset table. */
|
||||
for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
|
||||
if (sect->ruleidx == i)
|
||||
if (sect->rules != NULL && sect->ruleidx == i)
|
||||
{
|
||||
int j;
|
||||
|
||||
@ -2670,7 +2698,7 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
|
||||
|
||||
/* Get the locale definition. */
|
||||
copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
|
||||
repertoire_name, charmap);
|
||||
repertoire_name, charmap, NULL);
|
||||
if ((copy_locale->avail & COLLATE_LOCALE) == 0)
|
||||
{
|
||||
/* Not yet loaded. So do it now. */
|
||||
@ -2708,6 +2736,19 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
|
||||
|
||||
switch (nowtok)
|
||||
{
|
||||
case tok_copy:
|
||||
/* Allow copying other locales. */
|
||||
now = lr_token (ldfile, charmap, NULL);
|
||||
if (now->tok != tok_string)
|
||||
goto err_label;
|
||||
|
||||
if (! ignore_content)
|
||||
load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
|
||||
charmap, result);
|
||||
|
||||
lr_ignore_rest (ldfile, 1);
|
||||
break;
|
||||
|
||||
case tok_coll_weight_max:
|
||||
/* Ignore the rest of the line if we don't need the input of
|
||||
this line. */
|
||||
@ -2751,8 +2792,11 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
|
||||
/* Check whether this section is already known. */
|
||||
struct section_list *known = collate->sections;
|
||||
while (known != NULL)
|
||||
{
|
||||
if (strcmp (known->name, arg->val.str.startmb) == 0)
|
||||
break;
|
||||
known = known->next;
|
||||
}
|
||||
|
||||
if (known != NULL)
|
||||
{
|
||||
@ -2822,15 +2866,12 @@ collate_read (struct linereader *ldfile, struct localedef_t *result,
|
||||
repertoire, symbol, symbol_len))
|
||||
goto col_elem_free;
|
||||
|
||||
if (insert_entry (&collate->elem_table,
|
||||
symbol, symbol_len,
|
||||
insert_entry (&collate->elem_table, symbol, symbol_len,
|
||||
new_element (collate,
|
||||
arg->val.str.startmb,
|
||||
arg->val.str.lenmb - 1,
|
||||
arg->val.str.startwc,
|
||||
symbol, symbol_len, 0)) < 0)
|
||||
lr_error (ldfile, _("\
|
||||
error while adding collating element"));
|
||||
symbol, symbol_len, 0));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -2909,11 +2950,8 @@ error while adding collating element"));
|
||||
repertoire, symbol, symbol_len))
|
||||
goto col_sym_free;
|
||||
|
||||
if (insert_entry (&collate->sym_table,
|
||||
symbol, symbol_len,
|
||||
new_symbol (collate)) < 0)
|
||||
lr_error (ldfile, _("\
|
||||
error while adding collating symbol"));
|
||||
insert_entry (&collate->sym_table, symbol, symbol_len,
|
||||
new_symbol (collate));
|
||||
}
|
||||
else if (symbol_len != endsymbol_len)
|
||||
{
|
||||
@ -2972,11 +3010,8 @@ error while adding collating symbol"));
|
||||
repertoire, symbuf, symbol_len))
|
||||
goto col_sym_free;
|
||||
|
||||
if (insert_entry (&collate->sym_table,
|
||||
symbuf, symbol_len,
|
||||
new_symbol (collate)) < 0)
|
||||
lr_error (ldfile, _("\
|
||||
error while adding collating symbol"));
|
||||
insert_entry (&collate->sym_table, symbuf,
|
||||
symbol_len, new_symbol (collate));
|
||||
|
||||
/* Increment the counter. */
|
||||
++from;
|
||||
@ -3074,6 +3109,44 @@ error while adding equivalent collating symbol"));
|
||||
lr_ignore_rest (ldfile, 1);
|
||||
break;
|
||||
|
||||
case tok_script:
|
||||
/* We get told about the scripts we know. */
|
||||
arg = lr_token (ldfile, charmap, repertoire);
|
||||
if (arg->tok != tok_bsymbol)
|
||||
goto err_label;
|
||||
else
|
||||
{
|
||||
struct section_list *runp = collate->known_sections;
|
||||
char *name;
|
||||
|
||||
while (runp != NULL)
|
||||
if (strncmp (runp->name, arg->val.str.startmb,
|
||||
arg->val.str.lenmb) == 0
|
||||
&& runp->name[arg->val.str.lenmb] == '\0')
|
||||
break;
|
||||
else
|
||||
runp = runp->def_next;
|
||||
|
||||
if (runp != NULL)
|
||||
{
|
||||
lr_error (ldfile, _("duplicate definition of script `%s'"),
|
||||
runp->name);
|
||||
lr_ignore_rest (ldfile, 0);
|
||||
break;
|
||||
}
|
||||
|
||||
runp = (struct section_list *) xcalloc (1, sizeof (*runp));
|
||||
name = strncpy (xmalloc (arg->val.str.lenmb + 1),
|
||||
arg->val.str.startmb, arg->val.str.lenmb);
|
||||
name[arg->val.str.lenmb] = '\0';
|
||||
runp->name = name;
|
||||
|
||||
runp->def_next = collate->known_sections;
|
||||
collate->known_sections = runp;
|
||||
}
|
||||
lr_ignore_rest (ldfile, 1);
|
||||
break;
|
||||
|
||||
case tok_order_start:
|
||||
/* Ignore the rest of the line if we don't need the input of
|
||||
this line. */
|
||||
@ -3094,10 +3167,13 @@ error while adding equivalent collating symbol"));
|
||||
if (arg->tok == tok_bsymbol)
|
||||
{
|
||||
/* This had better be a section name. */
|
||||
struct section_list *sp = collate->sections;
|
||||
struct section_list *sp = collate->known_sections;
|
||||
while (sp != NULL
|
||||
&& strcmp (sp->name, arg->val.str.startmb) != 0)
|
||||
sp = sp->next;
|
||||
&& (sp->name == NULL
|
||||
|| strncmp (sp->name, arg->val.str.startmb,
|
||||
arg->val.str.lenmb) != 0
|
||||
|| sp->name[arg->val.str.lenmb] != '\0'))
|
||||
sp = sp->def_next;
|
||||
|
||||
if (sp == NULL)
|
||||
{
|
||||
@ -3109,15 +3185,21 @@ error while adding equivalent collating symbol"));
|
||||
|
||||
if (collate->error_section.first == NULL)
|
||||
{
|
||||
collate->error_section.next = collate->sections;
|
||||
if (collate->sections == NULL)
|
||||
collate->sections = &collate->error_section;
|
||||
else
|
||||
{
|
||||
sp = collate->sections;
|
||||
while (sp->next != NULL)
|
||||
sp = sp->next;
|
||||
|
||||
collate->error_section.next = NULL;
|
||||
sp->next = &collate->error_section;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Remember this section. */
|
||||
collate->current_section = sp;
|
||||
|
||||
/* One should not be allowed to open the same
|
||||
section twice. */
|
||||
if (sp->first != NULL)
|
||||
@ -3126,8 +3208,13 @@ error while adding equivalent collating symbol"));
|
||||
"LC_COLLATE", sp->name);
|
||||
else
|
||||
{
|
||||
sp->next = collate->sections;
|
||||
collate->sections = sp;
|
||||
if (collate->current_section == NULL)
|
||||
collate->current_section = sp;
|
||||
else
|
||||
{
|
||||
sp->next = collate->current_section->next;
|
||||
collate->current_section->next = sp;
|
||||
}
|
||||
}
|
||||
|
||||
/* Next should come the end of the line or a semicolon. */
|
||||
@ -3381,10 +3468,10 @@ error while adding equivalent collating symbol"));
|
||||
break;
|
||||
}
|
||||
|
||||
if (state != 1 && state != 3 && state != 5)
|
||||
if (state != 0 && state != 1 && state != 3 && state != 5)
|
||||
goto err_label;
|
||||
|
||||
if (state == 5 && nowtok == tok_ucs4)
|
||||
if ((state == 0 || state == 5) && nowtok == tok_ucs4)
|
||||
goto err_label;
|
||||
|
||||
if (nowtok == tok_ucs4)
|
||||
@ -3399,7 +3486,41 @@ error while adding equivalent collating symbol"));
|
||||
symlen = arg->val.str.lenmb;
|
||||
}
|
||||
|
||||
if (state == 3)
|
||||
if (state == 0)
|
||||
{
|
||||
/* We are outside an `order_start' region. This means
|
||||
we must only accept definitions of values for
|
||||
collation symbols since these are purely abstract
|
||||
values and don't need directions associated. */
|
||||
struct element_t *seqp;
|
||||
|
||||
if (find_entry (&collate->seq_table, symstr, symlen,
|
||||
(void **) &seqp) == 0)
|
||||
{
|
||||
/* It's already defined. First check whether this
|
||||
is really a collating symbol. */
|
||||
if (seqp->is_character)
|
||||
goto err_label;
|
||||
|
||||
goto move_entry;
|
||||
}
|
||||
else
|
||||
{
|
||||
void *result;
|
||||
|
||||
if (find_entry (&collate->sym_table, symstr, symlen,
|
||||
&result) != 0)
|
||||
/* No collating symbol, it's an error. */
|
||||
goto err_label;
|
||||
|
||||
/* Maybe this is the first time we define a symbol
|
||||
value and it is before the first actual section. */
|
||||
if (collate->sections == NULL)
|
||||
collate->sections = collate->current_section =
|
||||
&collate->symbol_section;
|
||||
}
|
||||
}
|
||||
else if (state == 3)
|
||||
{
|
||||
/* It is possible that we already have this collation sequence.
|
||||
In this case we move the entry. */
|
||||
@ -3416,6 +3537,7 @@ error while adding equivalent collating symbol"));
|
||||
if (find_entry (&collate->seq_table, symstr, symlen,
|
||||
(void **) &seqp) == 0)
|
||||
{
|
||||
move_entry:
|
||||
/* Remove the entry from the old position. */
|
||||
if (seqp->last == NULL)
|
||||
collate->start = seqp->next;
|
||||
|
Loading…
x
Reference in New Issue
Block a user