(collate_output): Update.

* locale/programs/ld-collate.c (collate_output): Emit correct
	information for collation elements.
	Don't write over end of array idx.
	* posix/regex.c: Handle also collation elements at end of range.

	* posix/PTESTS: Fix a few typos.
This commit is contained in:
Ulrich Drepper 1999-12-31 22:21:25 +00:00
parent 1c5d461740
commit ac8295d23b
5 changed files with 86 additions and 81 deletions

View File

@ -1,5 +1,12 @@
1999-12-31 Ulrich Drepper <drepper@cygnus.com> 1999-12-31 Ulrich Drepper <drepper@cygnus.com>
* locale/programs/ld-collate.c (collate_output): Emit correct
information for collation elements.
Don't write over end of array idx.
* posix/regex.c: Handle also collation elements at end of range.
* posix/PTESTS: Fix a few typos.
* posix/bits/posix2_lim.h: Remove _POSIX2_EQUIV_CLASS_MAX. I have * posix/bits/posix2_lim.h: Remove _POSIX2_EQUIV_CLASS_MAX. I have
no idea where this came from. no idea where this came from.
* sysdeps/posix/sysconf.c: Remove _POSIX2_EQUIV_CLASS_MAX * sysdeps/posix/sysconf.c: Remove _POSIX2_EQUIV_CLASS_MAX

View File

@ -91,8 +91,6 @@ struct element_t
unsigned int used_in_level; unsigned int used_in_level;
struct element_list_t *weights; struct element_list_t *weights;
/* Index in the `weight' table in the output file for the character. */
int32_t weights_idx;
/* Nonzero if this is a real character definition. */ /* Nonzero if this is a real character definition. */
int is_character; int is_character;
@ -301,7 +299,6 @@ new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
/* Will be allocated later. */ /* Will be allocated later. */
newp->weights = NULL; newp->weights = NULL;
newp->weights_idx = 0;
newp->file = NULL; newp->file = NULL;
newp->line = 0; newp->line = 0;
@ -1809,9 +1806,6 @@ output_weight (struct obstack *pool, struct locale_collate_t *collate,
obstack_grow (pool, buf, len); obstack_grow (pool, buf, len);
} }
/* Remember the index. */
elem->weights_idx = retval;
return retval | ((elem->section->ruleidx & 0x7f) << 24); return retval | ((elem->section->ruleidx & 0x7f) << 24);
} }
@ -1899,11 +1893,26 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
/* If we have no LC_COLLATE data emit only the number of rules as zero. */ /* If we have no LC_COLLATE data emit only the number of rules as zero. */
if (collate == NULL) if (collate == NULL)
{ {
int32_t dummy = 0;
while (cnt < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE)) while (cnt < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
{ {
iov[2 + cnt].iov_base = (char *) ""; /* The words have to be handled specially. */
iov[2 + cnt].iov_len = 0; if (cnt == _NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)
idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; || cnt == _NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)
|| cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
{
iov[2 + cnt].iov_base = &dummy;
iov[2 + cnt].iov_len = sizeof (int32_t);
}
else
{
iov[2 + cnt].iov_base = (char *) "";
iov[2 + cnt].iov_len = 0;
}
if (cnt + 1 < _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE))
idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
++cnt; ++cnt;
} }
@ -2453,23 +2462,20 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
elem_table[idx * 2] = hash; elem_table[idx * 2] = hash;
elem_table[idx * 2 + 1] = obstack_object_size (&extrapool); elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
/* Now add the index into the weights table. We know the
address is always 32bit aligned. */
if (sizeof (int) == sizeof (int32_t))
obstack_int_grow (&extrapool, runp->weights_idx);
else
obstack_grow (&extrapool, &runp->weights_idx,
sizeof (int32_t));
/* Then the string itself including length. */ /* Then the string itself including length. */
obstack_1grow (&extrapool, namelen); obstack_1grow (&extrapool, namelen);
obstack_grow (&extrapool, runp->name, namelen); obstack_grow (&extrapool, runp->name, namelen);
/* And the multibyte representation. */
obstack_1grow (&extrapool, runp->nmbs);
obstack_grow (&extrapool, runp->mbs, runp->nmbs);
/* And align again to 32 bits. */ /* And align again to 32 bits. */
if ((1 + namelen) % sizeof (int32_t) != 0) if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
obstack_grow (&extrapool, "\0\0", obstack_grow (&extrapool, "\0\0",
(sizeof (int32_t) (sizeof (int32_t)
- (1 + namelen) % sizeof (int32_t))); - ((1 + namelen + 1 + runp->nmbs)
% sizeof (int32_t))));
} }
} }
@ -2492,7 +2498,6 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_EXTRAMB)); assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_EXTRAMB));
iov[2 + cnt].iov_len = obstack_object_size (&extrapool); iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
iov[2 + cnt].iov_base = obstack_finish (&extrapool); iov[2 + cnt].iov_base = obstack_finish (&extrapool);
idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
++cnt; ++cnt;

View File

@ -115,7 +115,7 @@
3¦3¦[][.-.]-0]¦ab0-]¦ 3¦3¦[][.-.]-0]¦ab0-]¦
3¦3¦[A-[.].]c]¦ab]!¦ 3¦3¦[A-[.].]c]¦ab]!¦
# GA122 # GA122
-2¦-2¦[[.ch]]¦abc¦ -2¦-2¦[[.ch.]]¦abc¦
-2¦-2¦[[.ab.][.CD.][.EF.]]¦yZabCDEFQ9¦ -2¦-2¦[[.ab.][.CD.][.EF.]]¦yZabCDEFQ9¦
# GA125 # GA125
2¦2¦[[=a=]b]¦Abc¦ 2¦2¦[[=a=]b]¦Abc¦
@ -163,12 +163,12 @@
2¦6¦bc[d-w]xy¦abchxyz¦ 2¦6¦bc[d-w]xy¦abchxyz¦
# GA129 # GA129
1¦1¦[a-cd-f]¦dbccde¦ 1¦1¦[a-cd-f]¦dbccde¦
-1¦-1¦[a-ce-f¦dBCCdE¦ -1¦-1¦[a-ce-f]¦dBCCdE¦
2¦4¦b[n-zA-M]Y¦absY9Z¦ 2¦4¦b[n-zA-M]Y¦absY9Z¦
2¦4¦b[n-zA-M]Y¦abGY9Z¦ 2¦4¦b[n-zA-M]Y¦abGY9Z¦
# GA130 # GA130
3¦3¦[-xy]¦ac-¦ 3¦3¦[-xy]¦ac-¦
2¦4¦[c[-xy]D¦ac-D+¦ 2¦4¦c[-xy]D¦ac-D+¦
2¦2¦[--/]¦a.b¦ 2¦2¦[--/]¦a.b¦
2¦4¦c[--/]D¦ac.D+b¦ 2¦4¦c[--/]D¦ac.D+b¦
2¦2¦[^-ac]¦abcde-¦ 2¦2¦[^-ac]¦abcde-¦
@ -189,7 +189,7 @@
3¦4¦[a-c][e-f]¦acbedf¦ 3¦4¦[a-c][e-f]¦acbedf¦
4¦8¦abc*XYZ¦890abXYZ#*¦ 4¦8¦abc*XYZ¦890abXYZ#*¦
4¦9¦abc*XYZ¦890abcXYZ#*¦ 4¦9¦abc*XYZ¦890abcXYZ#*¦
4¦15¦abc*XYZ¦890abccccccccXYZ#*¦ 4¦15¦abc*XYZ¦890abcccccccXYZ#*¦
-1¦-1¦abc*XYZ¦890abc*XYZ#*¦ -1¦-1¦abc*XYZ¦890abc*XYZ#*¦
# GA132 # GA132
2¦4¦\(*bc\)¦a*bc¦ 2¦4¦\(*bc\)¦a*bc¦
@ -267,7 +267,7 @@
1¦1¦^a¦abc¦ 1¦1¦^a¦abc¦
-1¦-1¦^b¦abc¦ -1¦-1¦^b¦abc¦
-1¦-1¦^[a-zA-Z]¦99Nine¦ -1¦-1¦^[a-zA-Z]¦99Nine¦
1¦4¦^[a-zA-Z]¦Nine99¦ 1¦4¦^[a-zA-Z]*¦Nine99¦
# GA145(1) # GA145(1)
1¦2¦\(^a\)\1¦aabc¦ 1¦2¦\(^a\)\1¦aabc¦
-1¦-1¦\(^a\)\1¦^a^abc¦ -1¦-1¦\(^a\)\1¦^a^abc¦
@ -284,7 +284,7 @@
3¦3¦a$¦cba¦ 3¦3¦a$¦cba¦
-1¦-1¦a$¦abc¦ -1¦-1¦a$¦abc¦
5¦7¦[a-z]*$¦99ZZxyz¦ 5¦7¦[a-z]*$¦99ZZxyz¦
-1¦-1¦[a-z]*$¦99ZZxyz99¦ 9¦9¦[a-z]*$¦99ZZxyz99¦
3¦3¦$$¦ab$¦ 3¦3¦$$¦ab$¦
-1¦-1¦$$¦$ab¦ -1¦-1¦$$¦$ab¦
3¦3¦\$$¦ab$¦ 3¦3¦\$$¦ab$¦

View File

@ -110,7 +110,7 @@
{ 3, 3, "[][.-.]-0]", "ab0-]", }, { 3, 3, "[][.-.]-0]", "ab0-]", },
{ 3, 3, "[A-[.].]c]", "ab]!", }, { 3, 3, "[A-[.].]c]", "ab]!", },
{ 0, 0, "GA122", NULL, }, { 0, 0, "GA122", NULL, },
{ -2, -2, "[[.ch]]", "abc", }, { -2, -2, "[[.ch.]]", "abc", },
{ -2, -2, "[[.ab.][.CD.][.EF.]]", "yZabCDEFQ9", }, { -2, -2, "[[.ab.][.CD.][.EF.]]", "yZabCDEFQ9", },
{ 0, 0, "GA125", NULL, }, { 0, 0, "GA125", NULL, },
{ 2, 2, "[[=a=]b]", "Abc", }, { 2, 2, "[[=a=]b]", "Abc", },
@ -158,12 +158,12 @@
{ 2, 6, "bc[d-w]xy", "abchxyz", }, { 2, 6, "bc[d-w]xy", "abchxyz", },
{ 0, 0, "GA129", NULL, }, { 0, 0, "GA129", NULL, },
{ 1, 1, "[a-cd-f]", "dbccde", }, { 1, 1, "[a-cd-f]", "dbccde", },
{ -1, -1, "[a-ce-f", "dBCCdE", }, { -1, -1, "[a-ce-f]", "dBCCdE", },
{ 2, 4, "b[n-zA-M]Y", "absY9Z", }, { 2, 4, "b[n-zA-M]Y", "absY9Z", },
{ 2, 4, "b[n-zA-M]Y", "abGY9Z", }, { 2, 4, "b[n-zA-M]Y", "abGY9Z", },
{ 0, 0, "GA130", NULL, }, { 0, 0, "GA130", NULL, },
{ 3, 3, "[-xy]", "ac-", }, { 3, 3, "[-xy]", "ac-", },
{ 2, 4, "[c[-xy]D", "ac-D+", }, { 2, 4, "c[-xy]D", "ac-D+", },
{ 2, 2, "[--/]", "a.b", }, { 2, 2, "[--/]", "a.b", },
{ 2, 4, "c[--/]D", "ac.D+b", }, { 2, 4, "c[--/]D", "ac.D+b", },
{ 2, 2, "[^-ac]", "abcde-", }, { 2, 2, "[^-ac]", "abcde-", },
@ -184,7 +184,7 @@
{ 3, 4, "[a-c][e-f]", "acbedf", }, { 3, 4, "[a-c][e-f]", "acbedf", },
{ 4, 8, "abc*XYZ", "890abXYZ#*", }, { 4, 8, "abc*XYZ", "890abXYZ#*", },
{ 4, 9, "abc*XYZ", "890abcXYZ#*", }, { 4, 9, "abc*XYZ", "890abcXYZ#*", },
{ 4, 15, "abc*XYZ", "890abccccccccXYZ#*", }, { 4, 15, "abc*XYZ", "890abcccccccXYZ#*", },
{ -1, -1, "abc*XYZ", "890abc*XYZ#*", }, { -1, -1, "abc*XYZ", "890abc*XYZ#*", },
{ 0, 0, "GA132", NULL, }, { 0, 0, "GA132", NULL, },
{ 2, 4, "\\(*bc\\)", "a*bc", }, { 2, 4, "\\(*bc\\)", "a*bc", },
@ -262,7 +262,7 @@
{ 1, 1, "^a", "abc", }, { 1, 1, "^a", "abc", },
{ -1, -1, "^b", "abc", }, { -1, -1, "^b", "abc", },
{ -1, -1, "^[a-zA-Z]", "99Nine", }, { -1, -1, "^[a-zA-Z]", "99Nine", },
{ 1, 4, "^[a-zA-Z]", "Nine99", }, { 1, 4, "^[a-zA-Z]*", "Nine99", },
{ 0, 0, "GA145(1)", NULL, }, { 0, 0, "GA145(1)", NULL, },
{ 1, 2, "\\(^a\\)\\1", "aabc", }, { 1, 2, "\\(^a\\)\\1", "aabc", },
{ -1, -1, "\\(^a\\)\\1", "^a^abc", }, { -1, -1, "\\(^a\\)\\1", "^a^abc", },
@ -274,7 +274,7 @@
{ 3, 3, "a$", "cba", }, { 3, 3, "a$", "cba", },
{ -1, -1, "a$", "abc", }, { -1, -1, "a$", "abc", },
{ 5, 7, "[a-z]*$", "99ZZxyz", }, { 5, 7, "[a-z]*$", "99ZZxyz", },
{ -1, -1, "[a-z]*$", "99ZZxyz99", }, { 9, 9, "[a-z]*$", "99ZZxyz99", },
{ 3, 3, "$$", "ab$", }, { 3, 3, "$$", "ab$", },
{ -1, -1, "$$", "$ab", }, { -1, -1, "$$", "$ab", },
{ 3, 3, "\\$$", "ab$", }, { 3, 3, "\\$$", "ab$", },

View File

@ -1570,7 +1570,8 @@ static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
reg_syntax_t syntax)); reg_syntax_t syntax));
static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend, static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
reg_syntax_t syntax)); reg_syntax_t syntax));
static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr, static reg_errcode_t compile_range _RE_ARGS ((unsigned int range_start,
const char **p_ptr,
const char *pend, const char *pend,
char *translate, char *translate,
reg_syntax_t syntax, reg_syntax_t syntax,
@ -2174,6 +2175,7 @@ regex_compile (pattern, size, syntax, bufp)
case '[': case '[':
{ {
boolean had_char_class = false; boolean had_char_class = false;
unsigned int range_start = 0xffffffff;
if (p == pend) FREE_STACK_RETURN (REG_EBRACK); if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
@ -2217,6 +2219,7 @@ regex_compile (pattern, size, syntax, bufp)
PATFETCH (c1); PATFETCH (c1);
SET_LIST_BIT (c1); SET_LIST_BIT (c1);
range_start = c1;
continue; continue;
} }
@ -2241,8 +2244,10 @@ regex_compile (pattern, size, syntax, bufp)
&& *p != ']') && *p != ']')
{ {
reg_errcode_t ret reg_errcode_t ret
= compile_range (&p, pend, translate, syntax, b); = compile_range (range_start, &p, pend, translate,
syntax, b);
if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
range_start = 0xffffffff;
} }
else if (p[0] == '-' && p[1] != ']') else if (p[0] == '-' && p[1] != ']')
@ -2252,8 +2257,9 @@ regex_compile (pattern, size, syntax, bufp)
/* Move past the `-'. */ /* Move past the `-'. */
PATFETCH (c1); PATFETCH (c1);
ret = compile_range (&p, pend, translate, syntax, b); ret = compile_range (c, &p, pend, translate, syntax, b);
if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
range_start = 0xffffffff;
} }
/* See if we're at the beginning of a possible character /* See if we're at the beginning of a possible character
@ -2376,6 +2382,7 @@ regex_compile (pattern, size, syntax, bufp)
PATUNFETCH; PATUNFETCH;
SET_LIST_BIT ('['); SET_LIST_BIT ('[');
SET_LIST_BIT (':'); SET_LIST_BIT (':');
range_start = ':';
had_char_class = false; had_char_class = false;
} }
} }
@ -2503,6 +2510,16 @@ regex_compile (pattern, size, syntax, bufp)
#endif #endif
had_char_class = true; had_char_class = true;
} }
else
{
c1++;
while (c1--)
PATUNFETCH;
SET_LIST_BIT ('[');
SET_LIST_BIT ('=');
range_start = '=';
had_char_class = false;
}
} }
else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.') else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
{ {
@ -2553,6 +2570,7 @@ regex_compile (pattern, size, syntax, bufp)
/* Set the bit for the character. */ /* Set the bit for the character. */
SET_LIST_BIT (str[0]); SET_LIST_BIT (str[0]);
range_start = ((const unsigned char *) str)[0];
} }
#ifdef _LIBC #ifdef _LIBC
else else
@ -2561,9 +2579,7 @@ regex_compile (pattern, size, syntax, bufp)
those known to the collate implementation. those known to the collate implementation.
First find out whether the bytes in `str' are First find out whether the bytes in `str' are
actually from exactly one character. */ actually from exactly one character. */
const unsigned char *weights;
int32_t table_size; int32_t table_size;
const int32_t *table;
const int32_t *symb_table; const int32_t *symb_table;
const unsigned char *extra; const unsigned char *extra;
int32_t idx; int32_t idx;
@ -2574,10 +2590,6 @@ regex_compile (pattern, size, syntax, bufp)
int32_t hash; int32_t hash;
int ch; int ch;
table = (const int32_t *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
weights = (const unsigned char *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
table_size = table_size =
_NL_CURRENT_WORD (LC_COLLATE, _NL_CURRENT_WORD (LC_COLLATE,
_NL_COLLATE_SYMB_HASH_SIZEMB); _NL_COLLATE_SYMB_HASH_SIZEMB);
@ -2598,17 +2610,15 @@ regex_compile (pattern, size, syntax, bufp)
{ {
/* First compare the hashing value. */ /* First compare the hashing value. */
if (symb_table[2 * elem] == hash if (symb_table[2 * elem] == hash
&& (c1 == extra[symb_table[2 * elem + 1] && c1 == extra[symb_table[2 * elem + 1]]
+ sizeof (int32_t)])
&& memcmp (str, && memcmp (str,
&extra[symb_table[2 * elem + 1] &extra[symb_table[2 * elem + 1]
+ sizeof (int32_t) + 1], + 1],
c1) == 0) c1) == 0)
{ {
/* Yep, this is the entry. */ /* Yep, this is the entry. */
idx = *((int32_t *) idx = symb_table[2 * elem + 1];
(extra idx += 1 + extra[idx];
+ symb_table[2 * elem + 1]));
break; break;
} }
@ -2624,40 +2634,21 @@ regex_compile (pattern, size, syntax, bufp)
class. */ class. */
PATFETCH (c); PATFETCH (c);
/* Now we have to go through the whole table /* Now add the multibyte character(s) we found
and find all characters which have the same to the accepted list.
first level weight.
XXX Note that this is not entirely correct. XXX Note that this is not entirely correct.
we would have to match multibyte sequences we would have to match multibyte sequences
but this is not possible with the current but this is not possible with the current
implementation. */ implementation. Also, we have to match
for (ch = 1; ch < 256; ++ch) collating symbols, which expand to more than
/* XXX This test would have to be changed if we one byte, as a whole and not allow the
would allow matching multibyte sequences. */ individual bytes. */
if (table[ch] > 0) c1 = extra[idx++];
{ if (c1 == 1)
int32_t idx2 = table[ch]; range_start = extra[idx];
size_t len = weights[idx2]; while (c1-- > 0)
SET_LIST_BIT (extra[idx++]);
/* Test whether the lengths match. */
if (weights[idx] == len)
{
/* They do. Now compare the bytes of
the weight. */
size_t cnt = 0;
while (cnt < len
&& (weights[idx + 1 + cnt]
== weights[idx2 + 1 + cnt]))
++len;
if (cnt == len)
/* They match. Mark the character as
acceptable. */
SET_LIST_BIT (ch);
}
}
} }
#endif #endif
had_char_class = false; had_char_class = false;
@ -2668,7 +2659,8 @@ regex_compile (pattern, size, syntax, bufp)
while (c1--) while (c1--)
PATUNFETCH; PATUNFETCH;
SET_LIST_BIT ('['); SET_LIST_BIT ('[');
SET_LIST_BIT ('='); SET_LIST_BIT ('.');
range_start = '.';
had_char_class = false; had_char_class = false;
} }
} }
@ -2676,6 +2668,7 @@ regex_compile (pattern, size, syntax, bufp)
{ {
had_char_class = false; had_char_class = false;
SET_LIST_BIT (c); SET_LIST_BIT (c);
range_start = c;
} }
} }
@ -3425,7 +3418,8 @@ group_in_compile_stack (compile_stack, regnum)
`regex_compile' itself. */ `regex_compile' itself. */
static reg_errcode_t static reg_errcode_t
compile_range (p_ptr, pend, translate, syntax, b) compile_range (range_start, p_ptr, pend, translate, syntax, b)
unsigned int range_start;
const char **p_ptr, *pend; const char **p_ptr, *pend;
RE_TRANSLATE_TYPE translate; RE_TRANSLATE_TYPE translate;
reg_syntax_t syntax; reg_syntax_t syntax;
@ -3434,7 +3428,7 @@ compile_range (p_ptr, pend, translate, syntax, b)
unsigned this_char; unsigned this_char;
const char *p = *p_ptr; const char *p = *p_ptr;
unsigned int range_start, range_end; unsigned int range_end;
if (p == pend) if (p == pend)
return REG_ERANGE; return REG_ERANGE;
@ -3447,7 +3441,6 @@ compile_range (p_ptr, pend, translate, syntax, b)
We also want to fetch the endpoints without translating them; the We also want to fetch the endpoints without translating them; the
appropriate translation is done in the bit-setting loop below. */ appropriate translation is done in the bit-setting loop below. */
/* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */
range_start = ((const unsigned char *) p)[-2];
range_end = ((const unsigned char *) p)[0]; range_end = ((const unsigned char *) p)[0];
/* Have to increment the pointer into the pattern string, so the /* Have to increment the pointer into the pattern string, so the