regex: Unnest nested functions in regcomp.c

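This refactor moves four functions (seek_collating_symbol_entry,
lookup_collation_sequence_value, build_range_exp, and
build_collating_symbol) out of the nested scope of parse_bracket_exp
and converts them into static always_inline functions. collseqwc,
table_size, symb_table, and extra are now initialized to NULL or zero
because they are now passed as function arguments.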

On x86-64, .text is 16 bytes larger, likely due to the 4 added stores.
This is negligible compared to the amount of work that regcomp has to
do looking up the collation weights, or in other functions.

If the non-buildable `sysdeps/generic/dl-machine.h` doesn't count,
this patch removes the last `auto inline` usage from glibc.

Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
This commit is contained in:
Fangrui Song 2021-11-02 10:07:59 -07:00
parent db432f033d
commit fdcd177fd3
1 changed files with 266 additions and 248 deletions

View File

@ -2831,29 +2831,17 @@ build_collating_symbol (bitset_t sbcset, const unsigned char *name)
}
#endif /* not _LIBC */
/* This function parse bracket expression like "[abc]", "[a-c]",
"[[.a-a.]]" etc. */
static bin_tree_t *
parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
reg_syntax_t syntax, reg_errcode_t *err)
{
#ifdef _LIBC
const unsigned char *collseqmb;
const char *collseqwc;
uint32_t nrules;
int32_t table_size;
const int32_t *symb_table;
const unsigned char *extra;
/* Local function for parse_bracket_exp used in _LIBC environment.
Seek the collating symbol entry corresponding to NAME.
Return the index of the symbol in the SYMB_TABLE,
or -1 if not found. */
auto inline int32_t
static inline int32_t
__attribute__ ((always_inline))
seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
seek_collating_symbol_entry (const unsigned char *name, size_t name_len,
const int32_t *symb_table, int32_t table_size,
const unsigned char *extra)
{
int32_t elem;
@ -2877,15 +2865,17 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
Look up the collation sequence value of BR_ELEM.
Return the value if succeeded, UINT_MAX otherwise. */
auto inline unsigned int
static inline unsigned int
__attribute__ ((always_inline))
lookup_collation_sequence_value (bracket_elem_t *br_elem)
lookup_collation_sequence_value (bracket_elem_t *br_elem, uint32_t nrules,
const unsigned char *collseqmb,
const char *collseqwc, int32_t table_size,
const int32_t *symb_table,
const unsigned char *extra)
{
if (br_elem->type == SB_CHAR)
{
/*
if (MB_CUR_MAX == 1)
*/
/* if (MB_CUR_MAX == 1) */
if (nrules == 0)
return collseqmb[br_elem->opr.ch];
else
@ -2906,7 +2896,9 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
{
int32_t elem, idx;
elem = seek_collating_symbol_entry (br_elem->opr.name,
sym_name_len);
sym_name_len,
symb_table, table_size,
extra);
if (elem != -1)
{
/* We found the entry. */
@ -2945,10 +2937,14 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
mbcset->range_ends, is a pointer argument since we may
update it. */
auto inline reg_errcode_t
static inline reg_errcode_t
__attribute__ ((always_inline))
build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
bracket_elem_t *start_elem, bracket_elem_t *end_elem)
bracket_elem_t *start_elem, bracket_elem_t *end_elem,
re_dfa_t *dfa, reg_syntax_t syntax, uint32_t nrules,
const unsigned char *collseqmb, const char *collseqwc,
int32_t table_size, const int32_t *symb_table,
const unsigned char *extra)
{
unsigned int ch;
uint32_t start_collseq;
@ -2963,8 +2959,10 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
return REG_ERANGE;
/* FIXME: Implement rational ranges here, too. */
start_collseq = lookup_collation_sequence_value (start_elem);
end_collseq = lookup_collation_sequence_value (end_elem);
start_collseq = lookup_collation_sequence_value (start_elem, nrules, collseqmb, collseqwc,
table_size, symb_table, extra);
end_collseq = lookup_collation_sequence_value (end_elem, nrules, collseqmb, collseqwc,
table_size, symb_table, extra);
/* Check start/end collation sequence values. */
if (__glibc_unlikely (start_collseq == UINT_MAX
|| end_collseq == UINT_MAX))
@ -2985,7 +2983,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
/* There is not enough space, need realloc. */
uint32_t *new_array_start;
uint32_t *new_array_end;
Idx new_nranges;
int new_nranges;
/* +1 in case of mbcset->nranges is 0. */
new_nranges = 2 * mbcset->nranges + 1;
@ -3011,9 +3009,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
for (ch = 0; ch < SBC_MAX; ch++)
{
uint32_t ch_collseq;
/*
if (MB_CUR_MAX == 1)
*/
/* if (MB_CUR_MAX == 1) */
if (nrules == 0)
ch_collseq = collseqmb[ch];
else
@ -3030,16 +3026,19 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
pointer argument since we may update it. */
auto inline reg_errcode_t
static inline reg_errcode_t
__attribute__ ((always_inline))
build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
Idx *coll_sym_alloc, const unsigned char *name)
int *coll_sym_alloc, const unsigned char *name,
uint32_t nrules, int32_t table_size,
const int32_t *symb_table, const unsigned char *extra)
{
int32_t elem, idx;
size_t name_len = strlen ((const char *) name);
if (nrules != 0)
{
elem = seek_collating_symbol_entry (name, name_len);
elem = seek_collating_symbol_entry (name, name_len, symb_table,
table_size, extra);
if (elem != -1)
{
/* We found the entry. */
@ -3063,7 +3062,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
{
/* Not enough, realloc it. */
/* +1 in case of mbcset->ncoll_syms is 0. */
Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
/* Use realloc since mbcset->coll_syms is NULL
if *alloc == 0. */
int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
@ -3087,6 +3086,22 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
}
}
}
#endif /* _LIBC */
/* This function parse bracket expression like "[abc]", "[a-c]",
"[[.a-a.]]" etc. */
static bin_tree_t *
parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
reg_syntax_t syntax, reg_errcode_t *err)
{
#ifdef _LIBC
const unsigned char *collseqmb;
const char *collseqwc = NULL;
uint32_t nrules;
int32_t table_size = 0;
const int32_t *symb_table = NULL;
const unsigned char *extra = NULL;
#endif
re_token_t br_token;
@ -3230,7 +3245,9 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
#ifdef _LIBC
*err = build_range_exp (sbcset, mbcset, &range_alloc,
&start_elem, &end_elem);
&start_elem, &end_elem,
dfa, syntax, nrules, collseqmb, collseqwc,
table_size, symb_table, extra);
#else
# ifdef RE_ENABLE_I18N
*err = build_range_exp (syntax, sbcset,
@ -3283,7 +3300,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
#ifdef RE_ENABLE_I18N
mbcset, &coll_sym_alloc,
#endif /* RE_ENABLE_I18N */
start_elem.opr.name);
start_elem.opr.name,
nrules, table_size, symb_table, extra);
if (__glibc_unlikely (*err != REG_NOERROR))
goto parse_bracket_exp_free_return;
break;