Browse Source

new regex: stop confusing ourselves with _LIBC being undefined;
nuke one struct initializer which was silently being turned into a memset -
this makes matching about 2x faster:

test 0 pattern 0 '.?.?.?.?.?.?.?Log\.13'
- 0.249795s
+ 0.133522s
test 0 pattern 1 '(.?)(.?)(.?)(.?)(.?)(.?)(.?)Log\.13'
- 0.360115s
+ 0.191959s

text data bss dec hex filename
- 515009 2731 15396 533136 82290 lib/libuClibc-0.9.30-svn.so
+ 514961 2727 15396 533084 8225c lib/libuClibc-0.9.30-svn.so

Denis Vlasenko 16 years ago
parent
commit
1c8c83e23b

+ 33 - 42
libc/misc/regex/regcomp.c

@@ -344,7 +344,7 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
 	  if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes
 	  if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes
 	      || cset->nranges || cset->nchar_classes)
 	      || cset->nranges || cset->nchar_classes)
 	    {
 	    {
-# ifdef _LIBC
+# if 0
 	      if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0)
 	      if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0)
 		{
 		{
 		  /* In this case we want to catch the bytes which are
 		  /* In this case we want to catch the bytes which are
@@ -364,7 +364,7 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
 		for (i = 0; i < SBC_MAX; ++i)
 		for (i = 0; i < SBC_MAX; ++i)
 		  if (__btowc (i) == WEOF)
 		  if (__btowc (i) == WEOF)
 		    re_set_fastmap (fastmap, icase, i);
 		    re_set_fastmap (fastmap, icase, i);
-# endif /* not _LIBC */
+# endif
 	    }
 	    }
 	  for (i = 0; i < cset->nmbchars; ++i)
 	  for (i = 0; i < cset->nmbchars; ++i)
 	    {
 	    {
@@ -610,18 +610,16 @@ libc_hidden_def(regfree)
 /* Entry points compatible with 4.2 BSD regex library.  We don't define
 /* Entry points compatible with 4.2 BSD regex library.  We don't define
    them unless specifically requested.  */
    them unless specifically requested.  */
 
 
-#if defined _REGEX_RE_COMP || defined _LIBC || defined __UCLIBC__
+#if defined _REGEX_RE_COMP || defined __UCLIBC__
 
 
 /* BSD has one and only one pattern buffer.  */
 /* BSD has one and only one pattern buffer.  */
 static struct re_pattern_buffer *re_comp_buf;
 static struct re_pattern_buffer *re_comp_buf;
 
 
 char *
 char *
-# if defined _LIBC || defined __UCLIBC__
+/* Make BSD definitions weak in libc, so POSIX programs can redefine
-/* Make these definitions weak in libc, so POSIX programs can redefine
    these names if they don't use our functions, and still use
    these names if they don't use our functions, and still use
    regcomp/regexec above without link errors.  */
    regcomp/regexec above without link errors.  */
 weak_function
 weak_function
-# endif
 re_comp (const char *s)
 re_comp (const char *s)
 {
 {
   reg_errcode_t ret;
   reg_errcode_t ret;
@@ -679,7 +677,7 @@ re_comp (const char *s)
   return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
   return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
 }
 }
 
 
-#ifdef _LIBC
+#if 0
 libc_freeres_fn (free_mem)
 libc_freeres_fn (free_mem)
 {
 {
   regfree (re_comp_buf);
   regfree (re_comp_buf);
@@ -801,7 +799,7 @@ static reg_errcode_t
 init_dfa (re_dfa_t *dfa, size_t pat_len)
 init_dfa (re_dfa_t *dfa, size_t pat_len)
 {
 {
   unsigned int table_size;
   unsigned int table_size;
-#ifndef _LIBC
+#if 1
   char *codeset_name;
   char *codeset_name;
 #endif
 #endif
 
 
@@ -830,7 +828,7 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
 #else
 #else
   dfa->mb_cur_max = 1;
   dfa->mb_cur_max = 1;
 #endif
 #endif
-#ifdef _LIBC
+#if 0
   if (dfa->mb_cur_max == 6
   if (dfa->mb_cur_max == 6
       && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
       && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
     dfa->is_utf8 = 1;
     dfa->is_utf8 = 1;
@@ -880,7 +878,7 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
 		wint_t wch = __btowc (ch);
 		wint_t wch = __btowc (ch);
 		if (wch != WEOF)
 		if (wch != WEOF)
 		  dfa->sb_char[i] |= (bitset_word_t) 1 << j;
 		  dfa->sb_char[i] |= (bitset_word_t) 1 << j;
-# ifndef _LIBC
+# if 1
 		if (isascii (ch) && wch != ch)
 		if (isascii (ch) && wch != ch)
 		  dfa->map_notascii = 1;
 		  dfa->map_notascii = 1;
 # endif
 # endif
@@ -2541,8 +2539,8 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
    I'm not sure, but maybe enough.  */
    I'm not sure, but maybe enough.  */
 #define BRACKET_NAME_BUF_SIZE 32
 #define BRACKET_NAME_BUF_SIZE 32
 
 
-#ifndef _LIBC
+#if 1
-  /* Local function for parse_bracket_exp only used in case of NOT _LIBC.
+  /* Local function for parse_bracket_exp only used in case of NOT glibc.
      Build the range expression which starts from START_ELEM, and ends
      Build the range expression which starts from START_ELEM, and ends
      at END_ELEM.  The result are written to MBCSET and SBCSET.
      at END_ELEM.  The result are written to MBCSET and SBCSET.
      RANGE_ALLOC is the allocated size of mbcset->range_starts, and
      RANGE_ALLOC is the allocated size of mbcset->range_starts, and
@@ -2599,7 +2597,7 @@ build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
       return REG_ERANGE;
       return REG_ERANGE;
 
 
     /* Got valid collation sequence values, add them as a new entry.
     /* Got valid collation sequence values, add them as a new entry.
-       However, for !_LIBC we have no collation elements: if the
+       However, for !glibc we have no collation elements: if the
        character set is single byte, the single byte character set
        character set is single byte, the single byte character set
        that we build below suffices.  parse_bracket_exp passes
        that we build below suffices.  parse_bracket_exp passes
        no MBCSET if dfa->mb_cur_max == 1.  */
        no MBCSET if dfa->mb_cur_max == 1.  */
@@ -2661,10 +2659,10 @@ build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
 # endif /* not RE_ENABLE_I18N */
 # endif /* not RE_ENABLE_I18N */
   return REG_NOERROR;
   return REG_NOERROR;
 }
 }
-#endif /* not _LIBC */
+#endif
 
 
-#ifndef _LIBC
+#if 1
-/* Helper function for parse_bracket_exp only used in case of NOT _LIBC..
+/* Helper function for parse_bracket_exp only used in case of NOT glibc.
    Build the collating element which is represented by NAME.
    Build the collating element which is represented by NAME.
    The result are written to MBCSET and SBCSET.
    The result are written to MBCSET and SBCSET.
    COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
    COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
@@ -2682,13 +2680,10 @@ build_collating_symbol (bitset_t sbcset, const unsigned char *name)
   size_t name_len = strlen ((const char *) name);
   size_t name_len = strlen ((const char *) name);
   if (BE (name_len != 1, 0))
   if (BE (name_len != 1, 0))
     return REG_ECOLLATE;
     return REG_ECOLLATE;
-  else
+  bitset_set (sbcset, name[0]);
-    {
+  return REG_NOERROR;
-      bitset_set (sbcset, name[0]);
-      return REG_NOERROR;
-    }
 }
 }
-#endif /* not _LIBC */
+#endif
 
 
 /* This function parse bracket expression like "[abc]", "[a-c]",
 /* This function parse bracket expression like "[abc]", "[a-c]",
    "[[.a-a.]]" etc.  */
    "[[.a-a.]]" etc.  */
@@ -2697,7 +2692,7 @@ static bin_tree_t *
 parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
 parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
 		   reg_syntax_t syntax, reg_errcode_t *err)
 		   reg_syntax_t syntax, reg_errcode_t *err)
 {
 {
-#ifdef _LIBC
+#if 0
   const unsigned char *collseqmb;
   const unsigned char *collseqmb;
   const char *collseqwc;
   const char *collseqwc;
   uint32_t nrules;
   uint32_t nrules;
@@ -2705,15 +2700,13 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
   const int32_t *symb_table;
   const int32_t *symb_table;
   const unsigned char *extra;
   const unsigned char *extra;
 
 
-  /* Local function for parse_bracket_exp used in _LIBC environement.
+  /* Local function for parse_bracket_exp used in glibc.
      Seek the collating symbol entry correspondings to NAME.
      Seek the collating symbol entry correspondings to NAME.
      Return the index of the symbol in the SYMB_TABLE.  */
      Return the index of the symbol in the SYMB_TABLE.  */
 
 
   auto __inline__ int32_t
   auto __inline__ int32_t
   __attribute ((always_inline))
   __attribute ((always_inline))
-  seek_collating_symbol_entry (name, name_len)
+  seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
-	 const unsigned char *name;
-	 size_t name_len;
     {
     {
       int32_t hash = elem_hash ((const char *) name, name_len);
       int32_t hash = elem_hash ((const char *) name, name_len);
       int32_t elem = hash % table_size;
       int32_t elem = hash % table_size;
@@ -2743,14 +2736,13 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
       return elem;
       return elem;
     }
     }
 
 
-  /* Local function for parse_bracket_exp used in _LIBC environement.
+  /* Local function for parse_bracket_exp used in glibc.
      Look up the collation sequence value of BR_ELEM.
      Look up the collation sequence value of BR_ELEM.
      Return the value if succeeded, UINT_MAX otherwise.  */
      Return the value if succeeded, UINT_MAX otherwise.  */
 
 
   auto __inline__ unsigned int
   auto __inline__ unsigned int
   __attribute ((always_inline))
   __attribute ((always_inline))
-  lookup_collation_sequence_value (br_elem)
+  lookup_collation_sequence_value (bracket_elem_t *br_elem)
-	 bracket_elem_t *br_elem;
     {
     {
       if (br_elem->type == SB_CHAR)
       if (br_elem->type == SB_CHAR)
 	{
 	{
@@ -2808,7 +2800,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
       return UINT_MAX;
       return UINT_MAX;
     }
     }
 
 
-  /* Local function for parse_bracket_exp used in _LIBC environement.
+  /* Local function for parse_bracket_exp used in glibc.
      Build the range expression which starts from START_ELEM, and ends
      Build the range expression which starts from START_ELEM, and ends
      at END_ELEM.  The result are written to MBCSET and SBCSET.
      at END_ELEM.  The result are written to MBCSET and SBCSET.
      RANGE_ALLOC is the allocated size of mbcset->range_starts, and
      RANGE_ALLOC is the allocated size of mbcset->range_starts, and
@@ -2892,7 +2884,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
       return REG_NOERROR;
       return REG_NOERROR;
     }
     }
 
 
-  /* Local function for parse_bracket_exp used in _LIBC environement.
+  /* Local function for parse_bracket_exp used in glibc.
      Build the collating element which is represented by NAME.
      Build the collating element which is represented by NAME.
      The result are written to MBCSET and SBCSET.
      The result are written to MBCSET and SBCSET.
      COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
      COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
@@ -2900,11 +2892,10 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
 
 
   auto __inline__ reg_errcode_t
   auto __inline__ reg_errcode_t
   __attribute ((always_inline))
   __attribute ((always_inline))
-  build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
+  build_collating_symbol (re_charset_t *mbcset,
-	 re_charset_t *mbcset;
+		int *coll_sym_alloc,
-	 int *coll_sym_alloc;
+		bitset_t sbcset,
-	 bitset_t sbcset;
+		const unsigned char *name)
-	 const unsigned char *name;
     {
     {
       int32_t elem, idx;
       int32_t elem, idx;
       size_t name_len = strlen ((const char *) name);
       size_t name_len = strlen ((const char *) name);
@@ -2971,7 +2962,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
   bin_tree_t *work_tree;
   bin_tree_t *work_tree;
   int token_len;
   int token_len;
   int first_round = 1;
   int first_round = 1;
-#ifdef _LIBC
+#if 0
   collseqmb = (const unsigned char *)
   collseqmb = (const unsigned char *)
     _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
     _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
   nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
@@ -3092,7 +3083,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
 
 
 	  token_len = peek_token_bracket (token, regexp, syntax);
 	  token_len = peek_token_bracket (token, regexp, syntax);
 
 
-#ifdef _LIBC
+#if 0
 	  *err = build_range_exp (sbcset, mbcset, &range_alloc,
 	  *err = build_range_exp (sbcset, mbcset, &range_alloc,
 				  &start_elem, &end_elem);
 				  &start_elem, &end_elem);
 #else
 #else
@@ -3345,7 +3336,7 @@ build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
 build_equiv_class (bitset_t sbcset, const unsigned char *name)
 build_equiv_class (bitset_t sbcset, const unsigned char *name)
 #endif
 #endif
 {
 {
-#ifdef _LIBC
+#if 0
   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   if (nrules != 0)
   if (nrules != 0)
     {
     {
@@ -3414,7 +3405,7 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
       mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
       mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
     }
     }
   else
   else
-#endif /* _LIBC */
+#endif
     {
     {
       if (BE (strlen ((const char *) name) != 1, 0))
       if (BE (strlen ((const char *) name) != 1, 0))
 	return REG_ECOLLATE;
 	return REG_ECOLLATE;
@@ -3654,7 +3645,7 @@ static void
 free_charset (re_charset_t *cset)
 free_charset (re_charset_t *cset)
 {
 {
   re_free (cset->mbchars);
   re_free (cset->mbchars);
-# ifdef _LIBC
+# if 0
   re_free (cset->coll_syms);
   re_free (cset->coll_syms);
   re_free (cset->equiv_classes);
   re_free (cset->equiv_classes);
   re_free (cset->range_starts);
   re_free (cset->range_starts);

+ 0 - 3
libc/misc/regex/regex.c

@@ -21,9 +21,6 @@
 #include <features.h>
 #include <features.h>
 
 
 #ifdef __UCLIBC__
 #ifdef __UCLIBC__
-/* TODO: having _LIBC *off* is very confusing - the rest of uclibc
- * has it *on*. Fix it. */
-# undef _LIBC
 # define _REGEX_RE_COMP
 # define _REGEX_RE_COMP
 # define HAVE_LANGINFO
 # define HAVE_LANGINFO
 # define HAVE_LANGINFO_CODESET
 # define HAVE_LANGINFO_CODESET

+ 16 - 20
libc/misc/regex/regex_internal.c

@@ -109,7 +109,7 @@ re_string_construct (re_string_t *pstr, const char *str, int len,
       if (dfa->mb_cur_max > 1)
       if (dfa->mb_cur_max > 1)
 	build_wcs_buffer (pstr);
 	build_wcs_buffer (pstr);
       else
       else
-#endif /* RE_ENABLE_I18N  */
+#endif
 	{
 	{
 	  if (trans != NULL)
 	  if (trans != NULL)
 	    re_string_translate_buffer (pstr);
 	    re_string_translate_buffer (pstr);
@@ -195,7 +195,7 @@ static void
 internal_function
 internal_function
 build_wcs_buffer (re_string_t *pstr)
 build_wcs_buffer (re_string_t *pstr)
 {
 {
-#if defined _LIBC || defined __UCLIBC__
+#if defined __UCLIBC__
   unsigned char buf[MB_LEN_MAX];
   unsigned char buf[MB_LEN_MAX];
   assert (MB_LEN_MAX >= pstr->mb_cur_max);
   assert (MB_LEN_MAX >= pstr->mb_cur_max);
 #else
 #else
@@ -266,7 +266,7 @@ build_wcs_upper_buffer (re_string_t *pstr)
   mbstate_t prev_st;
   mbstate_t prev_st;
   int src_idx, byte_idx, end_idx, remain_len;
   int src_idx, byte_idx, end_idx, remain_len;
   size_t mbclen;
   size_t mbclen;
-#if defined _LIBC || defined __UCLIBC__
+#if defined __UCLIBC__
   char buf[MB_LEN_MAX];
   char buf[MB_LEN_MAX];
   assert (MB_LEN_MAX >= pstr->mb_cur_max);
   assert (MB_LEN_MAX >= pstr->mb_cur_max);
 #else
 #else
@@ -565,7 +565,7 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
 #ifdef RE_ENABLE_I18N
 #ifdef RE_ENABLE_I18N
       if (pstr->mb_cur_max > 1)
       if (pstr->mb_cur_max > 1)
 	memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
 	memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
-#endif /* RE_ENABLE_I18N */
+#endif
       pstr->len = pstr->raw_len;
       pstr->len = pstr->raw_len;
       pstr->stop = pstr->raw_stop;
       pstr->stop = pstr->raw_stop;
       pstr->valid_len = 0;
       pstr->valid_len = 0;
@@ -596,7 +596,7 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
 	  if (pstr->mb_cur_max > 1)
 	  if (pstr->mb_cur_max > 1)
 	    memmove (pstr->wcs, pstr->wcs + offset,
 	    memmove (pstr->wcs, pstr->wcs + offset,
 		     (pstr->valid_len - offset) * sizeof (wint_t));
 		     (pstr->valid_len - offset) * sizeof (wint_t));
-#endif /* RE_ENABLE_I18N */
+#endif
 	  if (BE (pstr->mbs_allocated, 0))
 	  if (BE (pstr->mbs_allocated, 0))
 	    memmove (pstr->mbs, pstr->mbs + offset,
 	    memmove (pstr->mbs, pstr->mbs + offset,
 		     pstr->valid_len - offset);
 		     pstr->valid_len - offset);
@@ -634,7 +634,7 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
 		  raw = pstr->raw_mbs + pstr->raw_mbs_idx;
 		  raw = pstr->raw_mbs + pstr->raw_mbs_idx;
 		  end = raw + (offset - pstr->mb_cur_max);
 		  end = raw + (offset - pstr->mb_cur_max);
 		  p = raw + offset - 1;
 		  p = raw + offset - 1;
-#ifdef _LIBC
+#if 0
 		  /* We know the wchar_t encoding is UCS4, so for the simple
 		  /* We know the wchar_t encoding is UCS4, so for the simple
 		     case, ASCII characters, skip the conversion step.  */
 		     case, ASCII characters, skip the conversion step.  */
 		  if (isascii (*p) && BE (pstr->trans == NULL, 1))
 		  if (isascii (*p) && BE (pstr->trans == NULL, 1))
@@ -729,7 +729,7 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags)
 	build_wcs_buffer (pstr);
 	build_wcs_buffer (pstr);
     }
     }
   else
   else
-#endif /* RE_ENABLE_I18N */
+#endif
     if (BE (pstr->mbs_allocated, 0))
     if (BE (pstr->mbs_allocated, 0))
       {
       {
 	if (pstr->icase)
 	if (pstr->icase)
@@ -864,14 +864,11 @@ re_string_context_at (const re_string_t *input, int idx, int eflags)
       return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
       return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
 	      ? CONTEXT_NEWLINE : 0);
 	      ? CONTEXT_NEWLINE : 0);
     }
     }
-  else
 #endif
 #endif
-    {
+  c = re_string_byte_at (input, idx);
-      c = re_string_byte_at (input, idx);
+  if (bitset_contain (input->word_char, c))
-      if (bitset_contain (input->word_char, c))
+    return CONTEXT_WORD;
-	return CONTEXT_WORD;
+  return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
-      return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
-    }
 }
 }
 
 
 /* Functions for set operation.  */
 /* Functions for set operation.  */
@@ -1068,10 +1065,9 @@ re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
     {
     {
       if (src1 != NULL && src1->nelem > 0)
       if (src1 != NULL && src1->nelem > 0)
 	return re_node_set_init_copy (dest, src1);
 	return re_node_set_init_copy (dest, src1);
-      else if (src2 != NULL && src2->nelem > 0)
+      if (src2 != NULL && src2->nelem > 0)
 	return re_node_set_init_copy (dest, src2);
 	return re_node_set_init_copy (dest, src2);
-      else
+      re_node_set_init_empty (dest);
-	re_node_set_init_empty (dest);
       return REG_NOERROR;
       return REG_NOERROR;
     }
     }
   for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
   for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
@@ -1197,8 +1193,7 @@ re_node_set_insert (re_node_set *set, int elem)
     {
     {
       if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
       if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1))
 	return 1;
 	return 1;
-      else
+      return -1;
-	return -1;
     }
     }
 
 
   if (BE (set->nelem, 0) == 0)
   if (BE (set->nelem, 0) == 0)
@@ -1544,11 +1539,12 @@ create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
     {
     {
       re_token_t *node = dfa->nodes + nodes->elems[i];
       re_token_t *node = dfa->nodes + nodes->elems[i];
       re_token_type_t type = node->type;
       re_token_type_t type = node->type;
+
       if (type == CHARACTER && !node->constraint)
       if (type == CHARACTER && !node->constraint)
 	continue;
 	continue;
 #ifdef RE_ENABLE_I18N
 #ifdef RE_ENABLE_I18N
       newstate->accept_mb |= node->accept_mb;
       newstate->accept_mb |= node->accept_mb;
-#endif /* RE_ENABLE_I18N */
+#endif
 
 
       /* If the state has the halt node, the state is a halt state.  */
       /* If the state has the halt node, the state is a halt state.  */
       if (type == END_OF_RE)
       if (type == END_OF_RE)

+ 23 - 66
libc/misc/regex/regex_internal.h

@@ -27,66 +27,36 @@
 #include <stdlib.h>
 #include <stdlib.h>
 #include <string.h>
 #include <string.h>
 
 
-#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC
+#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET
 # include <langinfo.h>
 # include <langinfo.h>
 #endif
 #endif
-#if defined HAVE_LOCALE_H || defined _LIBC
+#if defined HAVE_LOCALE_H
 # include <locale.h>
 # include <locale.h>
 #endif
 #endif
-#if defined HAVE_WCHAR_H || defined _LIBC
+#if defined HAVE_WCHAR_H
 # include <wchar.h>
 # include <wchar.h>
 #endif
 #endif
-#if defined HAVE_WCTYPE_H || defined _LIBC
+#if defined HAVE_WCTYPE_H
 # include <wctype.h>
 # include <wctype.h>
 #endif
 #endif
-#if defined HAVE_STDBOOL_H || defined _LIBC
+#if defined HAVE_STDBOOL_H
 # include <stdbool.h>
 # include <stdbool.h>
 #endif
 #endif
-#if defined HAVE_STDINT_H || defined _LIBC
+#if defined HAVE_STDINT_H
 # include <stdint.h>
 # include <stdint.h>
 #endif
 #endif
-#if defined _LIBC
-# include <bits/libc-lock.h>
-#else
-# define __libc_lock_define(CLASS,NAME)
-# define __libc_lock_init(NAME) do { } while (0)
-# define __libc_lock_lock(NAME) do { } while (0)
-# define __libc_lock_unlock(NAME) do { } while (0)
-#endif
 
 
-/* In case that the system doesn't have isblank().  */
+#define __libc_lock_define(CLASS, NAME)
-#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank && !defined __UCLIBC__
+#define __libc_lock_init(NAME)   do { } while (0)
-# define isblank(ch) ((ch) == ' ' || (ch) == '\t')
+#define __libc_lock_lock(NAME)   do { } while (0)
-#endif
+#define __libc_lock_unlock(NAME) do { } while (0)
 
 
-#if defined _LIBC && !defined __UCLIBC__
+#undef gettext
-# ifndef _RE_DEFINE_LOCALE_FUNCTIONS
+#undef gettext_noop
-#  define _RE_DEFINE_LOCALE_FUNCTIONS 1
+#define gettext(msgid)       (msgid)
-#   include <locale/localeinfo.h>
+#define gettext_noop(String) String
-#   include <locale/elem-hash.h>
-#   include <locale/coll-lookup.h>
-# endif
-#endif
-
-/* This is for other GNU distributions with internationalized messages.  */
-#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
-# include <libintl.h>
-# ifdef _LIBC
-#  undef gettext
-#  define gettext(msgid) \
-  INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES)
-# endif
-#else
-# define gettext(msgid) (msgid)
-#endif
 
 
-#ifndef gettext_noop
+#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL)
-/* This define is so xgettext can find the internationalizable
-   strings.  */
-# define gettext_noop(String) String
-#endif
-
-#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC
 # define RE_ENABLE_I18N
 # define RE_ENABLE_I18N
 #endif
 #endif
 
 
@@ -108,15 +78,6 @@
 #define NEWLINE_CHAR '\n'
 #define NEWLINE_CHAR '\n'
 #define WIDE_NEWLINE_CHAR L'\n'
 #define WIDE_NEWLINE_CHAR L'\n'
 
 
-/* Rename to standard API for using out of glibc.  */
-#if !defined _LIBC && !defined __UCLIBC__
-# define __wctype wctype
-# define __iswctype iswctype
-# define __btowc btowc
-# define __wcrtomb wcrtomb
-# define attribute_hidden
-#endif /* not _LIBC */
-
 #ifdef __GNUC__
 #ifdef __GNUC__
 # define __attribute(arg) __attribute__ (arg)
 # define __attribute(arg) __attribute__ (arg)
 #else
 #else
@@ -236,23 +197,23 @@ typedef struct
   wchar_t *mbchars;
   wchar_t *mbchars;
 
 
   /* Collating symbols.  */
   /* Collating symbols.  */
-# ifdef _LIBC
+# if 0
   int32_t *coll_syms;
   int32_t *coll_syms;
 # endif
 # endif
 
 
   /* Equivalence classes. */
   /* Equivalence classes. */
-# ifdef _LIBC
+# if 0
   int32_t *equiv_classes;
   int32_t *equiv_classes;
 # endif
 # endif
 
 
   /* Range expressions. */
   /* Range expressions. */
-# ifdef _LIBC
+# if 0
   uint32_t *range_starts;
   uint32_t *range_starts;
   uint32_t *range_ends;
   uint32_t *range_ends;
-# else /* not _LIBC */
+# else
   wchar_t *range_starts;
   wchar_t *range_starts;
   wchar_t *range_ends;
   wchar_t *range_ends;
-# endif /* not _LIBC */
+# endif
 
 
   /* Character classes. */
   /* Character classes. */
   wctype_t *char_classes;
   wctype_t *char_classes;
@@ -399,7 +360,7 @@ static unsigned int re_string_context_at (const re_string_t *input, int idx,
 
 
 #include <alloca.h>
 #include <alloca.h>
 
 
-#ifndef _LIBC
+#if 1
 # if HAVE_ALLOCA
 # if HAVE_ALLOCA
 /* The OS usually guarantees only one guard page at the bottom of the stack,
 /* The OS usually guarantees only one guard page at the bottom of the stack,
    and a page size can be as small as 4096 bytes.  So we cannot safely
    and a page size can be as small as 4096 bytes.  So we cannot safely
@@ -544,11 +505,7 @@ typedef struct
 {
 {
   /* The string object corresponding to the input string.  */
   /* The string object corresponding to the input string.  */
   re_string_t input;
   re_string_t input;
-#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
-  const re_dfa_t *const dfa;
-#else
   const re_dfa_t *dfa;
   const re_dfa_t *dfa;
-#endif
   /* EFLAGS of the argument of regexec.  */
   /* EFLAGS of the argument of regexec.  */
   int eflags;
   int eflags;
   /* Where the matching ends.  */
   /* Where the matching ends.  */
@@ -721,7 +678,7 @@ static int
 internal_function __attribute ((pure))
 internal_function __attribute ((pure))
 re_string_elem_size_at (const re_string_t *pstr, int idx)
 re_string_elem_size_at (const re_string_t *pstr, int idx)
 {
 {
-# ifdef _LIBC
+# if 0
   const unsigned char *p, *extra;
   const unsigned char *p, *extra;
   const int32_t *table, *indirect;
   const int32_t *table, *indirect;
   int32_t tmp;
   int32_t tmp;
@@ -739,7 +696,7 @@ re_string_elem_size_at (const re_string_t *pstr, int idx)
       tmp = findidx (&p);
       tmp = findidx (&p);
       return p - pstr->mbs - idx;
       return p - pstr->mbs - idx;
     }
     }
-# endif /* _LIBC */
+# endif
   return 1;
   return 1;
 }
 }
 #endif /* RE_ENABLE_I18N */
 #endif /* RE_ENABLE_I18N */

+ 16 - 34
libc/misc/regex/regexec.c

@@ -185,11 +185,6 @@ static int build_trtable (const re_dfa_t *dfa,
 static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
 static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
 				    const re_string_t *input, int idx)
 				    const re_string_t *input, int idx)
      internal_function;
      internal_function;
-# ifdef _LIBC
-static unsigned int find_collation_sequence_value (const unsigned char *mbs,
-						   size_t name_len)
-     internal_function;
-# endif
 #endif
 #endif
 static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
 static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
 				       const re_dfastate_t *state,
 				       const re_dfastate_t *state,
@@ -559,16 +554,14 @@ re_set_registers (bufp, regs, num_regs, starts, ends)
 /* Entry points compatible with 4.2 BSD regex library.  We don't define
 /* Entry points compatible with 4.2 BSD regex library.  We don't define
    them unless specifically requested.  */
    them unless specifically requested.  */
 
 
-#if defined _REGEX_RE_COMP || defined _LIBC || defined __UCLIBC__
+#if defined _REGEX_RE_COMP || defined __UCLIBC__
 int
 int
-# if defined _LIBC || defined __UCLIBC__
 weak_function
 weak_function
-# endif
 re_exec (const char *s)
 re_exec (const char *s)
 {
 {
   return 0 == regexec (re_comp_buf, s, 0, NULL, 0);
   return 0 == regexec (re_comp_buf, s, 0, NULL, 0);
 }
 }
-#endif /* _REGEX_RE_COMP */
+#endif
 
 
 /* Internal entry point.  */
 /* Internal entry point.  */
 
 
@@ -596,19 +589,13 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
   int fl_longest_match, match_first, match_kind, match_last = -1;
   int fl_longest_match, match_first, match_kind, match_last = -1;
   int extra_nmatch;
   int extra_nmatch;
   int sb, ch;
   int sb, ch;
-#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
-  re_match_context_t mctx = { .dfa = dfa };
-#else
   re_match_context_t mctx;
   re_match_context_t mctx;
-#endif
   char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
   char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
 		   && range && !preg->can_be_null) ? preg->fastmap : NULL;
 		   && range && !preg->can_be_null) ? preg->fastmap : NULL;
   RE_TRANSLATE_TYPE t = preg->translate;
   RE_TRANSLATE_TYPE t = preg->translate;
 
 
-#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
   memset (&mctx, '\0', sizeof (re_match_context_t));
   memset (&mctx, '\0', sizeof (re_match_context_t));
   mctx.dfa = dfa;
   mctx.dfa = dfa;
-#endif
 
 
   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
   nmatch -= extra_nmatch;
   nmatch -= extra_nmatch;
@@ -3754,12 +3741,12 @@ check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
   if (node->type == COMPLEX_BRACKET)
   if (node->type == COMPLEX_BRACKET)
     {
     {
       const re_charset_t *cset = node->opr.mbcset;
       const re_charset_t *cset = node->opr.mbcset;
-# ifdef _LIBC
+# if 0
       const unsigned char *pin
       const unsigned char *pin
 	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
 	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
       int j;
       int j;
       uint32_t nrules;
       uint32_t nrules;
-# endif /* _LIBC */
+# endif
       int match_len = 0;
       int match_len = 0;
       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
 		    ? re_string_wchar_at (input, str_idx) : 0);
 		    ? re_string_wchar_at (input, str_idx) : 0);
@@ -3782,7 +3769,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
 	    }
 	    }
 	}
 	}
 
 
-# ifdef _LIBC
+# if 0
       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
       if (nrules != 0)
       if (nrules != 0)
 	{
 	{
@@ -3871,15 +3858,13 @@ check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
 	    }
 	    }
 	}
 	}
       else
       else
-# endif /* _LIBC */
+# endif
 	{
 	{
 	  /* match with range expression?  */
 	  /* match with range expression?  */
-#if __GNUC__ >= 2
+	  wchar_t cmp_buf[6];
-	  wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
+
-#else
+	  memset (cmp_buf, 0, sizeof(cmp_buf));
-	  wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
 	  cmp_buf[2] = wc;
 	  cmp_buf[2] = wc;
-#endif
 	  for (i = 0; i < cset->nranges; ++i)
 	  for (i = 0; i < cset->nranges; ++i)
 	    {
 	    {
 	      cmp_buf[0] = cset->range_starts[i];
 	      cmp_buf[0] = cset->range_starts[i];
@@ -3892,21 +3877,18 @@ check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
 		}
 		}
 	    }
 	    }
 	}
 	}
-    check_node_accept_bytes_match:
+
+ check_node_accept_bytes_match:
       if (!cset->non_match)
       if (!cset->non_match)
 	return match_len;
 	return match_len;
-      else
+      if (match_len > 0)
-	{
+	return 0;
-	  if (match_len > 0)
+      return (elem_len > char_len) ? elem_len : char_len;
-	    return 0;
-	  else
-	    return (elem_len > char_len) ? elem_len : char_len;
-	}
     }
     }
   return 0;
   return 0;
 }
 }
 
 
-# ifdef _LIBC
+# if 0
 static unsigned int
 static unsigned int
 internal_function
 internal_function
 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
@@ -3964,7 +3946,7 @@ find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
       return UINT_MAX;
       return UINT_MAX;
     }
     }
 }
 }
-# endif /* _LIBC */
+# endif
 #endif /* RE_ENABLE_I18N */
 #endif /* RE_ENABLE_I18N */
 
 
 /* Check whether the node accepts the byte which is IDX-th
 /* Check whether the node accepts the byte which is IDX-th